diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8da2b794..4a5ba48c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -34,13 +34,15 @@ jobs: with: build-args: "--wfail -v" test: true - - run: | - lake build bench-aiur - lake build bench-blake3 - name: Test Ix CLI run: | mkdir -p ~/.local/bin lake test -- cli + - run: lake exe test-aiur + - run: lake exe test-ixvm + - run: | + lake build bench-aiur + lake build bench-blake3 - name: Check lean.h.hash run: lake run check-lean-h-hash diff --git a/.github/workflows/nix.yml b/.github/workflows/nix.yml index b7767b65..e39dc4be 100644 --- a/.github/workflows/nix.yml +++ b/.github/workflows/nix.yml @@ -30,9 +30,11 @@ jobs: # Ix benches - run: nix build .#bench-aiur --print-build-logs --accept-flake-config - run: nix build .#bench-blake3 --print-build-logs --accept-flake-config + - run: nix build .#bench-shardmap --print-build-logs --accept-flake-config # Ix tests - - run: nix build .#test --print-build-logs --accept-flake-config - run: nix run .#test --accept-flake-config + - run: nix run .#test-aiur --accept-flake-config + - run: nix run .#test-ixvm --accept-flake-config # Tests Nix devShell support on Ubuntu nix-devshell: diff --git a/Apps/ZKVoting/Prover.lean b/Apps/ZKVoting/Prover.lean index 8a8e3ecf..1fbf56a3 100644 --- a/Apps/ZKVoting/Prover.lean +++ b/Apps/ZKVoting/Prover.lean @@ -1,4 +1,5 @@ import Ix.Claim +import Ix.Commit import Ix.Address import Ix.Meta import Ix.CompileM @@ -14,7 +15,7 @@ structure Result where charlieVotes: Nat deriving Repr, Lean.ToExpr -def privateVotes : List Candidate := +def privateVotes : List Candidate := [.alice, .alice, .bob] def runElection (votes: List Candidate) : Result := @@ -26,37 +27,31 @@ def runElection (votes: List Candidate) : Result := | .bob, ⟨a, b, c⟩ => ⟨a, b+1, c⟩ | .charlie, ⟨a, b, c⟩ => ⟨a, b, c+1⟩ -open Ix.Compile - def main : IO UInt32 := do - let mut env : Lean.Environment := <- get_env! 
+ let mut env : Lean.Environment ← get_env! + + -- 1. Full compilation via Rust + let phases ← Ix.CompileM.rsCompilePhases env + let mut compileEnv := Ix.Commit.mkCompileEnv phases + -- simulate getting the votes from somewhere - let votes : List Candidate <- pure privateVotes + let votes : List Candidate ← pure privateVotes let mut as : List Lean.Name := [] -- the type of each vote to commit let voteType := Lean.toTypeExpr Candidate -- loop over the votes for v in votes do -- add each vote to our environment as a commitment - let (lvls, typ, val) <- runMeta (metaMakeDef v) env - let ((addr, _), stt) <- (commitDef lvls typ val).runIO env - env := stt.env - as := (Address.toUniqueName addr)::as - IO.println s!"vote: {repr v}, commitment: {addr}" + let (lvls, typ, val) ← runMeta (metaMakeDef v) env + let (commitAddr, env', compileEnv') ← Ix.Commit.commitDef compileEnv env lvls typ val + env := env' + compileEnv := compileEnv' + as := (Address.toUniqueName commitAddr)::as + IO.println s!"vote: {repr v}, commitment: {commitAddr}" -- build a Lean list of our commitments as the argument to runElection - let arg : Lean.Expr <- runMeta (metaMakeList voteType as) env - let (lvls, input, output, type, sort) <- + let arg : Lean.Expr ← runMeta (metaMakeList voteType as) env + let (lvls, input, output, type, _sort) ← runMeta (metaMakeEvalClaim ``runElection [arg]) env - let inputPretty <- runMeta (Lean.Meta.ppExpr input) env - let outputPretty <- runMeta (Lean.Meta.ppExpr output) env - let typePretty <- runMeta (Lean.Meta.ppExpr type) env - IO.println s!"claim: {inputPretty}" - IO.println s!" ~> {outputPretty}" - IO.println s!" : {typePretty}" - IO.println s!" 
@ {repr lvls}" - let ((claim,_,_,_), _stt') <- - (evalClaim lvls input output type sort true).runIO env + let claim ← IO.ofExcept <| Ix.Commit.evalClaim compileEnv lvls input output type IO.println s!"{claim}" - -- Ix.prove claim stt return 0 - diff --git a/Apps/ZKVoting/Verifier.lean b/Apps/ZKVoting/Verifier.lean index eee11247..d018db86 100644 --- a/Apps/ZKVoting/Verifier.lean +++ b/Apps/ZKVoting/Verifier.lean @@ -1,7 +1,6 @@ -import Ix.Ixon.Serialize -import Ix.Prove +import Ix.Claim -def main (args : List String) : IO UInt32 := do +def main (_args : List String) : IO UInt32 := do -- TODO -- let mut votes := #[] -- for commStr in args do diff --git a/Benchmarks/ShardMap.lean b/Benchmarks/ShardMap.lean new file mode 100644 index 00000000..37cec826 --- /dev/null +++ b/Benchmarks/ShardMap.lean @@ -0,0 +1,351 @@ +import Ix.ShardMap +import Ix.Benchmark.Bench + +open Ix + +namespace Benchmarks.ShardMap + +/-! ## Helper Functions -/ + +/-- Insert N sequential keys into a map -/ +def insertN (map : ShardMap Nat Nat) (n : Nat) : IO Unit := do + for i in [:n] do + map.insert i i + +/-- Look up N sequential keys (all hits) -/ +def getN (map : ShardMap Nat Nat) (n : Nat) : IO Unit := do + for i in [:n] do + let _ ← map.get? i + +/-- Look up N keys that don't exist (all misses) -/ +def getMissN (map : ShardMap Nat Nat) (n : Nat) (offset : Nat) : IO Unit := do + for i in [:n] do + let _ ← map.get? 
(offset + i) + +/-- Remove N sequential keys -/ +def removeN (map : ShardMap Nat Nat) (n : Nat) : IO Unit := do + for i in [:n] do + let _ ← map.remove i + +/-- Insert items one by one in a loop -/ +def insertLoop (map : ShardMap Nat Nat) (items : Array (Nat × Nat)) : IO Unit := do + for (k, v) in items do + map.insert k v + +/-- Generate an array of N key-value pairs -/ +def genItems (n : Nat) : Array (Nat × Nat) := + Array.range n |>.map fun i => (i, i) + +/-- Concurrent workload: each thread inserts and reads its own range of keys -/ +def concurrentWorkload (map : ShardMap Nat Nat) (threads : Nat) (opsPerThread : Nat) : IO Unit := do + let mut tasks : Array (Task (Except IO.Error Unit)) := #[] + for t in [:threads] do + let task ← IO.asTask do + for i in [:opsPerThread] do + let key := t * opsPerThread + i + map.insert key key + let _ ← map.get? key + tasks := tasks.push task + for task in tasks do + let _ ← IO.ofExcept task.get + +/-- Concurrent reads only: all threads read the same pre-populated keys -/ +def concurrentReads (map : ShardMap Nat Nat) (threads : Nat) (keysToRead : Nat) : IO Unit := do + let mut tasks : Array (Task (Except IO.Error Unit)) := #[] + for _ in [:threads] do + let task ← IO.asTask do + for i in [:keysToRead] do + let _ ← map.get? 
i + tasks := tasks.push task + for task in tasks do + let _ ← IO.ofExcept task.get + +/-- Concurrent writes only: each thread writes to different keys -/ +def concurrentWrites (map : ShardMap Nat Nat) (threads : Nat) (keysPerThread : Nat) : IO Unit := do + let mut tasks : Array (Task (Except IO.Error Unit)) := #[] + for t in [:threads] do + let task ← IO.asTask do + for i in [:keysPerThread] do + let key := t * keysPerThread + i + map.insert key key + tasks := tasks.push task + for task in tasks do + let _ ← IO.ofExcept task.get + +/-- Mixed read/write workload with configurable read ratio -/ +def mixedWorkload (map : ShardMap Nat Nat) (threads : Nat) (opsPerThread : Nat) + (readRatio : Float) : IO Unit := do + let readOps := (readRatio * opsPerThread.toFloat).toUInt64.toNat + let writeOps := opsPerThread - readOps + let mut tasks : Array (Task (Except IO.Error Unit)) := #[] + for t in [:threads] do + let task ← IO.asTask do + -- Writes first + for i in [:writeOps] do + let key := t * opsPerThread + i + map.insert key key + -- Then reads + for i in [:readOps] do + let key := t * opsPerThread + (i % writeOps.max 1) + let _ ← map.get? key + tasks := tasks.push task + for task in tasks do + let _ ← IO.ofExcept task.get + +/-- Hot shard workload: all threads access keys that hash to same shard -/ +def hotShardWorkload (map : ShardMap Nat Nat) (threads : Nat) (opsPerThread : Nat) : IO Unit := do + -- All keys will be multiples of 256 to hit the same shard (assuming 256 shards) + let mut tasks : Array (Task (Except IO.Error Unit)) := #[] + for t in [:threads] do + let task ← IO.asTask do + for i in [:opsPerThread] do + let key := (t * opsPerThread + i) * 256 + map.insert key key + let _ ← map.get? 
key + tasks := tasks.push task + for task in tasks do + let _ ← IO.ofExcept task.get + +/-- Distributed workload: keys are spread across all shards -/ +def distributedWorkload (map : ShardMap Nat Nat) (threads : Nat) (opsPerThread : Nat) : IO Unit := do + let mut tasks : Array (Task (Except IO.Error Unit)) := #[] + for t in [:threads] do + let task ← IO.asTask do + for i in [:opsPerThread] do + -- Use a prime multiplier to distribute across shards + let key := (t * opsPerThread + i) * 127 + map.insert key key + let _ ← map.get? key + tasks := tasks.push task + for task in tasks do + let _ ← IO.ofExcept task.get + +/-! ## Benchmark Groups -/ + +/-- Basic single-threaded operations -/ +def basicBench : IO (Array BenchReport) := do + IO.println "=== Basic Operations (Single-threaded) ===\n" + bgroup "shardmap-basic" [ + -- Insert throughput + benchIO "insert 1K" (fun () => do + let map ← Ix.ShardMap.new (α := Nat) (β := Nat) + insertN map 1000) (), + benchIO "insert 10K" (fun () => do + let map ← Ix.ShardMap.new (α := Nat) (β := Nat) + insertN map 10000) (), + benchIO "insert 100K" (fun () => do + let map ← Ix.ShardMap.new (α := Nat) (β := Nat) + insertN map 100000) (), + + -- Lookup throughput (pre-populated) + benchIO "get 10K hits" (fun () => do + let map ← Ix.ShardMap.new (α := Nat) (β := Nat) + insertN map 10000 + getN map 10000) (), + benchIO "get 10K misses" (fun () => do + let map ← Ix.ShardMap.new (α := Nat) (β := Nat) + insertN map 10000 + getMissN map 10000 100000) (), + + -- Remove throughput + benchIO "remove 10K" (fun () => do + let map ← Ix.ShardMap.new (α := Nat) (β := Nat) + insertN map 10000 + removeN map 10000) () + ] + +/-- Compare insertMany vs individual inserts -/ +def bulkBench : IO (Array BenchReport) := do + IO.println "=== Bulk Operations Comparison ===\n" + let items10K := genItems 10000 + bgroup "shardmap-bulk" [ + benchIO "insertMany 10K" (fun () => do + let map ← Ix.ShardMap.new (α := Nat) (β := Nat) + map.insertMany items10K) (), + 
benchIO "insert loop 10K" (fun () => do + let map ← Ix.ShardMap.new (α := Nat) (β := Nat) + insertLoop map items10K) () + ] + +/-- Concurrent read scaling with SharedMutex -/ +def concurrentReadBench : IO (Array BenchReport) := do + IO.println "=== Concurrent Read Scaling ===\n" + -- Pre-populate a shared map + let map ← Ix.ShardMap.new (α := Nat) (β := Nat) + insertN map 10000 + bgroup "shardmap-concurrent-read" [ + benchIO "read 1 thread" (fun () => concurrentReads map 1 10000) (), + benchIO "read 2 threads" (fun () => concurrentReads map 2 5000) (), + benchIO "read 4 threads" (fun () => concurrentReads map 4 2500) (), + benchIO "read 8 threads" (fun () => concurrentReads map 8 1250) (), + benchIO "read 16 threads" (fun () => concurrentReads map 16 625) () + ] + +/-- Concurrent write scaling -/ +def concurrentWriteBench : IO (Array BenchReport) := do + IO.println "=== Concurrent Write Scaling ===\n" + bgroup "shardmap-concurrent-write" [ + benchIO "write 1 thread 10K" (fun () => do + let map ← Ix.ShardMap.new (α := Nat) (β := Nat) + concurrentWrites map 1 10000) (), + benchIO "write 2 threads 5K each" (fun () => do + let map ← Ix.ShardMap.new (α := Nat) (β := Nat) + concurrentWrites map 2 5000) (), + benchIO "write 4 threads 2.5K each" (fun () => do + let map ← Ix.ShardMap.new (α := Nat) (β := Nat) + concurrentWrites map 4 2500) (), + benchIO "write 8 threads 1.25K each" (fun () => do + let map ← Ix.ShardMap.new (α := Nat) (β := Nat) + concurrentWrites map 8 1250) () + ] + +/-- Mixed read/write workload -/ +def mixedWorkloadBench : IO (Array BenchReport) := do + IO.println "=== Mixed Read/Write Workload (8 threads) ===\n" + bgroup "shardmap-mixed" [ + benchIO "mixed 50/50 8 threads" (fun () => do + let map ← Ix.ShardMap.new (α := Nat) (β := Nat) + mixedWorkload map 8 1000 0.5) (), + benchIO "mixed 80/20 read/write 8 threads" (fun () => do + let map ← Ix.ShardMap.new (α := Nat) (β := Nat) + mixedWorkload map 8 1000 0.8) (), + benchIO "mixed 95/5 read/write 8 
threads" (fun () => do + let map ← Ix.ShardMap.new (α := Nat) (β := Nat) + mixedWorkload map 8 1000 0.95) () + ] + +/-- Compare different shard configurations -/ +def shardConfigBench : IO (Array BenchReport) := do + IO.println "=== Shard Configuration Impact (8 threads) ===\n" + bgroup "shardmap-shards" [ + benchIO "shardBits=2 (4 shards)" (fun () => do + let map ← Ix.ShardMap.new (α := Nat) (β := Nat) (shardBits := 2) + concurrentWorkload map 8 1000) (), + benchIO "shardBits=4 (16 shards)" (fun () => do + let map ← Ix.ShardMap.new (α := Nat) (β := Nat) (shardBits := 4) + concurrentWorkload map 8 1000) (), + benchIO "shardBits=6 (64 shards)" (fun () => do + let map ← Ix.ShardMap.new (α := Nat) (β := Nat) (shardBits := 6) + concurrentWorkload map 8 1000) (), + benchIO "shardBits=8 (256 shards)" (fun () => do + let map ← Ix.ShardMap.new (α := Nat) (β := Nat) (shardBits := 8) + concurrentWorkload map 8 1000) () + ] + +/-- Contention patterns: hot shard vs distributed access -/ +def contentionBench : IO (Array BenchReport) := do + IO.println "=== Contention Patterns (8 threads) ===\n" + bgroup "shardmap-contention" [ + benchIO "hot shard (worst case)" (fun () => do + let map ← Ix.ShardMap.new (α := Nat) (β := Nat) + hotShardWorkload map 8 500) (), + benchIO "distributed (best case)" (fun () => do + let map ← Ix.ShardMap.new (α := Nat) (β := Nat) + distributedWorkload map 8 500) () + ] + +/-- Pre-allocated capacity impact -/ +def capacityBench : IO (Array BenchReport) := do + IO.println "=== Capacity Pre-allocation Impact ===\n" + bgroup "shardmap-capacity" [ + benchIO "insert 10K (no prealloc)" (fun () => do + let map ← Ix.ShardMap.new (α := Nat) (β := Nat) + insertN map 10000) (), + benchIO "insert 10K (prealloc 32/shard)" (fun () => do + let map ← Ix.ShardMap.newWithCapacity (α := Nat) (β := Nat) + (capacityPerShard := 32) + insertN map 10000) (), + benchIO "insert 10K (prealloc 64/shard)" (fun () => do + let map ← Ix.ShardMap.newWithCapacity (α := Nat) (β := Nat) + 
(capacityPerShard := 64) + insertN map 10000) () + ] + +/-- Look up N sequential keys using get?Fast (all hits) -/ +def getNFast (map : ShardMap Nat Nat) (n : Nat) : IO Unit := do + for i in [:n] do + let _ ← map.get?Fast i + +/-- Concurrent reads using get?Fast -/ +def concurrentReadsFast (map : ShardMap Nat Nat) (threads : Nat) (keysToRead : Nat) : IO Unit := do + let mut tasks : Array (Task (Except IO.Error Unit)) := #[] + for _ in [:threads] do + let task ← IO.asTask do + for i in [:keysToRead] do + let _ ← map.get?Fast i + tasks := tasks.push task + for task in tasks do + let _ ← IO.ofExcept task.get + +/-- Hot shard workload using get?Fast: all threads access keys that hash to same shard -/ +def hotShardWorkloadFast (map : ShardMap Nat Nat) (threads : Nat) (opsPerThread : Nat) : IO Unit := do + let mut tasks : Array (Task (Except IO.Error Unit)) := #[] + for t in [:threads] do + let task ← IO.asTask do + for i in [:opsPerThread] do + let key := (t * opsPerThread + i) * 256 + map.insert key key + let _ ← map.get?Fast key + tasks := tasks.push task + for task in tasks do + let _ ← IO.ofExcept task.get + +/-- Compare get? vs get?Fast under different contention levels -/ +def fastPathBench : IO (Array BenchReport) := do + IO.println "=== get?Fast vs get? Comparison ===\n" + let map ← Ix.ShardMap.new (α := Nat) (β := Nat) + insertN map 10000 + bgroup "shardmap-fast-path" [ + -- Single-threaded comparison + benchIO "get? 10K (single thread)" (fun () => getN map 10000) (), + benchIO "get?Fast 10K (single thread)" (fun () => getNFast map 10000) (), + -- Concurrent comparison + benchIO "get? 8 threads" (fun () => concurrentReads map 8 1250) (), + benchIO "get?Fast 8 threads" (fun () => concurrentReadsFast map 8 1250) (), + -- Hot shard comparison (high contention) + benchIO "hot shard get?" 
(fun () => do + let m ← Ix.ShardMap.new (α := Nat) (β := Nat) + hotShardWorkload m 8 500) (), + benchIO "hot shard get?Fast" (fun () => do + let m ← Ix.ShardMap.new (α := Nat) (β := Nat) + hotShardWorkloadFast m 8 500) () + ] + +/-- Compare sequential vs parallel insertMany -/ +def parallelInsertBench : IO (Array BenchReport) := do + IO.println "=== Parallel insertMany Performance ===\n" + let items50K := genItems 50000 + let items100K := genItems 100000 + bgroup "shardmap-parallel-insert" [ + benchIO "insertMany 50K items" (fun () => do + let map ← Ix.ShardMap.new (α := Nat) (β := Nat) + map.insertMany items50K) (), + benchIO "insertMany 100K items" (fun () => do + let map ← Ix.ShardMap.new (α := Nat) (β := Nat) + map.insertMany items100K) (), + benchIO "insert loop 50K items" (fun () => do + let map ← Ix.ShardMap.new (α := Nat) (β := Nat) + insertLoop map items50K) (), + benchIO "insert loop 100K items" (fun () => do + let map ← Ix.ShardMap.new (α := Nat) (β := Nat) + insertLoop map items100K) () + ] + +end Benchmarks.ShardMap + +def main : IO Unit := do + IO.println "ShardMap Performance Benchmarks\n" + IO.println "================================\n" + + let _ ← Benchmarks.ShardMap.basicBench + let _ ← Benchmarks.ShardMap.bulkBench + let _ ← Benchmarks.ShardMap.concurrentReadBench + let _ ← Benchmarks.ShardMap.concurrentWriteBench + let _ ← Benchmarks.ShardMap.mixedWorkloadBench + let _ ← Benchmarks.ShardMap.shardConfigBench + let _ ← Benchmarks.ShardMap.contentionBench + let _ ← Benchmarks.ShardMap.capacityBench + let _ ← Benchmarks.ShardMap.fastPathBench + let _ ← Benchmarks.ShardMap.parallelInsertBench + + IO.println "\nBenchmarks complete!" 
diff --git a/Cargo.lock b/Cargo.lock index 893dc653..5b9da024 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -266,9 +266,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3" +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" [[package]] name = "cc" diff --git a/Cargo.toml b/Cargo.toml index 5c5104d5..8813e739 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,3 +40,9 @@ quickcheck_macros = "1.0.0" default = [] parallel = ["multi-stark/parallel"] net = ["bytes", "tokio", "iroh", "iroh-base", "n0-snafu", "n0-watcher", "rand", "tracing", "tracing-subscriber", "bincode", "serde" ] + +[profile.dev] +panic = "abort" + +[profile.release] +panic = "abort" diff --git a/Ix.lean b/Ix.lean index 7b45789a..55a400b6 100644 --- a/Ix.lean +++ b/Ix.lean @@ -1,9 +1,14 @@ -- This module serves as the root of the `Ix` library. -- Import modules here that should be built as part of the library. +import Ix.Environment +import Ix.CanonM import Ix.Ixon +import Ix.Sharing import Ix.Meta import Ix.GraphM import Ix.CondenseM import Ix.CompileM import Ix.DecompileM +import Ix.Claim +import Ix.Commit import Ix.Benchmark.Bench diff --git a/Ix/Address.lean b/Ix/Address.lean index 7c9a6f84..ee11eb85 100644 --- a/Ix/Address.lean +++ b/Ix/Address.lean @@ -6,12 +6,15 @@ import Blake3 deriving instance Lean.ToExpr for ByteArray deriving instance Repr for ByteArray +/-- A 32-byte Blake3 content hash used as a content address for Ix objects. -/ structure Address where hash : ByteArray deriving Lean.ToExpr, BEq, Hashable +/-- Compute the Blake3 hash of a `ByteArray`, returning an `Address`. -/ def Address.blake3 (x: ByteArray) : Address := ⟨(Blake3.hash x).val⟩ +/-- Convert a nibble (0--15) to its lowercase hexadecimal character. 
-/ def hexOfNat : Nat -> Option Char | 0 => .some '0' | 1 => .some '1' @@ -31,6 +34,7 @@ def hexOfNat : Nat -> Option Char | 15 => .some 'f' | _ => .none +/-- Parse a hexadecimal character (case-insensitive) into a nibble value 0--15. -/ def natOfHex : Char -> Option Nat | '0' => .some 0 | '1' => .some 1 @@ -78,12 +82,14 @@ instance : Ord Address where instance : Inhabited Address where default := Address.blake3 ⟨#[]⟩ +/-- Decode two hex characters (high nibble, low nibble) into a single byte. -/ def byteOfHex : Char -> Char -> Option UInt8 | hi, lo => do let hi <- natOfHex hi let lo <- natOfHex lo UInt8.ofNat (hi <<< 4 + lo) +/-- Parse a hexadecimal string into a `ByteArray`. Returns `none` on odd length or invalid chars. -/ def bytesOfHex (s: String) : Option ByteArray := do let bs <- go s.toList return ⟨bs.toArray⟩ @@ -96,22 +102,18 @@ def bytesOfHex (s: String) : Option ByteArray := do | [] => return [] | _ => .none +/-- Parse a 64-character hex string into an `Address`. Returns `none` if the string is not a valid 32-byte hex encoding. -/ def Address.fromString (s: String) : Option Address := do let ba <- bytesOfHex s if ba.size == 32 then .some ⟨ba⟩ else .none +/-- Encode an `Address` as a hierarchical `Lean.Name` under the `Ix._#` namespace. -/ def Address.toUniqueName (addr: Address): Lean.Name := .str (.str (.str .anonymous "Ix") "_#") (hexOfBytes addr.hash) +/-- Decode an `Address` from a `Lean.Name` previously created by `Address.toUniqueName`. 
-/ def Address.fromUniqueName (name: Lean.Name) : Option Address := match name with | .str (.str (.str .anonymous "Ix") "_#") s => Address.fromString s | _ => .none -structure MetaAddress where - data : Address - «meta» : Address - deriving Inhabited, Nonempty, Lean.ToExpr, BEq, Hashable, Repr, Ord - -instance : ToString MetaAddress where - toString adr := s!"{hexOfBytes adr.data.hash}:{hexOfBytes adr.meta.hash}" diff --git a/Ix/Benchmark/Bench.lean b/Ix/Benchmark/Bench.lean index 30de8e62..3cbd24db 100644 --- a/Ix/Benchmark/Bench.lean +++ b/Ix/Benchmark/Bench.lean @@ -1,6 +1,5 @@ import Ix.Address import Ix.Meta -import Ix.CompileM import Ix.Cronos import Ix.Address import Batteries diff --git a/Ix/Benchmark/Serde.lean b/Ix/Benchmark/Serde.lean index da9e9462..3937f3ff 100644 --- a/Ix/Benchmark/Serde.lean +++ b/Ix/Benchmark/Serde.lean @@ -2,103 +2,122 @@ import Ix.Ixon import Ix.Benchmark.Change import Ix.Benchmark.OneShot -@[inline] def putFloat (x : Float) : Ixon.PutM Unit := Ixon.putUInt64LE x.toBits -@[inline] def getFloat : Ixon.GetM Float := Ixon.getUInt64LE.map Float.ofBits +open Ixon -instance : Ixon.Serialize Float where +-- Local Serialize instances for benchmark data types (ephemeral format) + +instance : Serialize Nat where + put n := putU64LE n.toUInt64 + get := do let v ← getU64LE; return v.toNat + +instance [Serialize α] : Serialize (List α) where + put xs := do + putU64LE xs.length.toUInt64 + for x in xs do Serialize.put x + get := do + let n ← getU64LE + let mut xs := [] + for _ in List.range n.toNat do + xs := (← Serialize.get) :: xs + return xs.reverse + +@[inline] def putFloat (x : Float) : PutM Unit := putU64LE x.toBits +@[inline] def getFloat : GetM Float := getU64LE.map Float.ofBits + +instance : Serialize Float where put := putFloat get := getFloat -def putTupleNat (xy : Nat × Nat) : Ixon.PutM Unit := do - Ixon.putNat Ixon.putBytesTagged xy.fst - Ixon.putNat Ixon.putBytesTagged xy.snd +def putTupleNat (xy : Nat × Nat) : PutM Unit := do + 
Serialize.put xy.fst + Serialize.put xy.snd -def getTupleNat : Ixon.GetM (Nat × Nat) := do - return (← Ixon.Serialize.get, ← Ixon.Serialize.get) +def getTupleNat : GetM (Nat × Nat) := do + return (← Serialize.get, ← Serialize.get) -instance : Ixon.Serialize (Nat × Nat) where +instance : Serialize (Nat × Nat) where put := putTupleNat - get := getTupleNat + get := getTupleNat -def putData (data : Data) : Ixon.PutM Unit := do - Ixon.Serialize.put data.d.toList +def putData (data : Data) : PutM Unit := do + Serialize.put data.d.toList -def getData : Ixon.GetM Data := do - let data : List (Nat × Nat) ← Ixon.Serialize.get - return { d := data.toArray } +def getData : GetM Data := do + let data : List (Nat × Nat) ← Serialize.get + return { d := data.toArray } -instance : Ixon.Serialize Data where - put := putData +instance : Serialize Data where + put := putData get := getData -def putConfidenceInterval (ci : ConfidenceInterval) : Ixon.PutM Unit := do +def putConfidenceInterval (ci : ConfidenceInterval) : PutM Unit := do putFloat ci.confidenceLevel putFloat ci.lowerBound putFloat ci.upperBound -def getConfidenceInterval : Ixon.GetM ConfidenceInterval := do +def getConfidenceInterval : GetM ConfidenceInterval := do return { confidenceLevel := (← getFloat), lowerBound := (← getFloat), upperBound := (← getFloat)} -instance : Ixon.Serialize ConfidenceInterval where - put := putConfidenceInterval - get := getConfidenceInterval +instance : Serialize ConfidenceInterval where + put := putConfidenceInterval + get := getConfidenceInterval -def putEstimate (est : Estimate) : Ixon.PutM Unit := do +def putEstimate (est : Estimate) : PutM Unit := do putConfidenceInterval est.confidenceInterval putFloat est.pointEstimate putFloat est.stdErr -def getEstimate : Ixon.GetM Estimate := do +def getEstimate : GetM Estimate := do return { confidenceInterval := (← getConfidenceInterval), pointEstimate := (← getFloat), stdErr := (← getFloat)} -instance : Ixon.Serialize Estimate where +instance : 
Serialize Estimate where put := putEstimate get := getEstimate -def putEstimates (est : Estimates) : Ixon.PutM Unit := do +def putEstimates (est : Estimates) : PutM Unit := do putEstimate est.mean putEstimate est.median putEstimate est.medianAbsDev if let .some x := est.slope then - Ixon.putUInt8 1 + putU8 1 putEstimate x else - Ixon.putUInt8 0 + putU8 0 putEstimate est.stdDev -def getEstimates : Ixon.GetM Estimates := do +def getEstimates : GetM Estimates := do let mean ← getEstimate let median ← getEstimate let medianAbsDev ← getEstimate - let slope ← match (← Ixon.getUInt8) with + let slope ← match (← getU8) with | 1 => pure $ some (← getEstimate) | _ => pure none let stdDev ← getEstimate return { mean, median, medianAbsDev, slope, stdDev } -instance : Ixon.Serialize Estimates where +instance : Serialize Estimates where put := putEstimates get := getEstimates -def putChangeEstimates (changeEst : ChangeEstimates) : Ixon.PutM Unit := do +def putChangeEstimates (changeEst : ChangeEstimates) : PutM Unit := do putEstimate changeEst.mean putEstimate changeEst.median -def getChangeEstimates : Ixon.GetM ChangeEstimates := do +def getChangeEstimates : GetM ChangeEstimates := do let mean ← getEstimate let median ← getEstimate return { mean, median } -instance : Ixon.Serialize ChangeEstimates where +instance : Serialize ChangeEstimates where put := putChangeEstimates get := getChangeEstimates -def getOneShot: Ixon.GetM OneShot := do - return { benchTime := (← Ixon.Serialize.get) } +def getOneShot: GetM OneShot := do + return { benchTime := (← Serialize.get) } -instance : Ixon.Serialize OneShot where - put os := Ixon.Serialize.put os.benchTime +instance : Serialize OneShot where + put os := Serialize.put os.benchTime get := getOneShot /-- Writes JSON to disk at `benchPath/fileName` -/ @@ -107,11 +126,11 @@ def storeJson [Lean.ToJson α] (data : α) (benchPath : System.FilePath) : IO Un IO.FS.writeFile benchPath json.pretty /-- Writes Ixon to disk at `benchPath/fileName` -/ 
-def storeIxon [Ixon.Serialize α] (data : α) (benchPath : System.FilePath) : IO Unit := do - let ixon := Ixon.ser data +def storeIxon [Serialize α] (data : α) (benchPath : System.FilePath) : IO Unit := do + let ixon := ser data IO.FS.writeBinFile benchPath ixon -def storeFile [Lean.ToJson α] [Ixon.Serialize α] (fmt : SerdeFormat) (data: α) (path : System.FilePath) : IO Unit := do +def storeFile [Lean.ToJson α] [Serialize α] (fmt : SerdeFormat) (data: α) (path : System.FilePath) : IO Unit := do match fmt with | .json => storeJson data path | .ixon => storeIxon data path @@ -125,14 +144,13 @@ def loadJson [Lean.FromJson α] (path : System.FilePath) : IO α := do | .ok d => pure d | .error e => throw $ IO.userError s!"{repr e}" -def loadIxon [Ixon.Serialize α] (path : System.FilePath) : IO α := do +def loadIxon [Serialize α] (path : System.FilePath) : IO α := do let ixonBytes ← IO.FS.readBinFile path - match Ixon.de ixonBytes with + match de ixonBytes with | .ok d => pure d - | .error e => throw $ IO.userError s!"expected a, go {repr e}" + | .error e => throw $ IO.userError s!"expected a, got {repr e}" -def loadFile [Lean.FromJson α] [Ixon.Serialize α] (format : SerdeFormat) (path : System.FilePath) : IO α := do +def loadFile [Lean.FromJson α] [Serialize α] (format : SerdeFormat) (path : System.FilePath) : IO α := do match format with | .json => loadJson path | .ixon => loadIxon path - diff --git a/Ix/CanonM.lean b/Ix/CanonM.lean new file mode 100644 index 00000000..b7462b12 --- /dev/null +++ b/Ix/CanonM.lean @@ -0,0 +1,774 @@ +/- + # CanonM: Canonicalize Lean types to Ix types with content-addressed hashing + + Converts Lean kernel types (Name, Level, Expr, ConstantInfo) to their Ix + counterparts, embedding blake3 hashes at each node for O(1) structural equality. + + Uses pointer-based caching (`ptrAddrUnsafe`) to avoid recomputing hashes for + shared subterms — if two Lean values share the same pointer, they map to the + same canonical Ix value. 
+ + Key operations: + - `canonName` / `uncanonName`: Lean.Name <-> Ix.Name + - `canonLevel` / `uncanonLevel`: Lean.Level <-> Ix.Level + - `canonExpr` / `uncanonExpr`: Lean.Expr <-> Ix.Expr + - `canonEnv` / `uncanonEnv`: whole-environment conversion + - `compareEnvsParallel`: parallel structural equality check between environments +-/ + +import Lean +import Blake3 +import Std.Data.HashMap +import Ix.Common +import Ix.Environment +import Ix.Address + +namespace Ix.CanonM + +open Std (HashMap) + +@[inline] def leanExprPtr (e : Lean.Expr) : USize := unsafe ptrAddrUnsafe e +@[inline] def leanNamePtr (n : Lean.Name) : USize := unsafe ptrAddrUnsafe n +@[inline] def leanLevelPtr (l : Lean.Level) : USize := unsafe ptrAddrUnsafe l +@[inline] def leanDataValuePtr (d : Lean.DataValue) : USize := unsafe ptrAddrUnsafe d + +structure CanonState where + namePtrAddrs: HashMap USize Address := {} + names: HashMap Address Ix.Name := {} + levelPtrAddrs: HashMap USize Address := {} + levels: HashMap Address Ix.Level := {} + exprPtrAddrs: HashMap USize Address := {} + exprs: HashMap Address Ix.Expr := {} + dataValuePtrAddrs: HashMap USize Address := {} + dataValues: HashMap Address Ix.DataValue := {} + +abbrev CanonM := StateT CanonState Id + +def internName (ptr: USize) (y : Ix.Name) : CanonM Unit := do + let h := y.getHash + modify fun s => { s with + names := s.names.insertIfNew h y + namePtrAddrs := s.namePtrAddrs.insertIfNew ptr h + } + +def internLevel (ptr: USize) (y : Ix.Level) : CanonM Unit := do + let h := y.getHash + modify fun s => { s with + levels := s.levels.insertIfNew h y + levelPtrAddrs := s.levelPtrAddrs.insertIfNew ptr h + } + +def internExpr (ptr: USize) (y : Ix.Expr) : CanonM Unit := do + let h := y.getHash + modify fun s => { s with + exprs := s.exprs.insertIfNew h y + exprPtrAddrs := s.exprPtrAddrs.insertIfNew ptr h + } + +def internDataValue (ptr: USize) (y : Ix.DataValue) : CanonM Unit := do + let mut h := Blake3.Hasher.init () + h := Ix.Expr.hashDataValue h y 
+ let h' := ⟨(h.finalizeWithLength 32).val⟩ + modify fun s => { s with + dataValues := s.dataValues.insertIfNew h' y + dataValuePtrAddrs := s.dataValuePtrAddrs.insertIfNew ptr h' + } + +def canonName (n: Lean.Name) : CanonM Ix.Name := do + let ptr := leanNamePtr n + let s ← get + if let .some val := s.namePtrAddrs.get? ptr |>.bind s.names.get? then return val + else + let n' ← match n with + | .anonymous => pure .mkAnon + | .str pre str => .mkStr <$> canonName pre <*> pure str + | .num pre num => .mkNat <$> canonName pre <*> pure num + internName ptr n' + pure n' + +def canonLevel (l: Lean.Level) : CanonM Ix.Level := do + let ptr := leanLevelPtr l + let s ← get + if let .some val := s.levelPtrAddrs.get? ptr |>.bind s.levels.get? then return val + else + let l' ← match l with + | .zero => pure .mkZero + | .succ x => .mkSucc <$> canonLevel x + | .max x y => .mkMax <$> canonLevel x <*> canonLevel y + | .imax x y => .mkIMax <$> canonLevel x <*> canonLevel y + | .param n => .mkParam <$> canonName n + | .mvar n => .mkMvar <$> canonName n.name + internLevel ptr l' + pure l' + +def canonInt : _root_.Int → Ix.Int + | .ofNat n => .ofNat n + | .negSucc n => .negSucc n + +def canonSubstring (ss : _root_.Substring.Raw) : Ix.Substring := + { str := ss.str, startPos := ss.startPos.byteIdx, stopPos := ss.stopPos.byteIdx } + +def canonSourceInfo : Lean.SourceInfo → Ix.SourceInfo + | .original leading pos trailing endPos => + .original (canonSubstring leading) pos.byteIdx + (canonSubstring trailing) endPos.byteIdx + | .synthetic pos endPos canonical => .synthetic pos.byteIdx endPos.byteIdx canonical + | .none => .none + +def canonSyntaxPreresolved (sp : Lean.Syntax.Preresolved) : CanonM Ix.SyntaxPreresolved := + match sp with + | .namespace name => .namespace <$> canonName name + | .decl name aliases => .decl <$> canonName name <*> pure aliases.toArray + +partial def canonSyntax : Lean.Syntax → CanonM Ix.Syntax + | .missing => pure .missing + | .node info kind args => do + .node 
(canonSourceInfo info) <$> canonName kind <*> args.mapM canonSyntax + | .atom info val => pure <| .atom (canonSourceInfo info) val + | .ident info rawVal val preresolved => do + .ident (canonSourceInfo info) (canonSubstring rawVal) + <$> canonName val <*> preresolved.toArray.mapM canonSyntaxPreresolved + +def canonDataValue (d: Lean.DataValue): CanonM Ix.DataValue := do + let ptr := leanDataValuePtr d + let s ← get + if let .some val := s.dataValuePtrAddrs.get? ptr |>.bind s.dataValues.get? then return val + let d' ← match d with + | .ofString s => pure <| .ofString s + | .ofBool b => pure <| .ofBool b + | .ofName n => .ofName <$> canonName n + | .ofNat n => pure <| .ofNat n + | .ofInt i => pure <| .ofInt (canonInt i) + | .ofSyntax s => .ofSyntax <$> canonSyntax s + internDataValue ptr d' + pure d' + +def canonMData (md : Lean.MData) : CanonM (Array (Ix.Name × Ix.DataValue)) := do + let mut result := #[] + for (name, value) in md do + let name' <- canonName name + let value' <- canonDataValue value + result := result.push (name', value') + pure result + +def canonExpr (e: Lean.Expr) : CanonM Ix.Expr := do + let ptr := leanExprPtr e + let s ← get + if let .some val := s.exprPtrAddrs.get? ptr |>.bind s.exprs.get? 
then return val + else + let e' ← match e with + | .bvar idx => pure (.mkBVar idx) + | .fvar fvarId => .mkFVar <$> canonName fvarId.name + | .mvar mvarId => .mkMVar <$> canonName mvarId.name + | .sort level => .mkSort <$> canonLevel level + | .const name levels => + .mkConst <$> canonName name <*> levels.toArray.mapM canonLevel + | .app fn arg => .mkApp <$> canonExpr fn <*> canonExpr arg + | .lam name ty body bi => + .mkLam <$> canonName name + <*> canonExpr ty <*> canonExpr body <*> pure bi + | .forallE name ty body bi => + .mkForallE <$> canonName name + <*> canonExpr ty <*> canonExpr body <*> pure bi + | .letE name ty val body nonDep => + .mkLetE <$> canonName name + <*> canonExpr ty <*> canonExpr val + <*> canonExpr body <*> pure nonDep + | .lit l => pure (.mkLit l) + | .mdata md expr => .mkMData <$> canonMData md <*> canonExpr expr + | .proj typeName idx struct => + .mkProj <$> canonName typeName <*> pure idx <*> canonExpr struct + internExpr ptr e' + pure e' + +def canonConstantVal (cv : Lean.ConstantVal) : CanonM Ix.ConstantVal := do + pure { + name := (<- canonName cv.name) + levelParams := (<- cv.levelParams.toArray.mapM canonName) + type := (<- canonExpr cv.type) + } + +def canonRecursorRule (r : Lean.RecursorRule) : CanonM Ix.RecursorRule := do + pure { + ctor := (<- canonName r.ctor) + nfields := r.nfields + rhs := (<- canonExpr r.rhs) + } + +def canonConst : Lean.ConstantInfo → CanonM Ix.ConstantInfo + | .axiomInfo v => do + pure <| .axiomInfo { + cnst := (<- canonConstantVal v.toConstantVal) + isUnsafe := v.isUnsafe + } + | .defnInfo v => do + pure <| .defnInfo { + cnst := (<- canonConstantVal v.toConstantVal) + value := (<- canonExpr v.value) + hints := v.hints + safety := v.safety + all := (<- v.all.toArray.mapM canonName) + } + | .thmInfo v => do + pure <| .thmInfo { + cnst := (<- canonConstantVal v.toConstantVal) + value := (<- canonExpr v.value) + all := (<- v.all.toArray.mapM canonName) + } + | .opaqueInfo v => do + pure <| .opaqueInfo { + cnst 
:= (<- canonConstantVal v.toConstantVal) + value := (<- canonExpr v.value) + isUnsafe := v.isUnsafe + all := (<- v.all.toArray.mapM canonName) + } + | .quotInfo v => do + pure <| .quotInfo { + cnst := (<- canonConstantVal v.toConstantVal) + kind := v.kind + } + | .inductInfo v => do + pure <| .inductInfo { + cnst := (<- canonConstantVal v.toConstantVal) + numParams := v.numParams + numIndices := v.numIndices + all := (<- v.all.toArray.mapM canonName) + ctors := (<- v.ctors.toArray.mapM canonName) + numNested := v.numNested + isRec := v.isRec + isUnsafe := v.isUnsafe + isReflexive := v.isReflexive + } + | .ctorInfo v => do + pure <| .ctorInfo { + cnst := (<- canonConstantVal v.toConstantVal) + induct := (<- canonName v.induct) + cidx := v.cidx + numParams := v.numParams + numFields := v.numFields + isUnsafe := v.isUnsafe + } + | .recInfo v => do + pure <| .recInfo { + cnst := (<- canonConstantVal v.toConstantVal) + all := (<- v.all.toArray.mapM canonName) + numParams := v.numParams + numIndices := v.numIndices + numMotives := v.numMotives + numMinors := v.numMinors + rules := (<- v.rules.toArray.mapM canonRecursorRule) + k := v.k + isUnsafe := v.isUnsafe + } + +structure UncanonState where + names: HashMap Address Lean.Name := {} + levels: HashMap Address Lean.Level := {} + exprs: HashMap Address Lean.Expr := {} + +abbrev UncanonM := StateT UncanonState Id + +def uncanonName (n: Ix.Name) : UncanonM Lean.Name := do + let addr := n.getHash + match (← get).names.get? addr with + | .some cached => pure cached + | .none => + let result ← match n with + | .anonymous _ => pure .anonymous + | .str pre str _ => .str <$> uncanonName pre <*> pure str + | .num pre num _ => .num <$> uncanonName pre <*> pure num + modify fun s => { s with names := s.names.insert addr result } + pure result + +def uncanonLevel (l: Ix.Level) : UncanonM Lean.Level := do + let addr := l.getHash + match (← get).levels.get? 
addr with + | .some cached => pure cached + | .none => + let result ← match l with + | .zero _ => pure .zero + | .succ x _ => .succ <$> uncanonLevel x + | .max x y _ => .max <$> uncanonLevel x <*> uncanonLevel y + | .imax x y _ => .imax <$> uncanonLevel x <*> uncanonLevel y + | .param n _ => .param <$> uncanonName n + | .mvar n _ => Lean.Level.mvar <$> .mk <$> uncanonName n + modify fun s => { s with levels := s.levels.insert addr result } + pure result + +def uncanonInt : Ix.Int → _root_.Int + | .ofNat n => .ofNat n + | .negSucc n => .negSucc n + +def uncanonSubstring (ss : Ix.Substring) : _root_.Substring.Raw := + { str := ss.str, startPos := ⟨ss.startPos⟩, stopPos := ⟨ss.stopPos⟩ } + +def uncanonSourceInfo : Ix.SourceInfo → Lean.SourceInfo + | .original leading leadingPos trailing trailingPos => + .original (uncanonSubstring leading) ⟨leadingPos⟩ + (uncanonSubstring trailing) ⟨trailingPos⟩ + | .synthetic start stop canonical => .synthetic ⟨start⟩ ⟨stop⟩ canonical + | .none => .none + +def uncanonSyntaxPreresolved (sp : Ix.SyntaxPreresolved) + : UncanonM Lean.Syntax.Preresolved := + match sp with + | .namespace name => .namespace <$> uncanonName name + | .decl name aliases => .decl <$> uncanonName name <*> pure aliases.toList + +partial def uncanonSyntax : Ix.Syntax → UncanonM Lean.Syntax + | .missing => pure .missing + | .node info kind args => do + .node (uncanonSourceInfo info) <$> uncanonName kind <*> args.mapM uncanonSyntax + | .atom info val => pure <| .atom (uncanonSourceInfo info) val + | .ident info rawVal val preresolved => do + .ident (uncanonSourceInfo info) (uncanonSubstring rawVal) + <$> uncanonName val <*> (preresolved.mapM uncanonSyntaxPreresolved >>= pure ∘ Array.toList) + +def uncanonDataValue : Ix.DataValue → UncanonM Lean.DataValue + | .ofString s => pure <| .ofString s + | .ofBool b => pure <| .ofBool b + | .ofName n => .ofName <$> uncanonName n + | .ofNat n => pure <| .ofNat n + | .ofInt i => pure <| .ofInt (uncanonInt i) + | .ofSyntax s => 
.ofSyntax <$> uncanonSyntax s + +def uncanonMData (data : Array (Ix.Name × Ix.DataValue)) : UncanonM Lean.MData := do + let mut result : Lean.MData := {} + for (name, value) in data do + let name' <- uncanonName name + let value' <- uncanonDataValue value + result := result.insert name' value' + pure result + +def uncanonExpr (e: Ix.Expr) : UncanonM Lean.Expr := do + let addr := e.getHash + match (← get).exprs.get? addr with + | .some cached => pure cached + | .none => + let result ← match e with + | .bvar idx _ => pure (.bvar idx) + | .fvar name _ => + Lean.Expr.fvar <$> (.mk <$> uncanonName name) + | .mvar name _ => + Lean.Expr.mvar <$> (.mk <$> uncanonName name) + | .sort level _ => .sort <$> uncanonLevel level + | .const name levels _ => + .const <$> uncanonName name <*> levels.toList.mapM uncanonLevel + | .app fn arg _ => .app <$> uncanonExpr fn <*> uncanonExpr arg + | .lam name ty body bi _ => + .lam <$> uncanonName name + <*> uncanonExpr ty <*> uncanonExpr body <*> pure bi + | .forallE name ty body bi _ => + .forallE <$> uncanonName name + <*> uncanonExpr ty <*> uncanonExpr body <*> pure bi + | .letE name ty val body nonDep _ => + .letE <$> uncanonName name + <*> uncanonExpr ty <*> uncanonExpr val + <*> uncanonExpr body <*> pure nonDep + | .lit l _ => pure (.lit l) + | .mdata data expr _ => .mdata <$> uncanonMData data <*> uncanonExpr expr + | .proj typeName idx struct _ => + .proj <$> uncanonName typeName <*> pure idx <*> uncanonExpr struct + modify fun s => { s with exprs := s.exprs.insert addr result } + pure result + +def uncanonConstantVal (cv : Ix.ConstantVal) : UncanonM Lean.ConstantVal := do + pure { + name := (<- uncanonName cv.name) + levelParams := (<- cv.levelParams.toList.mapM uncanonName) + type := (<- uncanonExpr cv.type) + } + +def uncanonRecursorRule (r : Ix.RecursorRule) : UncanonM Lean.RecursorRule := do + pure { + ctor := (<- uncanonName r.ctor) + nfields := r.nfields + rhs := (<- uncanonExpr r.rhs) + } + +def uncanonConst : 
Ix.ConstantInfo → UncanonM Lean.ConstantInfo + | .axiomInfo v => do + let cnst <- uncanonConstantVal v.cnst + pure <| .axiomInfo { + name := cnst.name + levelParams := cnst.levelParams + type := cnst.type + isUnsafe := v.isUnsafe + } + | .defnInfo v => do + let cnst <- uncanonConstantVal v.cnst + pure <| .defnInfo { + name := cnst.name + levelParams := cnst.levelParams + type := cnst.type + value := (<- uncanonExpr v.value) + hints := v.hints + safety := v.safety + all := (<- v.all.toList.mapM uncanonName) + } + | .thmInfo v => do + let cnst <- uncanonConstantVal v.cnst + pure <| .thmInfo { + name := cnst.name + levelParams := cnst.levelParams + type := cnst.type + value := (<- uncanonExpr v.value) + all := (<- v.all.toList.mapM uncanonName) + } + | .opaqueInfo v => do + let cnst <- uncanonConstantVal v.cnst + pure <| .opaqueInfo { + name := cnst.name + levelParams := cnst.levelParams + type := cnst.type + value := (<- uncanonExpr v.value) + isUnsafe := v.isUnsafe + all := (<- v.all.toList.mapM uncanonName) + } + | .quotInfo v => do + let cnst <- uncanonConstantVal v.cnst + pure <| .quotInfo { + name := cnst.name + levelParams := cnst.levelParams + type := cnst.type + kind := v.kind + } + | .inductInfo v => do + let cnst <- uncanonConstantVal v.cnst + pure <| .inductInfo { + name := cnst.name + levelParams := cnst.levelParams + type := cnst.type + numParams := v.numParams + numIndices := v.numIndices + all := (<- v.all.toList.mapM uncanonName) + ctors := (<- v.ctors.toList.mapM uncanonName) + numNested := v.numNested + isRec := v.isRec + isUnsafe := v.isUnsafe + isReflexive := v.isReflexive + } + | .ctorInfo v => do + let cnst <- uncanonConstantVal v.cnst + pure <| .ctorInfo { + name := cnst.name + levelParams := cnst.levelParams + type := cnst.type + induct := (<- uncanonName v.induct) + cidx := v.cidx + numParams := v.numParams + numFields := v.numFields + isUnsafe := v.isUnsafe + } + | .recInfo v => do + let cnst <- uncanonConstantVal v.cnst + pure <| .recInfo { 
+ name := cnst.name + levelParams := cnst.levelParams + type := cnst.type + all := (<- v.all.toList.mapM uncanonName) + numParams := v.numParams + numIndices := v.numIndices + numMotives := v.numMotives + numMinors := v.numMinors + rules := (<- v.rules.toList.mapM uncanonRecursorRule) + k := v.k + isUnsafe := v.isUnsafe + } + +/-- Canonicalize an entire Lean environment, sharing a single CanonState for + deduplication across all constants. -/ +def canonEnv (env : Lean.Environment) : CanonM Ix.Environment := do + let mut consts : HashMap Ix.Name Ix.ConstantInfo := {} + for (name, const) in env.constants do + let name' <- canonName name + let const' <- canonConst const + consts := consts.insert name' const' + return { consts := consts } + +/-- Uncanonicalize an Ix environment back to a map of Lean constants. -/ +def uncanonEnv (env : Ix.Environment) : UncanonM (HashMap Lean.Name Lean.ConstantInfo) := do + let mut result : HashMap Lean.Name Lean.ConstantInfo := {} + for (name, const) in env.consts do + let name' ← uncanonName name + let const' ← uncanonConst const + result := result.insert name' const' + return result + +/-- Format milliseconds as human-readable time string. -/ +def formatTime (ms : Nat) : String := + if ms < 1000 then s!"{ms}ms" + else if ms < 60000 then + let s := ms / 1000 + let frac := (ms % 1000) / 100 + s!"{s}.{frac}s" + else + let m := ms / 60000 + let s := (ms % 60000) / 1000 + s!"{m}m{s}s" + +/- ## Optimized equality with pointer-pair caching -/ + +/-- A pair of pointers for caching equality results. -/ +structure PtrPair where + a : USize + b : USize + deriving Hashable, BEq + +/-- State for equality checking with pointer-pair cache. -/ +abbrev EqState := StateT (Std.HashSet PtrPair) Id + +/-- Check if two names are equal, using pointer cache. 
-/ +partial def nameEqCached (a b : Lean.Name) : EqState Bool := do + let ptrA := leanNamePtr a + let ptrB := leanNamePtr b + if ptrA == ptrB then return true + let pair := PtrPair.mk ptrA ptrB + let cache ← get + if cache.contains pair then return true + let eq ← match a, b with + | .anonymous, .anonymous => pure true + | .str preA strA, .str preB strB => + if strA != strB then pure false + else nameEqCached preA preB + | .num preA numA, .num preB numB => + if numA != numB then pure false + else nameEqCached preA preB + | _, _ => pure false + if eq then modify (·.insert pair) + pure eq + +/-- Check if two levels are equal, using pointer cache. -/ +partial def levelEqCached (a b : Lean.Level) : EqState Bool := do + let ptrA := leanLevelPtr a + let ptrB := leanLevelPtr b + if ptrA == ptrB then return true + let pair := PtrPair.mk ptrA ptrB + let cache ← get + if cache.contains pair then return true + let eq ← match a, b with + | .zero, .zero => pure true + | .succ a', .succ b' => levelEqCached a' b' + | .max a1 a2, .max b1 b2 => do + if !(← levelEqCached a1 b1) then return false + levelEqCached a2 b2 + | .imax a1 a2, .imax b1 b2 => do + if !(← levelEqCached a1 b1) then return false + levelEqCached a2 b2 + | .param nA, .param nB => pure (nA == nB) + | .mvar nA, .mvar nB => pure (nA == nB) + | _, _ => pure false + if eq then modify (·.insert pair) + pure eq + +/-- Check if two expressions are equal, using pointer cache. 
-/ +partial def exprEqCached (a b : Lean.Expr) : EqState Bool := do + let ptrA := leanExprPtr a + let ptrB := leanExprPtr b + if ptrA == ptrB then return true + let pair := PtrPair.mk ptrA ptrB + let cache ← get + if cache.contains pair then return true + let eq ← match a, b with + | .bvar idxA, .bvar idxB => pure (idxA == idxB) + | .fvar fA, .fvar fB => pure (fA == fB) + | .mvar mA, .mvar mB => pure (mA == mB) + | .sort lA, .sort lB => levelEqCached lA lB + | .const nA lsA, .const nB lsB => + if nA != nB then return false + if lsA.length != lsB.length then return false + for (la, lb) in lsA.zip lsB do + if !(← levelEqCached la lb) then return false + pure true + | .app fA argA, .app fB argB => do + if !(← exprEqCached fA fB) then return false + exprEqCached argA argB + | .lam nA tyA bodyA biA, .lam nB tyB bodyB biB => + if biA != biB then return false + if !(← nameEqCached nA nB) then return false + if !(← exprEqCached tyA tyB) then return false + exprEqCached bodyA bodyB + | .forallE nA tyA bodyA biA, .forallE nB tyB bodyB biB => + if biA != biB then return false + if !(← nameEqCached nA nB) then return false + if !(← exprEqCached tyA tyB) then return false + exprEqCached bodyA bodyB + | .letE nA tyA valA bodyA ndA, .letE nB tyB valB bodyB ndB => + if ndA != ndB then return false + if !(← nameEqCached nA nB) then return false + if !(← exprEqCached tyA tyB) then return false + if !(← exprEqCached valA valB) then return false + exprEqCached bodyA bodyB + | .lit lA, .lit lB => pure (lA == lB) + -- Mdata entries are ignored for semantic equality (they carry annotations, not semantics) + | .mdata _ eA, .mdata _ eB => exprEqCached eA eB + | .proj tnA idxA sA, .proj tnB idxB sB => + if tnA != tnB || idxA != idxB then return false + exprEqCached sA sB + | _, _ => pure false + if eq then modify (·.insert pair) + pure eq + +/-- Check if two ConstantInfo are equal, using fresh pointer cache. + Checks cheap scalar fields first, then names, then types/values. 
-/ +def constInfoEqCached (a b : Lean.ConstantInfo) : Bool := + let check : EqState Bool := do + -- Check levelParams length first (cheap) + if a.levelParams.length != b.levelParams.length then return false + -- Check variant-specific cheap fields before expensive structural checks + match a, b with + | .axiomInfo v1, .axiomInfo v2 => + if v1.isUnsafe != v2.isUnsafe then return false + | .defnInfo v1, .defnInfo v2 => + if v1.safety != v2.safety then return false + | .thmInfo _, .thmInfo _ => pure () + | .opaqueInfo v1, .opaqueInfo v2 => + if v1.isUnsafe != v2.isUnsafe then return false + | .quotInfo v1, .quotInfo v2 => + if v1.kind != v2.kind then return false + | .inductInfo v1, .inductInfo v2 => + if v1.numParams != v2.numParams then return false + if v1.numIndices != v2.numIndices then return false + if v1.numNested != v2.numNested then return false + if v1.isRec != v2.isRec then return false + if v1.isUnsafe != v2.isUnsafe then return false + if v1.isReflexive != v2.isReflexive then return false + if v1.isNested != v2.isNested then return false + if v1.all != v2.all then return false + if v1.ctors != v2.ctors then return false + | .ctorInfo v1, .ctorInfo v2 => + if v1.cidx != v2.cidx then return false + if v1.numParams != v2.numParams then return false + if v1.numFields != v2.numFields then return false + if v1.isUnsafe != v2.isUnsafe then return false + if v1.induct != v2.induct then return false + | .recInfo v1, .recInfo v2 => + if v1.numParams != v2.numParams then return false + if v1.numIndices != v2.numIndices then return false + if v1.numMotives != v2.numMotives then return false + if v1.numMinors != v2.numMinors then return false + if v1.k != v2.k then return false + if v1.isUnsafe != v2.isUnsafe then return false + if v1.rules.length != v2.rules.length then return false + if v1.all != v2.all then return false + | _, _ => return false + -- Now check names (cheaper than exprs) + if !(← nameEqCached a.name b.name) then return false + for (la, lb) in 
a.levelParams.zip b.levelParams do + if !(← nameEqCached la lb) then return false + -- Finally check types and values (most expensive) + if !(← exprEqCached a.type b.type) then return false + match a, b with + | .axiomInfo _, .axiomInfo _ => pure true + | .defnInfo v1, .defnInfo v2 => exprEqCached v1.value v2.value + | .thmInfo v1, .thmInfo v2 => exprEqCached v1.value v2.value + | .opaqueInfo v1, .opaqueInfo v2 => exprEqCached v1.value v2.value + | .quotInfo _, .quotInfo _ => pure true + | .inductInfo _, .inductInfo _ => pure true + | .ctorInfo _, .ctorInfo _ => pure true + | .recInfo v1, .recInfo v2 => do + for (r1, r2) in v1.rules.zip v2.rules do + if r1.ctor != r2.ctor then return false + if r1.nfields != r2.nfields then return false + if !(← exprEqCached r1.rhs r2.rhs) then return false + pure true + | _, _ => pure false + (check.run {}).1 + +/-- Compare two environments in parallel using Tasks. + One-directional: only checks that env1 entries exist and match in env2. + Extra entries in env2 are not detected (by design for roundtrip verification). + Returns (numMismatches, numMissing, mismatchNames, missingNames). -/ +def compareEnvsParallel (env1 env2 : Std.HashMap Lean.Name Lean.ConstantInfo) + : Nat × Nat × Array Lean.Name × Array Lean.Name := Id.run do + let entries := env1.toArray + let tasks := entries.map fun (name, const1) => + Task.spawn fun _ => + match env2.get? name with + | none => (false, true, name) + | some const2 => + let eq := constInfoEqCached const1 const2 + (eq, false, name) + let mut mismatchNames : Array Lean.Name := #[] + let mut missingNames : Array Lean.Name := #[] + for task in tasks do + let (eq, isMissing, name) := task.get + if isMissing then + missingNames := missingNames.push name + else if !eq then + mismatchNames := mismatchNames.push name + (mismatchNames.size, missingNames.size, mismatchNames, missingNames) + +/- ## Parallel Canonicalization -/ + +/-- Split an array into chunks of the given size. 
-/ +def chunks (arr : Array α) (chunkSize : Nat) : Array (Array α) := Id.run do + if chunkSize == 0 then return #[arr] + let mut result : Array (Array α) := #[] + let mut i := 0 + while i < arr.size do + let endIdx := min (i + chunkSize) arr.size + result := result.push (arr.extract i endIdx) + i := endIdx + result + +/-- Process a chunk of constants with local state (pure). + Returns just the canonicalized constants. -/ +def canonChunk (chunk : Array (Lean.Name × Lean.ConstantInfo)) + : Array (Ix.Name × Ix.ConstantInfo) := Id.run do + let mut state : CanonState := {} + let mut results : Array (Ix.Name × Ix.ConstantInfo) := #[] + for (name, const) in chunk do + let (name', state') := StateT.run (canonName name) state + let (const', state'') := StateT.run (canonConst const) state' + state := state'' + results := results.push (name', const') + results + +/-- Canonicalize an entire Lean environment in parallel (pure). + Returns just the constants map. -/ +def canonEnvParallel (env : Lean.Environment) (numWorkers : Nat := 8) + : HashMap Ix.Name Ix.ConstantInfo := + let constArr := env.constants.toList.toArray + let chunkSize := (constArr.size + numWorkers - 1) / numWorkers + let chunkArr := chunks constArr chunkSize + let tasks := chunkArr.map fun chunk => + Task.spawn fun _ => canonChunk chunk + Id.run do + let mut consts : HashMap Ix.Name Ix.ConstantInfo := {} + for task in tasks do + for (name, const) in task.get do + consts := consts.insert name const + consts + +/-- Process a chunk of Ix constants back to Lean (pure). + Returns just the uncanonicalized constants. 
-/ +def uncanonChunk (chunk : Array (Ix.Name × Ix.ConstantInfo)) + : Array (Lean.Name × Lean.ConstantInfo) := Id.run do + let mut state : UncanonState := {} + let mut results : Array (Lean.Name × Lean.ConstantInfo) := #[] + for (name, const) in chunk do + let (name', state') := StateT.run (uncanonName name) state + let (const', state'') := StateT.run (uncanonConst const) state' + state := state'' + results := results.push (name', const') + results + +/-- Uncanonicalize an Ix environment in parallel (pure). + Returns a HashMap of Lean constants. -/ +def uncanonEnvParallel (consts : HashMap Ix.Name Ix.ConstantInfo) (numWorkers : Nat := 8) + : HashMap Lean.Name Lean.ConstantInfo := + let constArr := consts.toArray + let chunkSize := (constArr.size + numWorkers - 1) / numWorkers + let chunkArr := chunks constArr chunkSize + let tasks := chunkArr.map fun chunk => + Task.spawn fun _ => uncanonChunk chunk + Id.run do + let mut result : HashMap Lean.Name Lean.ConstantInfo := {} + for task in tasks do + for (name, const) in task.get do + result := result.insert name const + result + +end Ix.CanonM + diff --git a/Ix/Claim.lean b/Ix/Claim.lean new file mode 100644 index 00000000..3f7eb8c9 --- /dev/null +++ b/Ix/Claim.lean @@ -0,0 +1,472 @@ +/- + # Claim: ZK claim types and serialization + + Defines the claim types used in Ix's zero-knowledge proof system: + - `EvalClaim`: asserts that a constant evaluates to a given output + - `CheckClaim`: asserts that a constant is well-typed + - `RevealClaim`: selectively reveals fields of a committed constant + + `RevealConstantInfo` and `RevealMutConstInfo` use bitmask-based serialization + to encode which fields are present, enabling selective revelation without + exposing the full constant. 
+-/ + +import Ix.Ixon + +namespace Ix + +open Ixon + +-- ============================================================================ +-- Helpers +-- ============================================================================ + +private def computeMask (flags : List Bool) : UInt64 := Id.run do + let mut mask : UInt64 := 0 + let mut i : UInt64 := 0 + for f in flags do + if f then mask := mask ||| (1 <<< i) + i := i + 1 + return mask + +private def putBoolField (b : Bool) : PutM Unit := putU8 (if b then 1 else 0) + +private def getBoolField : GetM Bool := do + let v ← getU8 + if v == 0 then return false + else if v == 1 then return true + else throw s!"getBoolField: invalid {v}" + +private def putDefKind (k : DefKind) : PutM Unit := + putU8 (match k with | .defn => 0 | .opaq => 1 | .thm => 2) + +private def getDefKind : GetM DefKind := do + match (← getU8) with + | 0 => return .defn | 1 => return .opaq | 2 => return .thm + | v => throw s!"getDefKind: invalid {v}" + +private def putDefSafety (s : DefinitionSafety) : PutM Unit := + putU8 (match s with | .unsaf => 0 | .safe => 1 | .part => 2) + +private def getDefSafety : GetM DefinitionSafety := do + match (← getU8) with + | 0 => return .unsaf | 1 => return .safe | 2 => return .part + | v => throw s!"getDefSafety: invalid {v}" + +private def putQuotKind (k : QuotKind) : PutM Unit := + putU8 (match k with | .type => 0 | .ctor => 1 | .lift => 2 | .ind => 3) + +private def getQuotKind : GetM QuotKind := do + match (← getU8) with + | 0 => return .type | 1 => return .ctor | 2 => return .lift | 3 => return .ind + | v => throw s!"getQuotKind: invalid {v}" + +private def getTag0Size : GetM UInt64 := return (← getTag0).size + +private def getOpt (mask : UInt64) (bit : UInt64) (read : GetM α) : GetM (Option α) := + if mask &&& bit != 0 then some <$> read else pure none + +-- ============================================================================ +-- Types +-- 
============================================================================ + +/-- Revealed fields of a Constructor within an Inductive. -/ +structure RevealConstructorInfo where + isUnsafe : Option Bool := none + lvls : Option UInt64 := none + cidx : Option UInt64 := none + params : Option UInt64 := none + fields : Option UInt64 := none + typ : Option Address := none + deriving BEq, Repr, Inhabited + +/-- Revealed fields of a RecursorRule. -/ +structure RevealRecursorRule where + ruleIdx : UInt64 + fields : UInt64 + rhs : Address + deriving BEq, Repr, Inhabited + +/-- Revealed fields of a MutConst component. -/ +inductive RevealMutConstInfo where + | defn (kind : Option DefKind) (safety : Option DefinitionSafety) + (lvls : Option UInt64) (typ : Option Address) (value : Option Address) + | indc (isRecr : Option Bool) (refl : Option Bool) (isUnsafe : Option Bool) + (lvls : Option UInt64) (params : Option UInt64) + (indices : Option UInt64) (nested : Option UInt64) + (typ : Option Address) (ctors : Option (Array (UInt64 × RevealConstructorInfo))) + | recr (k : Option Bool) (isUnsafe : Option Bool) (lvls : Option UInt64) + (params : Option UInt64) (indices : Option UInt64) + (motives : Option UInt64) (minors : Option UInt64) + (typ : Option Address) (rules : Option (Array RevealRecursorRule)) + deriving BEq, Repr, Inhabited + +/-- Revealed fields of a ConstantInfo behind a commitment. 
-/ +inductive RevealConstantInfo where + | defn (kind : Option DefKind) (safety : Option DefinitionSafety) + (lvls : Option UInt64) (typ : Option Address) (value : Option Address) + | recr (k : Option Bool) (isUnsafe : Option Bool) (lvls : Option UInt64) + (params : Option UInt64) (indices : Option UInt64) + (motives : Option UInt64) (minors : Option UInt64) + (typ : Option Address) (rules : Option (Array RevealRecursorRule)) + | axio (isUnsafe : Option Bool) (lvls : Option UInt64) (typ : Option Address) + | quot (kind : Option QuotKind) (lvls : Option UInt64) (typ : Option Address) + | cPrj (idx : Option UInt64) (cidx : Option UInt64) (block : Option Address) + | rPrj (idx : Option UInt64) (block : Option Address) + | iPrj (idx : Option UInt64) (block : Option Address) + | dPrj (idx : Option UInt64) (block : Option Address) + | muts (components : Array (UInt64 × RevealMutConstInfo)) + deriving BEq, Repr, Inhabited + +/-- A claim that can be proven. -/ +inductive Claim where + | eval (input : Address) (output : Address) + | check (value : Address) + | reveal (comm : Address) (info : RevealConstantInfo) + deriving BEq, Repr, Inhabited + +-- ============================================================================ +-- RevealConstructorInfo serialization +-- ============================================================================ + +namespace RevealConstructorInfo + +def put (info : RevealConstructorInfo) : PutM Unit := do + let mask := computeMask [info.isUnsafe.isSome, info.lvls.isSome, info.cidx.isSome, + info.params.isSome, info.fields.isSome, info.typ.isSome] + putTag0 ⟨mask⟩ + match info.isUnsafe with | some b => putBoolField b | none => pure () + match info.lvls with | some n => putTag0 ⟨n⟩ | none => pure () + match info.cidx with | some n => putTag0 ⟨n⟩ | none => pure () + match info.params with | some n => putTag0 ⟨n⟩ | none => pure () + match info.fields with | some n => putTag0 ⟨n⟩ | none => pure () + match info.typ with | some a => Serialize.put a | 
none => pure () + +def get : GetM RevealConstructorInfo := do + let mask := (← getTag0).size + let isUnsafe ← getOpt mask 1 getBoolField + let lvls ← getOpt mask 2 getTag0Size + let cidx ← getOpt mask 4 getTag0Size + let params ← getOpt mask 8 getTag0Size + let fields ← getOpt mask 16 getTag0Size + let typ ← getOpt mask 32 Serialize.get + return ⟨isUnsafe, lvls, cidx, params, fields, typ⟩ + +end RevealConstructorInfo + +-- ============================================================================ +-- RevealRecursorRule serialization +-- ============================================================================ + +namespace RevealRecursorRule + +def put (rule : RevealRecursorRule) : PutM Unit := do + putTag0 ⟨rule.ruleIdx⟩ + putTag0 ⟨rule.fields⟩ + Serialize.put rule.rhs + +def get : GetM RevealRecursorRule := do + let ruleIdx ← getTag0Size + let fields ← getTag0Size + let rhs ← Serialize.get + return ⟨ruleIdx, fields, rhs⟩ + +end RevealRecursorRule + +-- ============================================================================ +-- Array helpers +-- ============================================================================ + +private def putRules (rules : Array RevealRecursorRule) : PutM Unit := do + putTag0 ⟨rules.size.toUInt64⟩ + for rule in rules do RevealRecursorRule.put rule + +private def getRules : GetM (Array RevealRecursorRule) := do + let count ← getTag0Size + let mut rules := #[] + for _ in [:count.toNat] do rules := rules.push (← RevealRecursorRule.get) + return rules + +private def putCtors (ctors : Array (UInt64 × RevealConstructorInfo)) : PutM Unit := do + putTag0 ⟨ctors.size.toUInt64⟩ + for (idx, info) in ctors do + putTag0 ⟨idx⟩ + RevealConstructorInfo.put info + +private def getCtors : GetM (Array (UInt64 × RevealConstructorInfo)) := do + let count ← getTag0Size + let mut ctors := #[] + for _ in [:count.toNat] do + let idx ← getTag0Size + let info ← RevealConstructorInfo.get + ctors := ctors.push (idx, info) + return ctors + +-- 
============================================================================ +-- RevealMutConstInfo serialization +-- ============================================================================ + +namespace RevealMutConstInfo + +def put : RevealMutConstInfo → PutM Unit + | .defn kind safety lvls typ value => do + putU8 0 + let mask := computeMask [kind.isSome, safety.isSome, lvls.isSome, typ.isSome, value.isSome] + putTag0 ⟨mask⟩ + match kind with | some k => putDefKind k | none => pure () + match safety with | some s => putDefSafety s | none => pure () + match lvls with | some n => putTag0 ⟨n⟩ | none => pure () + match typ with | some a => Serialize.put a | none => pure () + match value with | some a => Serialize.put a | none => pure () + | .indc isRecr refl isUnsafe lvls params indices nested typ ctors => do + putU8 1 + let mask := computeMask [isRecr.isSome, refl.isSome, isUnsafe.isSome, + lvls.isSome, params.isSome, indices.isSome, + nested.isSome, typ.isSome, ctors.isSome] + putTag0 ⟨mask⟩ + match isRecr with | some b => putBoolField b | none => pure () + match refl with | some b => putBoolField b | none => pure () + match isUnsafe with | some b => putBoolField b | none => pure () + match lvls with | some n => putTag0 ⟨n⟩ | none => pure () + match params with | some n => putTag0 ⟨n⟩ | none => pure () + match indices with | some n => putTag0 ⟨n⟩ | none => pure () + match nested with | some n => putTag0 ⟨n⟩ | none => pure () + match typ with | some a => Serialize.put a | none => pure () + match ctors with | some c => putCtors c | none => pure () + | .recr k isUnsafe lvls params indices motives minors typ rules => do + putU8 2 + let mask := computeMask [k.isSome, isUnsafe.isSome, lvls.isSome, + params.isSome, indices.isSome, motives.isSome, + minors.isSome, typ.isSome, rules.isSome] + putTag0 ⟨mask⟩ + match k with | some b => putBoolField b | none => pure () + match isUnsafe with | some b => putBoolField b | none => pure () + match lvls with | some n => putTag0 
⟨n⟩ | none => pure () + match params with | some n => putTag0 ⟨n⟩ | none => pure () + match indices with | some n => putTag0 ⟨n⟩ | none => pure () + match motives with | some n => putTag0 ⟨n⟩ | none => pure () + match minors with | some n => putTag0 ⟨n⟩ | none => pure () + match typ with | some a => Serialize.put a | none => pure () + match rules with | some r => putRules r | none => pure () + +def get : GetM RevealMutConstInfo := do + let variant ← getU8 + let mask ← getTag0Size + match variant with + | 0 => do -- Defn + let kind ← getOpt mask 1 getDefKind + let safety ← getOpt mask 2 getDefSafety + let lvls ← getOpt mask 4 getTag0Size + let typ ← getOpt mask 8 Serialize.get + let value ← getOpt mask 16 Serialize.get + return .defn kind safety lvls typ value + | 1 => do -- Indc + let isRecr ← getOpt mask 1 getBoolField + let refl ← getOpt mask 2 getBoolField + let isUnsafe ← getOpt mask 4 getBoolField + let lvls ← getOpt mask 8 getTag0Size + let params ← getOpt mask 16 getTag0Size + let indices ← getOpt mask 32 getTag0Size + let nested ← getOpt mask 64 getTag0Size + let typ ← getOpt mask 128 Serialize.get + let ctors ← getOpt mask 256 getCtors + return .indc isRecr refl isUnsafe lvls params indices nested typ ctors + | 2 => do -- Recr + let k ← getOpt mask 1 getBoolField + let isUnsafe ← getOpt mask 2 getBoolField + let lvls ← getOpt mask 4 getTag0Size + let params ← getOpt mask 8 getTag0Size + let indices ← getOpt mask 16 getTag0Size + let motives ← getOpt mask 32 getTag0Size + let minors ← getOpt mask 64 getTag0Size + let typ ← getOpt mask 128 Serialize.get + let rules ← getOpt mask 256 getRules + return .recr k isUnsafe lvls params indices motives minors typ rules + | v => throw s!"RevealMutConstInfo.get: invalid variant {v}" + +end RevealMutConstInfo + +-- ============================================================================ +-- RevealConstantInfo serialization +-- ============================================================================ + +namespace 
RevealConstantInfo + +def put : RevealConstantInfo → PutM Unit + | .defn kind safety lvls typ value => do + putU8 0 + let mask := computeMask [kind.isSome, safety.isSome, lvls.isSome, typ.isSome, value.isSome] + putTag0 ⟨mask⟩ + match kind with | some k => putDefKind k | none => pure () + match safety with | some s => putDefSafety s | none => pure () + match lvls with | some n => putTag0 ⟨n⟩ | none => pure () + match typ with | some a => Serialize.put a | none => pure () + match value with | some a => Serialize.put a | none => pure () + | .recr k isUnsafe lvls params indices motives minors typ rules => do + putU8 1 + let mask := computeMask [k.isSome, isUnsafe.isSome, lvls.isSome, + params.isSome, indices.isSome, motives.isSome, + minors.isSome, typ.isSome, rules.isSome] + putTag0 ⟨mask⟩ + match k with | some b => putBoolField b | none => pure () + match isUnsafe with | some b => putBoolField b | none => pure () + match lvls with | some n => putTag0 ⟨n⟩ | none => pure () + match params with | some n => putTag0 ⟨n⟩ | none => pure () + match indices with | some n => putTag0 ⟨n⟩ | none => pure () + match motives with | some n => putTag0 ⟨n⟩ | none => pure () + match minors with | some n => putTag0 ⟨n⟩ | none => pure () + match typ with | some a => Serialize.put a | none => pure () + match rules with | some r => putRules r | none => pure () + | .axio isUnsafe lvls typ => do + putU8 2 + let mask := computeMask [isUnsafe.isSome, lvls.isSome, typ.isSome] + putTag0 ⟨mask⟩ + match isUnsafe with | some b => putBoolField b | none => pure () + match lvls with | some n => putTag0 ⟨n⟩ | none => pure () + match typ with | some a => Serialize.put a | none => pure () + | .quot kind lvls typ => do + putU8 3 + let mask := computeMask [kind.isSome, lvls.isSome, typ.isSome] + putTag0 ⟨mask⟩ + match kind with | some k => putQuotKind k | none => pure () + match lvls with | some n => putTag0 ⟨n⟩ | none => pure () + match typ with | some a => Serialize.put a | none => pure () + | .cPrj idx 
cidx block => do + putU8 4 + let mask := computeMask [idx.isSome, cidx.isSome, block.isSome] + putTag0 ⟨mask⟩ + match idx with | some n => putTag0 ⟨n⟩ | none => pure () + match cidx with | some n => putTag0 ⟨n⟩ | none => pure () + match block with | some a => Serialize.put a | none => pure () + | .rPrj idx block => do + putU8 5 + let mask := computeMask [idx.isSome, block.isSome] + putTag0 ⟨mask⟩ + match idx with | some n => putTag0 ⟨n⟩ | none => pure () + match block with | some a => Serialize.put a | none => pure () + | .iPrj idx block => do + putU8 6 + let mask := computeMask [idx.isSome, block.isSome] + putTag0 ⟨mask⟩ + match idx with | some n => putTag0 ⟨n⟩ | none => pure () + match block with | some a => Serialize.put a | none => pure () + | .dPrj idx block => do + putU8 7 + let mask := computeMask [idx.isSome, block.isSome] + putTag0 ⟨mask⟩ + match idx with | some n => putTag0 ⟨n⟩ | none => pure () + match block with | some a => Serialize.put a | none => pure () + | .muts components => do + putU8 8 + let mask : UInt64 := if components.isEmpty then 0 else 1 + putTag0 ⟨mask⟩ + if !components.isEmpty then + putTag0 ⟨components.size.toUInt64⟩ + for (idx, info) in components do + putTag0 ⟨idx⟩ + RevealMutConstInfo.put info + +def get : GetM RevealConstantInfo := do + let variant ← getU8 + let mask ← getTag0Size + match variant with + | 0 => do -- Defn + let kind ← getOpt mask 1 getDefKind + let safety ← getOpt mask 2 getDefSafety + let lvls ← getOpt mask 4 getTag0Size + let typ ← getOpt mask 8 Serialize.get + let value ← getOpt mask 16 Serialize.get + return .defn kind safety lvls typ value + | 1 => do -- Recr + let k ← getOpt mask 1 getBoolField + let isUnsafe ← getOpt mask 2 getBoolField + let lvls ← getOpt mask 4 getTag0Size + let params ← getOpt mask 8 getTag0Size + let indices ← getOpt mask 16 getTag0Size + let motives ← getOpt mask 32 getTag0Size + let minors ← getOpt mask 64 getTag0Size + let typ ← getOpt mask 128 Serialize.get + let rules ← getOpt mask 
256 getRules + return .recr k isUnsafe lvls params indices motives minors typ rules + | 2 => do -- Axio + let isUnsafe ← getOpt mask 1 getBoolField + let lvls ← getOpt mask 2 getTag0Size + let typ ← getOpt mask 4 Serialize.get + return .axio isUnsafe lvls typ + | 3 => do -- Quot + let kind ← getOpt mask 1 getQuotKind + let lvls ← getOpt mask 2 getTag0Size + let typ ← getOpt mask 4 Serialize.get + return .quot kind lvls typ + | 4 => do -- CPrj + let idx ← getOpt mask 1 getTag0Size + let cidx ← getOpt mask 2 getTag0Size + let block ← getOpt mask 4 Serialize.get + return .cPrj idx cidx block + | 5 => do -- RPrj + let idx ← getOpt mask 1 getTag0Size + let block ← getOpt mask 2 Serialize.get + return .rPrj idx block + | 6 => do -- IPrj + let idx ← getOpt mask 1 getTag0Size + let block ← getOpt mask 2 Serialize.get + return .iPrj idx block + | 7 => do -- DPrj + let idx ← getOpt mask 1 getTag0Size + let block ← getOpt mask 2 Serialize.get + return .dPrj idx block + | 8 => do -- Muts + let components ← if mask &&& 1 != 0 then do + let count ← getTag0Size + let mut comps : Array (UInt64 × RevealMutConstInfo) := #[] + for _ in [:count.toNat] do + let idx ← getTag0Size + let info ← RevealMutConstInfo.get + comps := comps.push (idx, info) + pure comps + else pure #[] + return .muts components + | v => throw s!"RevealConstantInfo.get: invalid variant {v}" + +end RevealConstantInfo + +-- ============================================================================ +-- Claim serialization +-- ============================================================================ + +namespace Claim + +def put : Claim → PutM Unit + | .eval input output => do + putTag4 ⟨0xE, 4⟩ + Serialize.put input + Serialize.put output + | .check value => do + putTag4 ⟨0xE, 3⟩ + Serialize.put value + | .reveal comm info => do + putTag4 ⟨0xE, 6⟩ + Serialize.put comm + RevealConstantInfo.put info + +def get : GetM Claim := do + let tag ← getTag4 + if tag.flag != 0xE then throw s!"Claim.get: expected flag 0xE, 
got {tag.flag}" + match tag.size with + | 4 => return .eval (← Serialize.get) (← Serialize.get) + | 3 => return .check (← Serialize.get) + | 6 => return .reveal (← Serialize.get) (← RevealConstantInfo.get) + | n => throw s!"Claim.get: invalid variant {n}" + +def ser (c : Claim) : ByteArray := runPut (put c) +def commit (c : Claim) : Address := Address.blake3 (ser c) + +instance : ToString Claim where + toString c := match c with + | .eval i o => s!"EvalClaim({i}, {o})" + | .check v => s!"CheckClaim({v})" + | .reveal comm info => s!"RevealClaim({comm}, {repr info})" + +end Claim + +end Ix diff --git a/Ix/Commit.lean b/Ix/Commit.lean new file mode 100644 index 00000000..c437e4dd --- /dev/null +++ b/Ix/Commit.lean @@ -0,0 +1,263 @@ +/- + Ix.Commit: Commitment pipeline for ZK voting and claim construction. + + Provides utilities for: + - Building a CompileEnv from rsCompilePhases output + - Incrementally compiling and committing definitions + - Building evaluation, check, and reveal claims + - Opening committed constants for selective field revelation +-/ + +import Ix.Claim +import Ix.CompileM +import Ix.CanonM + +namespace Ix.Commit + +open Ixon (PutM runPut putConstant putExpr Comm) + +-- ============================================================================ +-- mkCompileEnv: Build a compilation cache from rsCompilePhases output +-- ============================================================================ + +/-- Create a CompileEnv from the output of rsCompilePhases. + This allows incremental compilation of new definitions against + the already-compiled base environment. 
-/ +def mkCompileEnv (phases : Ix.CompileM.CompilePhases) : Ix.CompileM.CompileEnv := + { env := phases.rawEnv + , nameToNamed := phases.compileEnv.named + , constants := phases.compileEnv.consts + , blobs := phases.compileEnv.blobs + , totalBytes := 0 } + +-- ============================================================================ +-- Secret generation and commitment creation +-- ============================================================================ + +/-- Generate a random 32-byte secret for blinding a commitment. -/ +def generateSecret : IO Address := do + return ⟨← IO.getRandomBytes 32⟩ + +/-- Create a commitment from a payload address. + Returns the Comm structure and the commitment address. -/ +def commitConst (payload : Address) : IO (Comm × Address) := do + let secret ← generateSecret + let comm : Comm := ⟨secret, payload⟩ + return (comm, Comm.commit comm) + +-- ============================================================================ +-- Single-definition compilation +-- ============================================================================ + +/-- Canonicalize and compile a single definition, returning its content address. + Uses CanonM to canonicalize the Lean expressions, then CompileM to compile. + By default the definition is compiled under `Lean.Name.anonymous`; pass a + different `name` to test alpha-invariance. -/ +def compileDef (compileEnv : CompileM.CompileEnv) + (lvls : List Lean.Name) (type value : Lean.Expr) + (name : Lean.Name := .anonymous) + : Except String (Address × CompileM.CompileEnv) := do + -- 1. Create Lean ConstantInfo + let defnVal : Lean.DefinitionVal := { + name := name + levelParams := lvls + type := type + value := value + hints := .regular 0 + safety := .safe + } + let leanConst := Lean.ConstantInfo.defnInfo defnVal + + -- 2. Canonicalize via CanonM (pure, runs in Id) + let (ixName, canonState) := (CanonM.canonName name).run {} + let (ixConst, _) := (CanonM.canonConst leanConst).run canonState + + -- 3. 
Add to canonical environment + let env' := { compileEnv.env with consts := compileEnv.env.consts.insert ixName ixConst } + let compileEnv' := { compileEnv with env := env' } + + -- 4. Compile via CompileM (pure, runs in Except) + let all : Ix.Set Ix.Name := ({} : Ix.Set Ix.Name).insert ixName + match CompileM.compileBlockPure compileEnv' all ixName with + | .error e => .error s!"compileDef: {e}" + | .ok (result, blockState) => + -- 5. Serialize and hash to get content address + let blockBytes := runPut (putConstant result.block) + let addr := Address.blake3 blockBytes + + -- 6. Update CompileEnv with new constant + let compileEnv'' := { compileEnv' with + constants := compileEnv'.constants.insert addr result.block + nameToNamed := compileEnv'.nameToNamed.insert ixName ⟨addr, result.blockMeta⟩ + blobs := blockState.blockBlobs.fold (fun m k v => m.insert k v) compileEnv'.blobs + totalBytes := compileEnv'.totalBytes + blockBytes.size + } + .ok (addr, compileEnv'') + +-- ============================================================================ +-- Commit a definition: compile + create commitment + register +-- ============================================================================ + +/-- Full commitment pipeline: canonicalize, compile, commit, and register. + Returns the commitment address, updated Lean environment, and updated CompileEnv. + The committed constant is added to the Lean environment under + `Address.toUniqueName commitAddr` so it can be referenced in later expressions. -/ +def commitDef (compileEnv : CompileM.CompileEnv) (leanEnv : Lean.Environment) + (lvls : List Lean.Name) (type value : Lean.Expr) + : IO (Address × Lean.Environment × CompileM.CompileEnv) := do + -- 1. Compile under anonymous to get payload address + let (payloadAddr, compileEnv') ← IO.ofExcept (compileDef compileEnv lvls type value) + + -- 2. Create commitment + let (_comm, commitAddr) ← commitConst payloadAddr + + -- 3. 
Alpha-invariance check: recompile under the commit name and verify + -- the address is identical. If the compiler leaks names into the + -- serialized block, this will catch it immediately rather than letting + -- a broken commitment silently propagate. + let commitName := Address.toUniqueName commitAddr + let namedAddr ← IO.ofExcept do + let (addr, _) ← compileDef compileEnv lvls type value (name := commitName) + return addr + if payloadAddr != namedAddr then + throw $ IO.userError s!"commitDef: alpha-invariance failure: anonymous \ + compiled to {payloadAddr} but {commitName} compiled to {namedAddr}" + + -- 4. Add to Lean environment under the commitment address name + let defnVal : Lean.DefinitionVal := { + name := commitName + levelParams := lvls + type := type + value := value + hints := .regular 0 + safety := .safe + } + let decl := Lean.Declaration.defnDecl defnVal + let leanEnv' ← match Lean.Environment.addDeclCore leanEnv 0 decl .none with + | .ok env => pure env + | .error _e => throw $ IO.userError "commitDef: addDeclCore failed" + + -- 5. Also register the committed name in CompileEnv so later compilations can reference it + let (ixCommitName, _) := (CanonM.canonName commitName).run {} + let compileEnv'' := { compileEnv' with + nameToNamed := compileEnv'.nameToNamed.insert ixCommitName + ⟨payloadAddr, .empty⟩ + } + + return (commitAddr, leanEnv', compileEnv'') + +-- ============================================================================ +-- Build claims +-- ============================================================================ + +/-- Build an evaluation claim from input and output expressions. + Compiles both expressions to get their content addresses. 
-/ +def evalClaim (compileEnv : CompileM.CompileEnv) + (lvls : List Lean.Name) (input output type : Lean.Expr) + : Except String Claim := do + let (inputAddr, compileEnv') ← compileDef compileEnv lvls type input + let (outputAddr, _) ← compileDef compileEnv' lvls type output + return .eval inputAddr outputAddr + +/-- Build a check claim: asserts that the compiled definition is well-typed. -/ +def checkClaim (compileEnv : CompileM.CompileEnv) + (lvls : List Lean.Name) (type value : Lean.Expr) + : Except String Claim := do + let (addr, _) ← compileDef compileEnv lvls type value + return .check addr + +/-- Build a reveal claim from a commitment address and revealed field info. -/ +def revealClaim (comm : Address) (info : RevealConstantInfo) : Claim := + .reveal comm info + +-- ============================================================================ +-- Opening committed constants for reveal claims +-- ============================================================================ + +/-- Content address of a serialized expression: `blake3(putExpr e)`. -/ +def exprAddr (e : Ixon.Expr) : Address := + Address.blake3 (runPut (putExpr e)) + +/-- Open all fields of a compiled Constructor. -/ +def openConstructor (c : Ixon.Constructor) : RevealConstructorInfo := + { isUnsafe := some c.isUnsafe + , lvls := some c.lvls + , cidx := some c.cidx + , params := some c.params + , fields := some c.fields + , typ := some (exprAddr c.typ) } + +/-- Open a compiled RecursorRule at a given array index. -/ +def openRecursorRule (idx : UInt64) (r : Ixon.RecursorRule) : RevealRecursorRule := + ⟨idx, r.fields, exprAddr r.rhs⟩ + +/-- Open all fields of a compiled MutConst component. 
-/ +def openMutConst (mc : Ixon.MutConst) : RevealMutConstInfo := + match mc with + | .defn d => .defn (some d.kind) (some d.safety) (some d.lvls) + (some (exprAddr d.typ)) (some (exprAddr d.value)) + | .indc i => + let ctors := Id.run do + let mut arr := #[] + for j in [:i.ctors.size] do + arr := arr.push (j.toUInt64, openConstructor i.ctors[j]!) + return arr + .indc (some i.recr) (some i.refl) (some i.isUnsafe) + (some i.lvls) (some i.params) (some i.indices) (some i.nested) + (some (exprAddr i.typ)) (some ctors) + | .recr r => + let rules := Id.run do + let mut arr := #[] + for j in [:r.rules.size] do + arr := arr.push (openRecursorRule j.toUInt64 r.rules[j]!) + return arr + .recr (some r.k) (some r.isUnsafe) (some r.lvls) + (some r.params) (some r.indices) (some r.motives) (some r.minors) + (some (exprAddr r.typ)) (some rules) + +/-- Build a fully-revealed RevealConstantInfo from a compiled ConstantInfo. + All fields are set to `some`. To build a partial reveal, set unwanted + fields to `none` afterward, then pass to `revealClaim`. -/ +def openConstantInfo (ci : Ixon.ConstantInfo) : RevealConstantInfo := + match ci with + | .defn d => .defn (some d.kind) (some d.safety) (some d.lvls) + (some (exprAddr d.typ)) (some (exprAddr d.value)) + | .recr r => + let rules := Id.run do + let mut arr := #[] + for j in [:r.rules.size] do + arr := arr.push (openRecursorRule j.toUInt64 r.rules[j]!) 
+ return arr + .recr (some r.k) (some r.isUnsafe) (some r.lvls) + (some r.params) (some r.indices) (some r.motives) (some r.minors) + (some (exprAddr r.typ)) (some rules) + | .axio a => .axio (some a.isUnsafe) (some a.lvls) (some (exprAddr a.typ)) + | .quot q => .quot (some q.kind) (some q.lvls) (some (exprAddr q.typ)) + | .cPrj p => .cPrj (some p.idx) (some p.cidx) (some p.block) + | .rPrj p => .rPrj (some p.idx) (some p.block) + | .iPrj p => .iPrj (some p.idx) (some p.block) + | .dPrj p => .dPrj (some p.idx) (some p.block) + | .muts ms => + let components := Id.run do + let mut arr := #[] + for j in [:ms.size] do + arr := arr.push (j.toUInt64, openMutConst ms[j]!) + return arr + .muts components + +/-- Look up a committed constant and build a fully-revealed RevealConstantInfo. + The caller should set unwanted fields to `none` for a partial reveal, + then pass the result to `revealClaim`. -/ +def openCommitment (compileEnv : CompileM.CompileEnv) (commitAddr : Address) + : Except String RevealConstantInfo := do + let commitName := Address.toUniqueName commitAddr + let (ixCommitName, _) := (CanonM.canonName commitName).run {} + let named ← match compileEnv.nameToNamed.get? ixCommitName with + | some n => pure n + | none => .error s!"openCommitment: unknown commitment {commitAddr}" + let constant ← match compileEnv.constants.get? 
named.addr with + | some c => pure c + | none => .error s!"openCommitment: payload {named.addr} not found" + return openConstantInfo constant.info + +end Ix.Commit diff --git a/Ix/Common.lean b/Ix/Common.lean index 530ec568..24e5ca86 100644 --- a/Ix/Common.lean +++ b/Ix/Common.lean @@ -43,6 +43,7 @@ deriving instance BEq, Repr, Ord, Hashable for Substring.Raw deriving instance BEq, Repr, Ord, Hashable for Lean.SourceInfo deriving instance BEq, Repr, Ord, Hashable for Lean.Syntax.Preresolved deriving instance BEq, Repr, Ord, Hashable for Lean.Syntax +deriving instance BEq, Repr, Ord, Hashable, Inhabited, Nonempty for Lean.DataValue deriving instance BEq, Repr for Ordering deriving instance BEq, Repr, Ord for Lean.FVarId deriving instance BEq, Repr, Ord for Lean.MVarId @@ -105,10 +106,23 @@ def Nat.fromBytesLE (xs: Array UInt8) : Nat := /-- Distinguish different kinds of Ix definitions --/ inductive Ix.DefKind where -| «definition» -| «opaque» -| «theorem» -deriving BEq, Ord, Hashable, Repr, Nonempty, Inhabited +| defn : Ix.DefKind +| opaq : Ix.DefKind +| thm : Ix.DefKind +deriving BEq, Ord, Hashable, Repr, Nonempty, Inhabited, DecidableEq + +inductive Ix.DefinitionSafety where + | unsaf : Ix.DefinitionSafety + | safe : Ix.DefinitionSafety + | part : Ix.DefinitionSafety + deriving BEq, Ord, Hashable, Repr, Nonempty, Inhabited, DecidableEq + +inductive Ix.QuotKind where + | type : Ix.QuotKind + | ctor : Ix.QuotKind + | lift : Ix.QuotKind + | ind : Ix.QuotKind + deriving BEq, Ord, Hashable, Repr, Nonempty, Inhabited, DecidableEq namespace List @@ -180,24 +194,9 @@ def joinM [Monad μ] : List (List α) → μ (List α) end List -def Std.HashMap.find? {A B} [BEq A] [Hashable A] (map: Std.HashMap A B) (a: A) - := Std.HashMap.get? map a - abbrev Ix.Map := Std.HashMap abbrev Ix.Set := Std.HashSet -abbrev MutCtx := Batteries.RBMap Lean.Name Nat compare - ---instance : BEq MutCtx where --- beq a b := a.size == b.size && a.fold --- (fun acc k v => acc && match b.find? 
k with --- | some v' => v == v' --- | none => false) true - ----- TODO: incremental comparison with ForIn zip -instance : Ord MutCtx where - compare a b := compare a.toList b.toList - namespace Lean def ConstantInfo.formatAll (c : ConstantInfo) : String := @@ -266,12 +265,9 @@ def setLibsPaths (s: String) : IO Unit := do cmd := "lake" args := #["setup-file", s] } - --IO.println s!"setup-file {out.stdout}" - --IO.println s!"setup-file {out.stderr}" let split := out.stdout.splitOn "\"oleanPath\":[" |>.getD 1 "" let split := split.splitOn "],\"loadDynlibPaths\":[" |>.getD 0 "" let paths := split.replace "\"" "" |>.splitOn ","|>.map System.FilePath.mk - --IO.println s!"paths {paths}" Lean.initSearchPath (← Lean.findSysroot) paths def runCmd' (cmd : String) (args : Array String) : IO $ Except String String := do @@ -305,92 +301,4 @@ def runFrontend (input : String) (filePath : FilePath) : IO Environment := do (← msgs.toList.mapM (·.toString)).map String.trim else return s.commandState.env ---def Expr.size: Expr -> Nat ---| .mdata _ x => 1 + x.size ---| .app f a => 1 + f.size + a.size ---| .lam bn bt b bi => 1 + bt.size + b.size ---| .forallE bn bt b bi => 1 + bt.size + b.size ---| .letE ln t v b nd => 1 + t.size + v.size + b.size ---| .proj tn i s => 1 + s.size ---| x => 1 - ---def Expr.size (e : Expr) : Nat := --- go e 0 ---where --- go e n := match e with --- | .mdata _ x => go x n + 1 --- | .app f a => go a (go f n + 1) --- | .lam bn bt b bi => go bt (go b n + 1) --- | .forallE bn bt b bi => go bt (go b n + 1) --- | .letE ln t v b nd => go b (go v (go t n + 1)) --- | .proj tn i s => go s n + 1 --- | x => n - ---def Expr.msize: Expr -> Nat ---| .mdata _ x => 1 + x.msize ---| .app f a => f.msize + a.msize ---| .lam bn bt b bi => bt.msize + b.msize ---| .forallE bn bt b bi => bt.msize + b.msize ---| .letE ln t v b nd => t.msize + v.msize + b.msize ---| .proj tn i s => s.msize ---| x => 0 --- ---def Expr.stripMData : Expr -> Expr ---| .mdata _ x => x.stripMData ---| .app 
f a => .app f.stripMData a.stripMData ---| .lam bn bt b bi => .lam bn bt.stripMData b.stripMData bi ---| .forallE bn bt b bi => .forallE bn bt.stripMData b.stripMData bi ---| .letE ln t v b nd => .letE ln t.stripMData v.stripMData b.stripMData nd ---| .proj tn i s => .proj tn i s.stripMData ---| x@(.lit ..) => x ---| x@(.const ..) => x ---| x@(.bvar ..) => x ---| x@(.fvar ..) => x ---| x@(.sort ..) => x ---| x@(.mvar ..) => x - ---def RecursorRule.stripMData : RecursorRule -> RecursorRule ---| x => --- dbg_trace s!"RecursorRule.stripMData" --- match x with --- | ⟨c, nf, rhs⟩ => ⟨c, nf, rhs.stripMData⟩ --- ---def RecursorRule.size : RecursorRule -> Nat ---| ⟨c, nf, rhs⟩ => rhs.size --- ---def RecursorRule.msize : RecursorRule -> Nat ---| ⟨c, nf, rhs⟩ => rhs.msize --- ---def ConstantInfo.stripMData : Lean.ConstantInfo -> Lean.ConstantInfo ---| x => --- dbg_trace s!"ConstantInfo.stripMData" --- match x with --- | .axiomInfo x => .axiomInfo { x with type := x.type.stripMData } --- | .defnInfo x => .defnInfo { x with type := x.type.stripMData, value := x.value.stripMData } --- | .thmInfo x => .thmInfo { x with type := x.type.stripMData, value := x.value.stripMData } --- | .quotInfo x => .quotInfo { x with type := x.type.stripMData } --- | .opaqueInfo x => .opaqueInfo { x with type := x.type.stripMData, value := x.value.stripMData } --- | .inductInfo x => .inductInfo { x with type := x.type.stripMData } --- | .ctorInfo x => .ctorInfo { x with type := x.type.stripMData } --- | .recInfo x => .recInfo { x with type := x.type.stripMData, rules := x.rules.map (·.stripMData) } --- ---def ConstantInfo.size : Lean.ConstantInfo -> Nat ---| .axiomInfo x => x.type.size ---| .defnInfo x => x.type.size + x.value.size ---| .thmInfo x => x.type.size + x.value.size ---| .quotInfo x => x.type.size ---| .opaqueInfo x => x.type.size + x.value.size ---| .inductInfo x => x.type.size ---| .ctorInfo x => x.type.size ---| .recInfo x => x.type.size + x.rules.foldr (fun a acc => a.size + acc) 0 
--- ---def ConstantInfo.msize : Lean.ConstantInfo -> Nat ---| .axiomInfo x => x.type.msize ---| .defnInfo x => x.type.msize + x.value.msize ---| .thmInfo x => x.type.msize + x.value.msize ---| .quotInfo x => x.type.msize ---| .opaqueInfo x => x.type.msize + x.value.msize ---| .inductInfo x => x.type.msize ---| .ctorInfo x => x.type.msize ---| .recInfo x => x.type.msize + x.rules.foldr (fun a acc => a.msize + acc) 0 end Lean diff --git a/Ix/CompileM.lean b/Ix/CompileM.lean index d276bf71..e527f62c 100644 --- a/Ix/CompileM.lean +++ b/Ix/CompileM.lean @@ -1,1161 +1,1983 @@ +/- + CompileM: Lean Compiler for Ixon Format (Pure Implementation) + + Architecture: + - CompileState: global immutable state (Reader) + - BlockEnv: per-block read-only context (Reader) + - BlockCache: per-block mutable state (State) + - Pure sequential compilation + + This implementation is designed for correctness and formalization. + For performance, use the Rust implementation. +-/ + import Std.Data.HashMap +import Std.Sync import Ix.Ixon -import Ix.Address -import Ix.Mutual +import Ix.Environment +import Ix.Sharing import Ix.Common -import Ix.CondenseM -import Ix.GraphM import Ix.Store +import Ix.Mutual +import Ix.GraphM +import Ix.CondenseM import Ix.SOrder -import Ix.Cronos +import Ix.CanonM -namespace Ix -open Ixon hiding Substring +namespace Ix.CompileM -structure CompileEnv where - env: Lean.Environment - consts: Map Lean.Name MetaAddress - comms: Map Lean.Name MetaAddress - all: Set Lean.Name - current: Lean.Name - mutCtx: MutCtx - univCtx: List Lean.Name - -def CompileEnv.init - (env: Lean.Environment) - (consts: Map Lean.Name MetaAddress) - (comms: Map Lean.Name MetaAddress) - (all: Set Lean.Name) - (current: Lean.Name) - : CompileEnv := ⟨env, consts, comms, all, current, default, default⟩ - -structure CompileState where - constCache: Map Lean.Name MetaAddress - univCache: Map Lean.Level MetaAddress - exprCache: Map Lean.Expr MetaAddress - synCache: Map Lean.Syntax Ixon.Syntax - 
nameCache: Map Lean.Name Address - strCache: Map String Address - constCmp: Map (Lean.Name × Lean.Name) Ordering - blocks: Set MetaAddress - deriving Inhabited, Nonempty - -def CompileState.init : CompileState := - ⟨default, default, default, default, default, default, default, default⟩ +-- Need Nonempty for partial function compilation +instance : Nonempty SOrder := ⟨⟨true, .eq⟩⟩ -inductive CompileError where -| unknownConstant (curr unknown: Lean.Name): CompileError -| levelMetavariable : Lean.Level -> CompileError -| exprMetavariable : Lean.Expr -> CompileError -| exprFreeVariable : Lean.Expr -> CompileError -| invalidBVarIndex : Nat -> CompileError -| levelNotFound : Lean.Name -> Lean.Name -> List Lean.Name -> String -> CompileError -| invalidConstantKind : Lean.Name -> String -> String -> CompileError -| mutualBlockMissingProjection : Lean.Name -> CompileError ---| nonRecursorExtractedFromChildren : Lean.Name → CompileError -| cantFindMutIndex : Lean.Name -> MutCtx -> CompileError -| cantFindMutMeta : Lean.Name -> Map Address Address -> CompileError -| kernelException : Lean.Kernel.Exception → CompileError ---| cantPackLevel : Nat → CompileError ---| nonCongruentInductives : PreInd -> PreInd -> CompileError -| alphaInvarianceFailure : Lean.ConstantInfo -> MetaAddress -> Lean.ConstantInfo -> MetaAddress -> CompileError ---| dematBadMutualBlock: MutualBlock -> CompileError ---| dematBadInductiveBlock: InductiveBlock -> CompileError -| badMutualBlock: List (List MutConst) -> CompileError -| badIxonDeserialization : Address -> String -> CompileError -| unknownStoreAddress : Address -> CompileError -| condensationError : Lean.Name -> CompileError ---| emptyIndsEquivalenceClass: List (List PreInd) -> CompileError - -def CompileError.pretty : CompileError -> IO String -| .unknownConstant c n => pure s!"Unknown constant '{n}' @ {c}" -| .levelMetavariable l => pure s!"Unfilled level metavariable on universe '{l}'" -| .exprMetavariable e => pure s!"Unfilled level 
metavariable on expression '{e}'" -| .exprFreeVariable e => pure s!"Free variable in expression '{e}'" -| .invalidBVarIndex idx => pure s!"Invalid index {idx} for bound variable context" -| .levelNotFound c n ns msg => pure s!"'Level {n}' not found in '{ns}', {msg} @ {c}" -| .invalidConstantKind n ex gt => pure s!"Invalid constant kind for '{n}'. Expected '{ex}' but got '{gt}'" -| .mutualBlockMissingProjection n => pure s!"Constant '{n}' wasn't content-addressed in mutual block" -| .cantFindMutIndex n mc => pure s!"Can't find index for mutual definition '{n}' in {repr mc}" -| .cantFindMutMeta n ms => pure s!"Can't find metadata for mutual definition -'{n}' in {repr ms}" -| .kernelException e => (·.pretty 80) <$> (e.toMessageData .empty).format -| .alphaInvarianceFailure x xa y ya => - pure s!"alpha invariance failure {repr x} hashes to {xa}, but {repr y} hashes to {ya}" -| .badMutualBlock block => pure s!"bad mutual block {repr block}" -| .badIxonDeserialization a s => pure s!"bad deserialization of ixon at {a}, error: {s}" -| .unknownStoreAddress a => pure s!"unknown store address {a}" -| .condensationError n => pure s!"condensation error {n}" - -abbrev CompileM := - ReaderT CompileEnv <| ExceptT CompileError <| StateT CompileState IO - -abbrev CompileM.Result α := (Except CompileError α × CompileState) - -def CompileM.run (env: CompileEnv) (stt: CompileState) (c : CompileM α) - : IO (Except CompileError α × CompileState) - := StateT.run (ExceptT.run (ReaderT.run c env)) stt - ---def randByte (lo hi: Nat): CompileM Nat := do --- modifyGet fun s => --- let (res, g') := randNat s.prng lo hi --- (res, {s with prng := g'}) --- ---def freshSecret : CompileM Address := do --- let mut secret: ByteArray := default --- for _ in [:32] do --- let rand <- randByte 0 255 --- secret := secret.push rand.toUInt8 --- return ⟨secret⟩ - - --- add binding name to local context -def CompileM.withCurrent (name: Lean.Name) : CompileM α -> CompileM α := - withReader $ fun c => { c with 
current := name } - -- add levels to local context -def CompileM.withLevels (lvls : List Lean.Name) : CompileM α -> CompileM α := - withReader $ fun c => { c with univCtx := lvls } - -- add mutual recursion info to local context -def CompileM.withMutCtx (mutCtx : MutCtx) : CompileM α -> CompileM α := - withReader $ fun c => { c with mutCtx := mutCtx } - -- reset local context -def CompileM.resetCtx (current: Lean.Name) : CompileM α -> CompileM α := - withReader $ fun c => { c with univCtx := [], mutCtx := {}, current } - -def storeIxon (ixon: Ixon): CompileM Address := do - liftM (Store.write (Ixon.ser ixon)).toIO - -def storeString (str: String): CompileM Address := do - match (<- get).strCache.find? str with - | some addr => pure addr +/-- Global compilation environment shared across all blocks. -/ +structure CompileEnv where + /-- Canonicalized Lean environment -/ + env : Ix.Environment + /-- Map from constant name to Named (address + metadata) -/ + nameToNamed : Std.HashMap Name Ixon.Named + /-- Compiled constants storage -/ + constants : Std.HashMap Address Ixon.Constant + /-- Blob storage for literals -/ + blobs : Std.HashMap Address ByteArray + /-- Total bytes of serialized constants (for profiling) -/ + totalBytes : Nat + +/-- Initialize global state from canonicalization result. -/ +def CompileEnv.new (env: Ix.Environment) : CompileEnv := + { env, nameToNamed := {}, constants := {}, blobs := {}, totalBytes := 0 } + +instance : Inhabited CompileEnv where + default := { env := { consts := {} }, nameToNamed := {}, constants := {}, blobs := {}, totalBytes := 0 } + +/-- Result of compiling a block, including the main constant and any projections. -/ +structure BlockResult where + /-- The main block constant (Muts for mutual blocks, or direct constant) -/ + block : Ixon.Constant + /-- Pre-computed serialized bytes and address (avoids re-serialization). 
-/ + blockBytes : ByteArray + blockAddr : Address + /-- Metadata for the block constant (for singleton blocks without projections) -/ + blockMeta : Ixon.ConstantMeta := .empty + /-- Projections: each name maps to its projection constant and metadata. + Empty for single non-inductive constants (name maps directly to block). + For inductives/mutual blocks: contains IPrj/DPrj/RPrj/CPrj for each name. -/ + projections : Array (Name × Ixon.Constant × Ixon.ConstantMeta) := #[] + deriving Inhabited + +/-- Per-block compilation state and tables. -/ +structure BlockState where + /-- Expression compilation cache (keyed by Expr for O(1) lookup). + Value is (compiled expression, arena root index). -/ + exprCache : Std.HashMap Expr (Ixon.Expr × UInt64) := {} + /-- Universe compilation cache (keyed by Level for O(1) lookup) -/ + univCache : Std.HashMap Level Ixon.Univ := {} + /-- Constant comparison cache (by name pairs) -/ + cmpCache : Std.HashMap (Name × Name) Ordering := {} + /-- Reference table (ordered unique addresses) -/ + refs : Array Address := #[] + refsIndex : Std.HashMap Address UInt64 := {} + /-- Universe table (ordered unique universes) -/ + univs : Array Ixon.Univ := #[] + univsIndex : Std.HashMap Ixon.Univ UInt64 := {} + /-- Blob storage collected during block compilation -/ + blockBlobs : Std.HashMap Address ByteArray := {} + /-- Name components collected during block compilation -/ + blockNames : Std.HashMap Address Ix.Name := {} + /-- Arena-based expression metadata for the current constant -/ + arena : Ixon.ExprMetaArena := {} + deriving Inhabited + +/-- Get or insert a reference into the refs table, returning its index. -/ +def BlockState.internRef (cache : BlockState) (addr : Address) : BlockState × UInt64 := + match cache.refsIndex.get? 
addr with + | some idx => (cache, idx) | none => - liftM (Store.write (str.toUTF8)).toIO - -def storeNat (nat: Nat): CompileM Address := do - liftM (Store.write (⟨nat.toBytesLE⟩)).toIO - -def storeSerial [Serialize A] (a: A): CompileM Address := do - liftM (Store.write (Ixon.ser a)).toIO - -def storeMeta (met: Metadata): CompileM Address := do - liftM (Store.write (Ixon.ser met)).toIO - -def compileName (name: Lean.Name): CompileM Address := do - match (<- get).nameCache.find? name with - | some addr => - --dbg_trace "compileName cached {(<- read).current} {name}" - pure addr - | none => do - --dbg_trace "compileName {(<- read).current} {name}" - let addr <- go name - modifyGet fun stt => (addr, { stt with - nameCache := stt.nameCache.insert name addr - }) - where - go : Lean.Name -> CompileM Address - | .anonymous => storeIxon .nanon - | .str n s => do - let n' <- compileName n - let s' <- storeString s - storeIxon (.nstr n' s') - | .num n i => do - let n' <- compileName n - let i' <- storeNat i - storeIxon (.nnum n' i') - -/-- Defines an ordering for Lean universes -/ -def compareLevel (xctx yctx: List Lean.Name) - : Lean.Level -> Lean.Level -> CompileM SOrder - | x@(.mvar ..), _ => throw $ .levelMetavariable x - | _, y@(.mvar ..) => throw $ .levelMetavariable y - | .zero, .zero => return ⟨true, .eq⟩ - | .zero, _ => return ⟨true, .lt⟩ - | _, .zero => return ⟨true, .gt⟩ - | .succ x, .succ y => compareLevel xctx yctx x y - | .succ .., _ => return ⟨true, .lt⟩ - | _, .succ .. => return ⟨true, .gt⟩ - | .max xl xr, .max yl yr => SOrder.cmpM + let idx := cache.refs.size.toUInt64 + ({ cache with + refs := cache.refs.push addr + refsIndex := cache.refsIndex.insert addr idx + }, idx) + +/-- Get or insert a universe into the univs table, returning its index. -/ +def BlockState.internUniv (cache : BlockState) (u : Ixon.Univ) : BlockState × UInt64 := + match cache.univsIndex.get? 
u with + | some idx => (cache, idx) + | none => + let idx := cache.univs.size.toUInt64 + ({ cache with + univs := cache.univs.push u + univsIndex := cache.univsIndex.insert u idx + }, idx) + +/-- Per-block compilation environment. -/ +structure BlockEnv where + /-- All constants in current mutual block -/ + all : Set Name + /-- Current constant being compiled -/ + current : Name + /-- Mutual recursion context: name → index within block -/ + mutCtx : MutCtx + /-- Universe parameter context (de Bruijn indices) -/ + univCtx : List Name + +/-! ## Compilation Error -/ + +/-- Compilation error type. Variant order matches Rust CompileError (tags 0–5). -/ +inductive CompileError where + | missingConstant (name : String) + | missingAddress (addr : Address) + | invalidMutualBlock (reason : String) + | unsupportedExpr (desc : String) + | unknownUnivParam (curr param : String) + | serializeError (err : Ixon.SerializeError) + deriving Repr, BEq + +instance : ToString CompileError where + toString + | .missingConstant name => s!"missingConstant: {name}" + | .missingAddress addr => s!"missingAddress: {addr}" + | .invalidMutualBlock reason => s!"invalidMutualBlock: {reason}" + | .unsupportedExpr desc => s!"unsupportedExpr: {desc}" + | .unknownUnivParam curr param => s!"unknownUnivParam: compiling {curr}, param {param}" + | .serializeError err => s!"serializeError: {err}" + +abbrev CompileM := ReaderT (CompileEnv × BlockEnv) (ExceptT CompileError (StateT BlockState Id)) + +/-- Run a CompileM computation purely. -/ +def CompileM.run (compileEnv : CompileEnv) (blockEnv : BlockEnv) (blockState : BlockState) + (m : CompileM α) : Except CompileError (α × BlockState) := + match StateT.run (ExceptT.run (ReaderT.run m (compileEnv, blockEnv))) blockState with + | (Except.ok a, state') => Except.ok (a, state') + | (Except.error e, _) => Except.error e + +/-- Get the global compile environment. 
-/ +def getCompileEnv : CompileM CompileEnv := do + pure (← read).1 + +/-- Get the block environment. -/ +def getBlockEnv : CompileM BlockEnv := do + pure (← read).2 + +/-- Get the block state. -/ +def getBlockState : CompileM BlockState := do + get + +/-- Modify the block state. -/ +def modifyBlockState (f : BlockState → BlockState) : CompileM Unit := do + modify f + +/-- Modify the block state and return a value. -/ +def modifyGetBlockState (f : BlockState → α × BlockState) : CompileM α := do + modifyGet fun state => + let (a, state') := f state + (a, state') + +/-- Modify the block environment locally. -/ +def withBlockEnv (f : BlockEnv → BlockEnv) (m : CompileM α) : CompileM α := + withReader (fun (env, blockEnv) => (env, f blockEnv)) m + +/-- Set universe context. -/ +def withUnivCtx (univCtx : List Name) : CompileM α → CompileM α := + withBlockEnv fun env => { env with univCtx } + +/-- Set mutual context. -/ +def withMutCtx (mutCtx : MutCtx) : CompileM α → CompileM α := + withBlockEnv fun env => { env with mutCtx } + +/-- Get the mutual context as an array of name hashes, ordered by index then name. -/ +def getMutCtxAddrs : CompileM (Array Address) := do + let ctx := (← getBlockEnv).mutCtx + pure <| ctx.toList.toArray.qsort (fun a b => + if a.2 != b.2 then a.2 < b.2 else (compare a.1 b.1).isLT) |>.map (·.1.getHash) + +/-- Set current constant. -/ +def withCurrent (name : Name) : CompileM α → CompileM α := + withBlockEnv fun env => { env with current := name } + +/-- Set all constants in block. -/ +def withAll (all : Set Name) : CompileM α → CompileM α := + withBlockEnv fun env => { env with all } + +/-! ## Metadata Management (Arena-Based) -/ + +/-- Allocate a new node in the arena, returning its index. 
-/ +def allocArenaNode (node : Ixon.ExprMetaData) : CompileM UInt64 := + modifyGetBlockState fun c => + let idx := c.arena.nodes.size.toUInt64 + (idx, { c with arena := { nodes := c.arena.nodes.push node } }) + +/-- Take the current arena and reset for next constant. -/ +def takeArena : CompileM Ixon.ExprMetaArena := + modifyGetBlockState fun c => (c.arena, { c with arena := {} }) + +/-- Reset the arena for a new constant. -/ +def resetArena : CompileM Unit := + modifyBlockState fun c => { c with arena := {} } + +/-- Clear the expression cache (between constants to avoid cross-constant arena references). -/ +def clearExprCache : CompileM Unit := + modifyBlockState fun c => { c with exprCache := {} } + +/-! ## Universe Compilation -/ + +/-- Compile an Ix.Level to Ixon.Univ type. -/ +partial def compileUniv (lvl : Level) : CompileM Ixon.Univ := do + -- Check cache first (O(1) lookup via embedded hash) + let state ← getBlockState + if let some u := state.univCache.get? lvl then + return u + + let u ← match lvl with + | .zero _ => pure .zero + | .succ l _ => .succ <$> compileUniv l + | .max l r _ => .max <$> compileUniv l <*> compileUniv r + | .imax l r _ => .imax <$> compileUniv l <*> compileUniv r + | .param name _ => do + let ctx := (← getBlockEnv).univCtx + match ctx.idxOf? name with + | some i => pure (.var i.toUInt64) + | none => throw (.unknownUnivParam s!"{(← getBlockEnv).current}" s!"{name}") + | .mvar _ _ => throw (.unsupportedExpr "level metavariable") + + -- Cache result + modifyBlockState fun c => { c with univCache := c.univCache.insert lvl u } + pure u + +/-- Intern a universe into the block's univs table, returning its index. -/ +def internUniv (u : Ixon.Univ) : CompileM UInt64 := + modifyGetBlockState fun state => + let (state', idx) := state.internUniv u + (idx, state') + +/-- Compile and intern an Ix.Level, returning its univs table index. 
-/ +def compileAndInternUniv (lvl : Level) : CompileM UInt64 := do + let u ← compileUniv lvl + internUniv u + +/-! ## Reference Handling -/ + +/-- Intern an address into the block's refs table, returning its index. -/ +def internRef (addr : Address) : CompileM UInt64 := + modifyGetBlockState fun state => + let (state', idx) := state.internRef addr + (idx, state') + +/-- Look up a constant's address from the global compile environment. -/ +def lookupConstAddr (name : Name) : CompileM Address := do + let env ← getCompileEnv + match env.nameToNamed.get? name with + | some named => pure named.addr + | none => throw (.missingConstant s!"{name}") + +/-- Find a constant in the Ix environment. -/ +def findConst (name : Name) : CompileM ConstantInfo := do + let env ← getCompileEnv + match env.env.consts.get? name with + | some const => pure const + | none => throw (.missingConstant s!"{name}") + +/-- Get the Expr for a constant's type. -/ +def getConstType (name : Name) : CompileM Expr := do + let const ← findConst name + pure const.getCnst.type + +/-- Get the Expr for a definition/theorem/opaque value. -/ +def getConstValue (name : Name) : CompileM Expr := do + let const ← findConst name + match const with + | .defnInfo v => pure v.value + | .thmInfo v => pure v.value + | .opaqueInfo v => pure v.value + | _ => throw (.invalidMutualBlock s!"Constant {name} has no value") + +/-! ## DataValue and KVMap Compilation -/ + +/-- Serialize an Ix.Int to bytes. -/ +def serializeIxInt (i : Ix.Int) : ByteArray := + match i with + | .ofNat n => + let natBytes := ByteArray.mk (Nat.toBytesLE n) + ByteArray.mk #[0] ++ natBytes + | .negSucc n => + let natBytes := ByteArray.mk (Nat.toBytesLE n) + ByteArray.mk #[1] ++ natBytes + +/-- Store a string as a blob and return its 32-byte address. 
-/ +def storeString (s : String) : CompileM Address := do + let bytes := s.toUTF8 + let addr := Address.blake3 bytes + modifyBlockState fun c => { c with blockBlobs := c.blockBlobs.insert addr bytes } + pure addr + +/-- Compile a name: store all string components as blobs and track + name components in blockNames for deduplication. + This matches Rust's compile_name behavior. -/ +partial def compileName (name : Ix.Name) : CompileM Unit := do + let addr := name.getHash + let state ← getBlockState + if state.blockNames.contains addr then return () + match name with + | .anonymous _ => + modifyBlockState fun c => + { c with blockNames := c.blockNames.insert addr name } + | .str parent s _ => + modifyBlockState fun c => + { c with blockNames := c.blockNames.insert addr name } + discard <| storeString s + compileName parent + | .num parent _ _ => + modifyBlockState fun c => + { c with blockNames := c.blockNames.insert addr name } + compileName parent + +/-- Serialize a u64 in trimmed little-endian format (only necessary bytes). + Uses Ixon.u64ByteCount for the byte count calculation. -/ +def putU64TrimmedLE (x : UInt64) : ByteArray := Id.run do + let count := Ixon.u64ByteCount x + let mut bytes := ByteArray.empty + let mut v := x + for _ in [:count.toNat] do + bytes := bytes.push (v &&& 0xFF).toUInt8 + v := v >>> 8 + bytes + +/-- Serialize a Nat using Tag0 encoding (variable length, compact for small values). + Uses Ixon.u64ByteCount for the byte count calculation. -/ +def putTag0 (n : Nat) : ByteArray := + let x := n.toUInt64 + if x < 128 then + ByteArray.mk #[x.toUInt8] + else + let byteCount := Ixon.u64ByteCount x + ByteArray.mk #[0x80 ||| (byteCount - 1)] ++ putU64TrimmedLE x + +/-- Serialize an Ix.Substring to bytes, storing strings as blobs. 
-/ +def serializeIxSubstring (ss : Ix.Substring) : CompileM ByteArray := do + let strAddr ← storeString ss.str + pure (strAddr.hash ++ putTag0 ss.startPos ++ putTag0 ss.stopPos) + +/-- Serialize an Ix.SourceInfo to bytes, storing strings as blobs. -/ +def serializeIxSourceInfo (si : Ix.SourceInfo) : CompileM ByteArray := do + match si with + | .original leading leadingPos trailing trailingPos => + let leadingBytes ← serializeIxSubstring leading + let trailingBytes ← serializeIxSubstring trailing + pure (ByteArray.mk #[0] ++ leadingBytes ++ putTag0 leadingPos ++ + trailingBytes ++ putTag0 trailingPos) + | .synthetic start stop canonical => + pure (ByteArray.mk #[1] ++ putTag0 start ++ putTag0 stop ++ + ByteArray.mk #[if canonical then 1 else 0]) + | .none => pure (ByteArray.mk #[2]) + +/-- Serialize an Ix.SyntaxPreresolved to bytes, storing strings as blobs. -/ +def serializeIxSyntaxPreresolved (sp : Ix.SyntaxPreresolved) : CompileM ByteArray := do + match sp with + | .namespace name => + compileName name + pure (ByteArray.mk #[0] ++ name.getHash.hash) + | .decl name aliases => + compileName name + let header := ByteArray.mk #[1] ++ name.getHash.hash ++ putTag0 aliases.size + let mut aliasesBytes := ByteArray.empty + for a in aliases do + let addr ← storeString a + aliasesBytes := aliasesBytes ++ addr.hash + pure (header ++ aliasesBytes) + +/-- Serialize an Ix.Syntax to bytes, storing strings as blobs. 
-/ +partial def serializeIxSyntax (syn : Ix.Syntax) : CompileM ByteArray := do + match syn with + | .missing => pure (ByteArray.mk #[0]) + | .node info kind args => + compileName kind + let header := ByteArray.mk #[1] + let infoBytes ← serializeIxSourceInfo info + let kindBytes := kind.getHash.hash + let lenBytes := putTag0 args.size + let mut argsBytes := ByteArray.empty + for arg in args do + argsBytes := argsBytes ++ (← serializeIxSyntax arg) + pure (header ++ infoBytes ++ kindBytes ++ lenBytes ++ argsBytes) + | .atom info val => + let infoBytes ← serializeIxSourceInfo info + let valAddr ← storeString val + pure (ByteArray.mk #[2] ++ infoBytes ++ valAddr.hash) + | .ident info rawVal val preresolved => + compileName val + let header := ByteArray.mk #[3] + let infoBytes ← serializeIxSourceInfo info + let rawBytes ← serializeIxSubstring rawVal + let valBytes := val.getHash.hash + let lenBytes := putTag0 preresolved.size + let mut presBytes := ByteArray.empty + for pr in preresolved do + presBytes := presBytes ++ (← serializeIxSyntaxPreresolved pr) + pure (header ++ infoBytes ++ rawBytes ++ valBytes ++ lenBytes ++ presBytes) + +/-- Compile a DataValue to Ixon.DataValue, storing blobs as needed. 
-/ +def compileDataValue (dv : Ix.DataValue) : CompileM Ixon.DataValue := do + match dv with + | .ofString s => + let bytes := s.toUTF8 + let addr := Address.blake3 bytes + modifyBlockState fun c => { c with blockBlobs := c.blockBlobs.insert addr bytes } + pure (.ofString addr) + | .ofBool b => pure (.ofBool b) + | .ofName n => + compileName n + pure (.ofName n.getHash) + | .ofNat n => + let bytes := ByteArray.mk (Nat.toBytesLE n) + let addr := Address.blake3 bytes + modifyBlockState fun c => { c with blockBlobs := c.blockBlobs.insert addr bytes } + pure (.ofNat addr) + | .ofInt i => + let bytes := serializeIxInt i + let addr := Address.blake3 bytes + modifyBlockState fun c => { c with blockBlobs := c.blockBlobs.insert addr bytes } + pure (.ofInt addr) + | .ofSyntax syn => + let bytes ← serializeIxSyntax syn + let addr := Address.blake3 bytes + modifyBlockState fun c => { c with blockBlobs := c.blockBlobs.insert addr bytes } + pure (.ofSyntax addr) + +/-- Compile a KVMap (array of name-value pairs). -/ +def compileKVMap (kvs : Array (Ix.Name × Ix.DataValue)) : CompileM Ixon.KVMap := do + kvs.mapM fun (k, v) => do + compileName k + let vData ← compileDataValue v + pure (k.getHash, vData) + +/-! ## Expression Compilation -/ + +/-- Compile a canonical Ix.Expr to Ixon.Expr with arena-based metadata. + Returns (compiled expression, arena root index). + Uses Ix.Expr as cache key for O(1) lookup via embedded hash. -/ +partial def compileExpr (e : Expr) : CompileM (Ixon.Expr × UInt64) := do + -- Check cache (O(1) lookup via embedded hash) + let state ← getBlockState + if let some cached := state.exprCache.get? 
e then + return cached + + let (result, root) ← match e with + | .bvar idx _ => do + let root ← allocArenaNode .leaf + pure (.var idx.toUInt64, root) + + | .sort lvl _ => do + let idx ← compileAndInternUniv lvl + let root ← allocArenaNode .leaf + pure (.sort idx, root) + + | .const name lvls _ => do + let mutCtx := (← getBlockEnv).mutCtx + let univIndices ← lvls.mapM compileAndInternUniv + compileName name + let nameAddr := name.getHash + match mutCtx.find? name with + | some recIdx => + let root ← allocArenaNode (.ref nameAddr) + pure (.recur recIdx.toUInt64 univIndices, root) + | none => do + let addr ← lookupConstAddr name + let refIdx ← internRef addr + let root ← allocArenaNode (.ref nameAddr) + pure (.ref refIdx univIndices, root) + + | .app func arg _ => do + let (f, fRoot) ← compileExpr func + let (a, aRoot) ← compileExpr arg + let root ← allocArenaNode (.app fRoot aRoot) + pure (.app f a, root) + + | .lam name ty body bi _ => do + compileName name + let nameAddr := name.getHash + let (t, tyRoot) ← compileExpr ty + let (b, bodyRoot) ← compileExpr body + let root ← allocArenaNode (.binder nameAddr bi tyRoot bodyRoot) + pure (.lam t b, root) + + | .forallE name ty body bi _ => do + compileName name + let nameAddr := name.getHash + let (t, tyRoot) ← compileExpr ty + let (b, bodyRoot) ← compileExpr body + let root ← allocArenaNode (.binder nameAddr bi tyRoot bodyRoot) + pure (.all t b, root) + + | .letE name ty val body nonDep _ => do + compileName name + let nameAddr := name.getHash + let (t, tyRoot) ← compileExpr ty + let (v, valRoot) ← compileExpr val + let (b, bodyRoot) ← compileExpr body + let root ← allocArenaNode (.letBinder nameAddr tyRoot valRoot bodyRoot) + pure (.letE nonDep t v b, root) + + | .lit (.natVal n) _ => do + let bytes := ByteArray.mk (Nat.toBytesLE n) + let addr := Address.blake3 bytes + modifyBlockState fun c => { c with blockBlobs := c.blockBlobs.insert addr bytes } + let idx ← internRef addr + let root ← allocArenaNode .leaf + pure 
(.nat idx, root) + + | .lit (.strVal s) _ => do + let bytes := s.toUTF8 + let addr := Address.blake3 bytes + modifyBlockState fun c => { c with blockBlobs := c.blockBlobs.insert addr bytes } + let idx ← internRef addr + let root ← allocArenaNode .leaf + pure (.str idx, root) + + | .proj typeName fieldIdx struct _ => do + compileName typeName + let typeAddr ← lookupConstAddr typeName + let typeRefIdx ← internRef typeAddr + let structNameAddr := typeName.getHash + let (s, sRoot) ← compileExpr struct + let root ← allocArenaNode (.prj structNameAddr sRoot) + pure (.prj typeRefIdx fieldIdx.toUInt64 s, root) + + | .mdata kvData inner _ => do + let kvmap ← compileKVMap kvData + let (innerResult, innerRoot) ← compileExpr inner + let root ← allocArenaNode (.mdata #[kvmap] innerRoot) + pure (innerResult, root) + + | .fvar _ _ => throw (.unsupportedExpr "free variable") + | .mvar _ _ => throw (.unsupportedExpr "metavariable") + + -- Store in block-local cache + modifyBlockState fun c => { c with exprCache := c.exprCache.insert e (result, root) } + + pure (result, root) + +/-! ## Level Comparison -/ + +/-- Compare two Ix levels for ordering. -/ +def compareLevel (xctx yctx : List Name) + : Level → Level → CompileM SOrder + | .mvar .., _ => throw (.unsupportedExpr "level metavariable") + | _, .mvar .. => throw (.unsupportedExpr "level metavariable") + | .zero _, .zero _ => pure ⟨true, .eq⟩ + | .zero _, _ => pure ⟨true, .lt⟩ + | _, .zero _ => pure ⟨true, .gt⟩ + | .succ x _, .succ y _ => compareLevel xctx yctx x y + | .succ .., _ => pure ⟨true, .lt⟩ + | _, .succ .. => pure ⟨true, .gt⟩ + | .max xl xr _, .max yl yr _ => SOrder.cmpM (compareLevel xctx yctx xl yl) (compareLevel xctx yctx xr yr) - | .max .., _ => return ⟨true, .lt⟩ - | _, .max .. => return ⟨true, .gt⟩ - | .imax xl xr, .imax yl yr => SOrder.cmpM + | .max .., _ => pure ⟨true, .lt⟩ + | _, .max .. 
=> pure ⟨true, .gt⟩ + | .imax xl xr _, .imax yl yr _ => SOrder.cmpM (compareLevel xctx yctx xl yl) (compareLevel xctx yctx xr yr) - | .imax .., _ => return ⟨true, .lt⟩ - | _, .imax .. => return ⟨true, .gt⟩ - | .param x, .param y => do + | .imax .., _ => pure ⟨true, .lt⟩ + | _, .imax .. => pure ⟨true, .gt⟩ + | .param x _, .param y _ => do match (xctx.idxOf? x), (yctx.idxOf? y) with - | some xi, some yi => return ⟨true, compare xi yi⟩ - | none, _ => - throw $ .levelNotFound (<- read).current x xctx s!"compareLevel" - | _, none => - throw $ .levelNotFound (<- read).current y yctx s!"compareLevel" - -/-- Canonicalizes a Lean universe level --/ -def compileLevel (lvl: Lean.Level): CompileM MetaAddress := do - match (<- get).univCache.find? lvl with - | some l => - --dbg_trace "compileLevel cached {(<- get).univCache.size} {(<- read).current}" - pure l - | none => do - --dbg_trace "compileLevel {(<- get).univCache.size} {(<- read).current}" - let (dat, met) <- go lvl - let maddr := ⟨<- storeIxon dat, <- storeIxon met⟩ - modifyGet fun stt => (maddr, { stt with - univCache := stt.univCache.insert lvl maddr - }) - where - go : Lean.Level -> CompileM (Ixon × Ixon) - | .zero => pure (.uzero, .meta default) - | .succ x => do - let ⟨a, m⟩ <- compileLevel x - pure (.usucc a, .meta ⟨[.link m]⟩) - | .max x y => do - let ⟨xa, xm⟩ <- compileLevel x - let ⟨ya, ym⟩ <- compileLevel y - pure (.umax xa ya, .meta ⟨[.link xm, .link ym]⟩) - | .imax x y => do - let ⟨xa, xm⟩ <- compileLevel x - let ⟨ya, ym⟩ <- compileLevel y - pure (.uimax xa ya, .meta ⟨[.link xm, .link ym]⟩) - | .param n => do - let lvls := (← read).univCtx - match lvls.idxOf? n with - | some i => pure (.uvar i, .meta ⟨[.link (<- compileName n)]⟩) - | none => do throw <| .levelNotFound (<- read).current n lvls s!"compileLevel" - | l@(.mvar ..) 
=> throw $ .levelMetavariable l - -def compileSubstring : Substring.Raw -> CompileM Ixon.Substring -| ⟨str, startPos, stopPos⟩ => do - pure ⟨<- storeString str, startPos.byteIdx, stopPos.byteIdx⟩ - -def compileSourceInfo : Lean.SourceInfo -> CompileM Ixon.SourceInfo -| .original l p t e => do - let l' <- compileSubstring l - let t' <- compileSubstring t - pure <| .original l' p.byteIdx t' e.byteIdx -| .synthetic p e c => pure (.synthetic p.byteIdx e.byteIdx c) -| .none => pure .none - -def compilePreresolved : Lean.Syntax.Preresolved -> CompileM Ixon.Preresolved -| .namespace ns => .namespace <$> compileName ns -| .decl n fs => .decl <$> compileName n <*> fs.mapM storeString - -partial def compileSyntax (syn: Lean.Syntax) : CompileM Ixon.Syntax := do - --dbg_trace "compileSyntax {(<- read).current}" - match (<- get).synCache.find? syn with - | some x => pure x - | none => do - let syn' <- go syn - modifyGet fun stt => (syn', { stt with - synCache := stt.synCache.insert syn syn' - }) - where - go : Lean.Syntax -> CompileM Ixon.Syntax - | .missing => pure .missing - | .node info kind args => do - let info' <- compileSourceInfo info - let kind' <- compileName kind - let args' <- args.toList.mapM (compileSyntax · >>= storeSerial) - pure <| .node info' kind' args' - | .atom info val => do - let info' <- compileSourceInfo info - let val' <- storeString val - pure <| .atom info' val' - | .ident info rawVal val preresolved => do - let info' <- compileSourceInfo info - let rawVal' <- compileSubstring rawVal - let val' <- compileName val - let ps' <- preresolved.mapM compilePreresolved - pure <| .ident info' rawVal' val' ps' - -def compileDataValue : Lean.DataValue -> CompileM Ixon.DataValue -| .ofString s => .ofString <$> storeString s -| .ofBool b => pure (.ofBool b) -| .ofName n => .ofName <$> compileName n -| .ofNat i => .ofNat <$> storeNat i -| .ofInt i => .ofInt <$> storeSerial i -| .ofSyntax s => .ofSyntax <$> (compileSyntax s >>= storeSerial) - -def compileKVMap 
(map: Lean.KVMap): CompileM Address := do - let mut list := #[] - for (name, dataValue) in map do - let n <- compileName name - let d <- compileDataValue dataValue - list := list.push (n, d) - storeIxon (.meta ⟨[.kvmap list.toList]⟩) - -def findLeanConst (name : Lean.Name) : CompileM Lean.ConstantInfo := do - match (<- read).env.constants.find? name with - | some const => pure const - | none => throw $ .unknownConstant (<- read).current name - -def MutConst.mkIndc (i: Lean.InductiveVal) : CompileM MutConst := do - let ctors <- i.ctors.mapM getCtor - return .indc ⟨i.name, i.levelParams, i.type, i.numParams, i.numIndices, i.all, - ctors, i.numNested, i.isRec, i.isReflexive, i.isUnsafe⟩ - where - getCtor (name: Lean.Name) : CompileM (Lean.ConstructorVal) := do - match (<- findLeanConst name) with - | .ctorInfo c => pure c - | _ => throw <| .invalidConstantKind name "constructor" "" - -def compileReference (name: Lean.Name): CompileM MetaAddress := do - if name == Lean.Name.mkSimple "_obj" then - return ⟨<- storeIxon <| .prim .obj, <- storeIxon <| .meta ⟨[]⟩⟩ - else if name == Lean.Name.mkSimple "_neutral" then - return ⟨<- storeIxon <| .prim .neutral, <- storeIxon <| .meta ⟨[]⟩⟩ - else if name == Lean.Name.mkSimple "_unreachable" then - return ⟨<- storeIxon <| .prim .unreachable, <- storeIxon <| .meta ⟨[]⟩⟩ - else match (<- read).comms.find? name with - | some comm => pure comm - | none => match (<- read).consts.find? name with - | some ref => pure ref - | none => do throw <| .unknownConstant (<- read).current name - -def compileExpr: Lean.Expr -> CompileM MetaAddress -| expr => do match (<- get).exprCache.find? 
expr with - | some x => pure x - | none => do - --dbg_trace s!"compileExpr {(<- read).current} {(<- get).exprCache.size}" - let maddr <- go expr - modifyGet fun stt => (maddr, { stt with - exprCache := stt.exprCache.insert expr maddr - }) - where - go: Lean.Expr -> CompileM MetaAddress - | (.mdata kv x) => do - let md <- compileKVMap kv - let x <- compileExpr x - return ⟨x.data, <- storeIxon (.meta ⟨[.link md, .link x.meta]⟩)⟩ - | .bvar idx => do - --dbg_trace s!"compileExpr {(<- read).current} bvar" - let dat := .evar idx - let met := .meta ⟨[]⟩ - pure ⟨<- storeIxon dat, <- storeIxon met⟩ - | .sort univ => do - --dbg_trace s!"compileExpr {(<- read).current} sort" - let ⟨udata, umeta⟩ <- compileLevel univ - let dat := .esort udata - let met := .meta ⟨[.link umeta]⟩ - pure ⟨<- storeIxon dat, <- storeIxon met⟩ - | .const name lvls => do - --dbg_trace s!"compileExpr {(<- read).current} const" - let n <- compileName name - let us <- lvls.mapM compileLevel - match (← read).mutCtx.find? name with - | some idx => - --dbg_trace s!"compileExpr {(<- read).current} const rec" - let dat := .erec idx (us.map (·.data)) - let met := .meta ⟨[.link n, .links (us.map (·.meta))]⟩ - pure ⟨<- storeIxon dat, <- storeIxon met⟩ - | none => do - let ref <- compileReference name - --dbg_trace s!"compileExpr {(<- read).current}, const ref {name}, mutCtx: {repr (<- read).mutCtx}" - let dat := .eref ref.data (us.map (·.data)) - let met := .meta ⟨[.link n, .link ref.meta, .links (us.map (·.meta))]⟩ - pure ⟨<- storeIxon dat, <- storeIxon met⟩ - | .app func argm => do - --dbg_trace s!"compileExpr {(<- read).current} app" - let f <- compileExpr func - let a <- compileExpr argm - let dat := .eapp f.data a.data - let met := .meta ⟨[.link f.meta, .link a.meta]⟩ - pure ⟨<- storeIxon dat, <- storeIxon met⟩ - | .lam name type body info => do - --dbg_trace s!"compileExpr {(<- read).current} lam" - let n <- compileName name - let t <- compileExpr type - let b <- compileExpr body - let dat := .elam t.data 
b.data - let met := .meta ⟨[.link n, .info info, .link t.meta, .link b.meta]⟩ - pure ⟨<- storeIxon dat, <- storeIxon met⟩ - | .forallE name type body info => do - --dbg_trace s!"compileExpr {(<- read).current} all" - --dbg_trace s!"compileExpr {(<- read).current} all md" - let n <- compileName name - --dbg_trace s!"compileExpr {(<- read).current} all n" - let t <- compileExpr type - --dbg_trace s!"compileExpr {(<- read).current} all t" - let b <- compileExpr body - --dbg_trace s!"compileExpr {(<- read).current} all b" - let dat := .eall t.data b.data - let met := .meta ⟨[.link n, .info info, .link t.meta, .link b.meta]⟩ - pure ⟨<- storeIxon dat, <- storeIxon met⟩ - | .letE name type value body nD => do - --dbg_trace s!"compileExpr {(<- read).current} let" - let n <- compileName name - let t <- compileExpr type - let v <- compileExpr value - let b <- compileExpr body - let dat:= .elet nD t.data v.data b.data - let met := .meta ⟨[.link n, .link t.meta, .link v.meta, .link b.meta]⟩ - pure ⟨<- storeIxon dat, <- storeIxon met⟩ - | .lit (.natVal n) => do - let dat := .enat (<- storeNat n) - let met := .meta ⟨[]⟩ - pure ⟨<- storeIxon dat, <- storeIxon met⟩ - | .lit (.strVal s) => do - --dbg_trace s!"compileExpr {(<- read).current} lit str" - let dat := .estr (<- storeString s) - let met := .meta ⟨[]⟩ - pure ⟨<- storeIxon dat, <- storeIxon met⟩ - | .proj typeName idx struct => do - --dbg_trace s!"compileExpr {(<- read).current} lit proj" - let t <- compileReference typeName - let n <- compileName typeName - let s <- compileExpr struct - let dat := .eprj t.data idx s.data - let met := .meta ⟨[.link n, .link t.meta, .link s.meta]⟩ - pure ⟨<- storeIxon dat, <- storeIxon met⟩ - | expr@(.fvar ..) => throw $ .exprFreeVariable expr - | expr@(.mvar ..) 
=> throw $ .exprMetavariable expr - -def compileDefn: Ix.Def -> CompileM (Ixon.Definition × Ixon.Metadata) -| d => .withLevels d.levelParams do - --dbg_trace "compileDefn" - let n <- compileName d.name - let ls <- d.levelParams.mapM compileName - let t <- compileExpr d.type - let v <- compileExpr d.value - let as <- d.all.mapM compileName - let dat := ⟨d.kind, d.safety, ls.length, t.data, v.data⟩ - let met := ⟨[.link n, .links ls, .hints d.hints, .link t.meta, .link v.meta, .links as]⟩ - return (dat, met) - -partial def compileRule: Lean.RecursorRule -> CompileM (Ixon.RecursorRule × Address × Address) -| r => do - --dbg_trace "compileRule" - let n <- compileName r.ctor - let rhs <- compileExpr r.rhs - pure (⟨r.nfields, rhs.data⟩, (n, rhs.meta)) - -def compileRecr: Lean.RecursorVal -> CompileM (Ixon.Recursor × Metadata) -| r => .withLevels r.levelParams <| do - --dbg_trace s!"compileRecr {(<- read).current} {repr <| r.name} mutCtx: {repr (<- read).mutCtx}" - let n <- compileName r.name - let ls <- r.levelParams.mapM compileName - let t <- compileExpr r.type - let rules <- r.rules.mapM compileRule - let as <- r.all.mapM compileName - let dat := ⟨r.k, r.isUnsafe, ls.length, r.numParams, r.numIndices, - r.numMotives, r.numMinors, t.data, rules.map (·.1)⟩ - let met := ⟨[.link n, .links ls, .link t.meta, .map (rules.map (·.2)), .links as]⟩ - pure (dat, met) - -def compileConstructor (induct: Address) -: Lean.ConstructorVal -> CompileM (Ixon.Constructor × Metadata) -| c => .withLevels c.levelParams <| do - --dbg_trace s!"compileCtor {(<- read).current} {repr <| c.name} mutCtx: {repr (<- read).mutCtx}" - let n <- compileName c.name - let ls <- c.levelParams.mapM compileName - let t <- compileExpr c.type - let dat := ⟨c.isUnsafe, ls.length, c.cidx, c.numParams, c.numFields, t.data⟩ - let met := ⟨[.link n, .links ls, .link t.meta, .link induct]⟩ - pure (dat, met) - -partial def compileIndc: Ix.Ind -> CompileM (Ixon.Inductive × Map Address Address) -| ⟨name, lvls, type, ps, 
is, all, ctors, nest, rcr, refl, usafe⟩ => - .withLevels lvls do - --dbg_trace s!"compileIndc {(<- read).current} {repr <| name} mutCtx: {repr (<- read).mutCtx}" - let n <- compileName name - let ls <- lvls.mapM compileName - let t <- compileExpr type - let mut cds := #[] - let mut cms := #[] - let mut metaMap := {} - for ctor in ctors do - let (cd, cm) <- compileConstructor n ctor - let cn <- compileName ctor.name - cds := cds.push cd - let cm' <- storeMeta cm - cms := cms.push cm' - metaMap := metaMap.insert cn cm' - let as <- all.mapM compileName - let data := ⟨rcr, refl, usafe, ls.length, ps, is, nest, t.data, cds.toList⟩ - let «meta» := ⟨[.link n, .links ls, .link t.meta, .links cms.toList, .links as]⟩ - let m <- storeMeta «meta» - metaMap := metaMap.insert n m - pure (data, metaMap) - -/-- A name-irrelevant ordering of Lean expressions --/ -def compareExpr (ctx: MutCtx) (xlvls ylvls: List Lean.Name) - (x y: Lean.Expr): CompileM SOrder := do - --dbg_trace "compareExpr" + | some xi, some yi => pure ⟨true, compare xi yi⟩ + | none, _ => throw (.unknownUnivParam s!"{(← getBlockEnv).current}" s!"{x}") + | _, none => throw (.unknownUnivParam s!"{(← getBlockEnv).current}" s!"{y}") + +/-! ## Expression Comparison -/ + +/-- Name-irrelevant ordering of Ix expressions. + Matches Rust's compare_expr - no caching, handles mdata inline. -/ +partial def compareExpr (ctx : Ix.MutCtx) (xlvls ylvls : List Name) + (x y : Expr) : CompileM SOrder := do match x, y with - | e@(.mvar ..), _ => throw $ .exprMetavariable e - | _, e@(.mvar ..) => throw $ .exprMetavariable e - | e@(.fvar ..), _ => throw $ .exprFreeVariable e - | _, e@(.fvar ..) => throw $ .exprFreeVariable e - | .mdata _ x, .mdata _ y => compareExpr ctx xlvls ylvls x y - | .mdata _ x, y => compareExpr ctx xlvls ylvls x y - | x, .mdata _ y => compareExpr ctx xlvls ylvls x y - | .bvar x, .bvar y => return ⟨true, compare x y⟩ - | .bvar .., _ => return ⟨true, .lt⟩ - | _, .bvar .. 
=> return ⟨true, .gt⟩ - | .sort x, .sort y => compareLevel xlvls ylvls x y - | .sort .., _ => return ⟨true, .lt⟩ - | _, .sort .. => return ⟨true, .gt⟩ - | .const x xls, .const y yls => do - let univs ← SOrder.zipM (compareLevel xlvls ylvls) xls yls - if univs.ord != .eq then return univs - if x == y then return ⟨true, .eq⟩ + | .mvar .., _ => throw (.unsupportedExpr "metavariable in comparison") + | _, .mvar .. => throw (.unsupportedExpr "metavariable in comparison") + | .fvar .., _ => throw (.unsupportedExpr "fvar in comparison") + | _, .fvar .. => throw (.unsupportedExpr "fvar in comparison") + | .mdata _ x _, .mdata _ y _ => compareExpr ctx xlvls ylvls x y + | .mdata _ x _, y => compareExpr ctx xlvls ylvls x y + | x, .mdata _ y _ => compareExpr ctx xlvls ylvls x y + | .bvar x _, .bvar y _ => pure ⟨true, compare x y⟩ + | .bvar .., _ => pure ⟨true, .lt⟩ + | _, .bvar .. => pure ⟨true, .gt⟩ + | .sort x _, .sort y _ => compareLevel xlvls ylvls x y + | .sort .., _ => pure ⟨true, .lt⟩ + | _, .sort .. => pure ⟨true, .gt⟩ + | .const x xls _, .const y yls _ => do + let univs ← SOrder.zipM (compareLevel xlvls ylvls) xls.toList yls.toList + if univs.ord != .eq then pure univs + else if x == y then pure ⟨true, .eq⟩ else match ctx.find? x, ctx.find? y with - | some nx, some ny => return ⟨false, compare nx ny⟩ - | some _, none => return ⟨true, .lt⟩ - | none, some _ => return ⟨true, .gt⟩ + | some nx, some ny => pure ⟨false, compare nx ny⟩ + | some _, none => pure ⟨true, .lt⟩ + | none, some _ => pure ⟨true, .gt⟩ | none, none => do - --dbg_trace s!"compareExpr const {(<- read).current} consts {x} {y}" - let x' <- compileReference x - let y' <- compileReference y - return ⟨true, compare x'.data y'.data⟩ - | .const .., _ => return ⟨true, .lt⟩ - | _, .const .. => return ⟨true, .gt⟩ - | .app xf xa, .app yf ya => + let x' ← lookupConstAddr x + let y' ← lookupConstAddr y + pure ⟨true, compare x' y'⟩ + | .const .., _ => pure ⟨true, .lt⟩ + | _, .const .. 
=> pure ⟨true, .gt⟩ + | .app xf xa _, .app yf ya _ => SOrder.cmpM (compareExpr ctx xlvls ylvls xf yf) (compareExpr ctx xlvls ylvls xa ya) - | .app .., _ => return ⟨true, .lt⟩ - | _, .app .. => return ⟨true, .gt⟩ - | .lam _ xt xb _, .lam _ yt yb _ => + | .app .., _ => pure ⟨true, .lt⟩ + | _, .app .. => pure ⟨true, .gt⟩ + | .lam _ xt xb _ _, .lam _ yt yb _ _ => SOrder.cmpM (compareExpr ctx xlvls ylvls xt yt) (compareExpr ctx xlvls ylvls xb yb) - | .lam .., _ => return ⟨true, .lt⟩ - | _, .lam .. => return ⟨true, .gt⟩ - | .forallE _ xt xb _, .forallE _ yt yb _ => + | .lam .., _ => pure ⟨true, .lt⟩ + | _, .lam .. => pure ⟨true, .gt⟩ + | .forallE _ xt xb _ _, .forallE _ yt yb _ _ => SOrder.cmpM (compareExpr ctx xlvls ylvls xt yt) (compareExpr ctx xlvls ylvls xb yb) - | .forallE .., _ => return ⟨true, .lt⟩ - | _, .forallE .. => return ⟨true, .gt⟩ - | .letE _ xt xv xb _, .letE _ yt yv yb _ => + | .forallE .., _ => pure ⟨true, .lt⟩ + | _, .forallE .. => pure ⟨true, .gt⟩ + | .letE _ xt xv xb _ _, .letE _ yt yv yb _ _ => SOrder.cmpM (compareExpr ctx xlvls ylvls xt yt) <| SOrder.cmpM (compareExpr ctx xlvls ylvls xv yv) (compareExpr ctx xlvls ylvls xb yb) - | .letE .., _ => return ⟨true, .lt⟩ - | _, .letE .. => return ⟨true, .gt⟩ - | .lit x, .lit y => return ⟨true, compare x y⟩ - | .lit .., _ => return ⟨true, .lt⟩ - | _, .lit .. => return ⟨true, .gt⟩ - | .proj tnx ix tx, .proj tny iy ty => do - let tn <- match ctx.find? tnx, ctx.find? tny with + | .letE .., _ => pure ⟨true, .lt⟩ + | _, .letE .. => pure ⟨true, .gt⟩ + | .lit x _, .lit y _ => pure ⟨true, compare x y⟩ + | .lit .., _ => pure ⟨true, .lt⟩ + | _, .lit .. => pure ⟨true, .gt⟩ + | .proj tnx ix tx _, .proj tny iy ty _ => do + let tn ← match ctx.find? tnx, ctx.find? 
tny with | some nx, some ny => pure ⟨false, compare nx ny⟩ | none, some _ => pure ⟨true, .gt⟩ | some _, none => pure ⟨true, .lt⟩ | none, none => if tnx == tny then pure ⟨true, .eq⟩ else do - let x' <- compileReference tnx - let y' <- compileReference tny + let x' ← lookupConstAddr tnx + let y' ← lookupConstAddr tny pure ⟨true, compare x' y'⟩ SOrder.cmpM (pure tn) <| - SOrder.cmpM (pure ⟨true, compare ix iy⟩) <| - (compareExpr ctx xlvls ylvls tx ty) + SOrder.cmpM (pure ⟨true, compare ix iy⟩) + (compareExpr ctx xlvls ylvls tx ty) + +/-! ## Constant Comparison -/ -/-- ast comparison of two lean definitions. --/ -def compareConst (ctx: MutCtx) (x y: MutConst) - : CompileM Ordering := do - --dbg_trace "compareConst" +/-- Compare two mutual constants for ordering. -/ +def compareConst (ctx : Ix.MutCtx) (x y : MutConst) : CompileM Ordering := do let key := match compare x.name y.name with | .lt => (x.name, y.name) | _ => (y.name, x.name) - match (<- get).constCmp.find? key with - | some o => return o - | none => do - let sorder: SOrder <- match x,y with - | .defn x, .defn y => compareDef x y - | .defn _, _ => pure ⟨true, Ordering.lt⟩ - | .indc x, .indc y => compareInd x y - | .indc _, _ => pure ⟨true, Ordering.lt⟩ - | .recr x, .recr y => compareRecr x y - | .recr _, _ => pure ⟨true, Ordering.lt⟩ - if sorder.strong then modify fun stt => { stt with - constCmp := stt.constCmp.insert key sorder.ord - } - return sorder.ord - where - compareDef (x y: Def) : CompileM SOrder := do - SOrder.cmpM (pure ⟨true, compare x.kind y.kind⟩) <| - SOrder.cmpM (pure ⟨true, compare x.levelParams.length y.levelParams.length⟩) <| - SOrder.cmpM (compareExpr ctx x.levelParams y.levelParams x.type y.type) - (compareExpr ctx x.levelParams y.levelParams x.value y.value) - compareInd (x y: Ind) : CompileM SOrder := do - SOrder.cmpM (pure ⟨true, compare x.levelParams.length y.levelParams.length⟩) <| + -- Check cache + let cache ← getBlockState + if let some o := cache.cmpCache.get? 
key then + return o + + let sorder : SOrder ← match x, y with + | .defn x, .defn y => compareDef ctx x y + | .defn _, _ => pure ⟨true, .lt⟩ + | .indc x, .indc y => compareInd ctx x y + | .indc _, _ => pure ⟨true, .lt⟩ + | .recr x, .recr y => compareRecr ctx x y + | .recr _, _ => pure ⟨true, .lt⟩ + + -- Cache if strong ordering + if sorder.strong then + modifyBlockState fun c => { c with cmpCache := c.cmpCache.insert key sorder.ord } + pure sorder.ord +where + compareDef (ctx : Ix.MutCtx) (x y : Def) : CompileM SOrder := do + SOrder.cmpM (pure ⟨true, compare x.kind y.kind⟩) <| + SOrder.cmpM (pure ⟨true, compare x.levelParams.size y.levelParams.size⟩) <| + SOrder.cmpM (compareExpr ctx x.levelParams.toList y.levelParams.toList x.type y.type) + (compareExpr ctx x.levelParams.toList y.levelParams.toList x.value y.value) + compareInd (ctx : Ix.MutCtx) (x y : Ind) : CompileM SOrder := do + SOrder.cmpM (pure ⟨true, compare x.levelParams.size y.levelParams.size⟩) <| + SOrder.cmpM (pure ⟨true, compare x.numParams y.numParams⟩) <| + SOrder.cmpM (pure ⟨true, compare x.numIndices y.numIndices⟩) <| + SOrder.cmpM (pure ⟨true, compare x.ctors.size y.ctors.size⟩) <| + SOrder.cmpM (compareExpr ctx x.levelParams.toList y.levelParams.toList x.type y.type) + (SOrder.zipM (compareCtor ctx x.levelParams.toList y.levelParams.toList) x.ctors.toList y.ctors.toList) + compareCtor (ctx : Ix.MutCtx) (xlvls ylvls : List Name) + (x y : ConstructorVal) : CompileM SOrder := do + -- Cache key: normalize to (smaller, larger) pair + let key := match compare x.cnst.name y.cnst.name with + | .lt => (x.cnst.name, y.cnst.name) + | _ => (y.cnst.name, x.cnst.name) + -- Check cache first + let cache ← getBlockState + if let some o := cache.cmpCache.get? 
key then + return ⟨true, o⟩ + -- Compute comparison + let sorder ← + SOrder.cmpM (pure ⟨true, compare x.cnst.levelParams.size y.cnst.levelParams.size⟩) <| + SOrder.cmpM (pure ⟨true, compare x.cidx y.cidx⟩) <| SOrder.cmpM (pure ⟨true, compare x.numParams y.numParams⟩) <| - SOrder.cmpM (pure ⟨true, compare x.numIndices y.numIndices⟩) <| - SOrder.cmpM (pure ⟨true, compare x.ctors.length y.ctors.length⟩) <| - SOrder.cmpM (compareExpr ctx x.levelParams y.levelParams x.type y.type) <| - (SOrder.zipM compareCtor x.ctors y.ctors) - compareCtor (x y: Lean.ConstructorVal) : CompileM SOrder := do - let key := match compare x.name y.name with - | .lt => (x.name, y.name) - | _ => (y.name, x.name) - match (<- get).constCmp.find? key with - | some o => return ⟨true, o⟩ - | none => do - let sorder <- do - SOrder.cmpM (pure ⟨true, compare x.levelParams.length y.levelParams.length⟩) <| - SOrder.cmpM (pure ⟨true, compare x.cidx y.cidx⟩) <| - SOrder.cmpM (pure ⟨true, compare x.numParams y.numParams⟩) <| - SOrder.cmpM (pure ⟨true, compare x.numFields y.numFields⟩) <| - (compareExpr ctx x.levelParams y.levelParams x.type y.type) - if sorder.strong then modify fun stt => { stt with - constCmp := stt.constCmp.insert key sorder.ord - } - return sorder - compareRecr (x y: Lean.RecursorVal) : CompileM SOrder := do - SOrder.cmpM (pure ⟨true, compare x.levelParams.length y.levelParams.length⟩) <| - SOrder.cmpM (pure ⟨true,compare x.numParams y.numParams⟩) <| - SOrder.cmpM (pure ⟨true,compare x.numIndices y.numIndices⟩) <| - SOrder.cmpM (pure ⟨true, compare x.numMotives y.numMotives⟩) <| - SOrder.cmpM (pure ⟨true,compare x.numMinors y.numMinors⟩) <| - SOrder.cmpM (pure ⟨true, compare x.k y.k⟩) <| - SOrder.cmpM (compareExpr ctx x.levelParams y.levelParams x.type y.type) <| - (SOrder.zipM (compareRule x.levelParams y.levelParams) x.rules y.rules) - compareRule (xlvls ylvls: List Lean.Name) (x y: Lean.RecursorRule) - : CompileM SOrder := do - SOrder.cmpM (pure ⟨true, compare x.nfields y.nfields⟩) 
- (compareExpr ctx xlvls ylvls x.rhs y.rhs) - -def eqConst (ctx: MutCtx) (x y: MutConst) : CompileM Bool := - (fun o => o == .eq) <$> compareConst ctx x y - -/-- `sortConsts` recursively sorts a list of mutually referential constants into -ordered equivalence classes. For most cases equivalence can be determined by -syntactic differences in the definitions, but when two definitions -refer to one another in the same syntactical position the classification can -be self-referential. Therefore we use a partition refinement algorithm that -starts by assuming that all definitions in the mutual block are equal and -recursively improves our classification by sorting based on syntax: -``` -classes₀ := [startConsts] -classes₁ := sortConsts classes₀ -classes₂ := sortConsts classes₁ -classes₍ᵢ₊₁₎ := sortConsts classesᵢ ... -``` -Eventually we reach a fixed-point where `classes₍ᵢ₊₁₎ := classesᵢ` and no -further refinement is possible (trivially when each const is in its own class). --/ -partial def sortConsts (classes: List MutConst) : CompileM (List (List MutConst)) - := go [List.sortBy (compare ·.name ·.name) classes] - where - go (cs: List (List MutConst)): CompileM (List (List MutConst)) := do - --dbg_trace "sortConsts {(<- read).current} {cs.map (·.map (·.name))}" - let ctx := MutConst.ctx cs - let cs' <- cs.mapM fun ds => - match ds with - | [] => throw <| .badMutualBlock cs - | [d] => pure [[d]] - | ds => ds.sortByM (compareConst ctx) >>= List.groupByM (eqConst ctx) - let cs' := cs'.flatten.map (List.sortBy (compare ·.name ·.name)) - if cs == cs' then return cs' else go cs' - -/-- Compile a mutual block --/ -partial def compileMutConsts: List (List MutConst) - -> CompileM (Ixon × Map Address Address) -| classes => do - --dbg_trace s!"compileMutConsts {(<- read).current} {repr <| classes.map (·.map (·.name))} mutCtx: {repr (<- read).mutCtx}" - let mut dat := #[] - let mut met := {} - -- iterate through each equivalence class + SOrder.cmpM (pure ⟨true, compare x.numFields 
y.numFields⟩) + (compareExpr ctx xlvls ylvls x.cnst.type y.cnst.type) + -- Cache if strong ordering + if sorder.strong then + modifyBlockState fun c => { c with cmpCache := c.cmpCache.insert key sorder.ord } + return sorder + compareRecr (ctx : Ix.MutCtx) (x y : RecursorVal) : CompileM SOrder := do + SOrder.cmpM (pure ⟨true, compare x.cnst.levelParams.size y.cnst.levelParams.size⟩) <| + SOrder.cmpM (pure ⟨true, compare x.numParams y.numParams⟩) <| + SOrder.cmpM (pure ⟨true, compare x.numIndices y.numIndices⟩) <| + SOrder.cmpM (pure ⟨true, compare x.numMotives y.numMotives⟩) <| + SOrder.cmpM (pure ⟨true, compare x.numMinors y.numMinors⟩) <| + SOrder.cmpM (pure ⟨true, compare x.k y.k⟩) <| + SOrder.cmpM (compareExpr ctx x.cnst.levelParams.toList y.cnst.levelParams.toList x.cnst.type y.cnst.type) + (SOrder.zipM (compareRule ctx x.cnst.levelParams.toList y.cnst.levelParams.toList) x.rules.toList y.rules.toList) + compareRule (ctx : Ix.MutCtx) (xlvls ylvls : List Name) + (x y : RecursorRule) : CompileM SOrder := do + SOrder.cmpM (pure ⟨true, compare x.nfields y.nfields⟩) + (compareExpr ctx xlvls ylvls x.rhs y.rhs) + +/-- Check if two mutual constants are equal (for grouping). -/ +def eqConst (ctx : Ix.MutCtx) (x y : MutConst) : CompileM Bool := + (· == .eq) <$> compareConst ctx x y + +/-! ## sortConsts Fixed-Point Algorithm -/ + +/-- Create a MutConst.indc from an InductiveVal by fetching constructors. 
-/ +def MutConst.mkIndc (i : InductiveVal) : CompileM MutConst := do + let mut ctors : Array ConstructorVal := #[] + for ctorName in i.ctors do + let c ← getCtor ctorName + ctors := ctors.push c + pure (.indc ⟨i.cnst.name, i.cnst.levelParams, i.cnst.type, i.numParams, i.numIndices, i.all, + ctors, i.numNested, i.isRec, i.isReflexive, i.isUnsafe⟩) +where + getCtor (name : Name) : CompileM ConstructorVal := do + match ← findConst name with + | .ctorInfo c => pure c + | _ => throw (.invalidMutualBlock s!"Expected constructor: {name}") + +/-- Sort mutual constants into ordered equivalence classes. + Uses partition refinement - starts assuming all equal, + recursively improves until fixed-point. -/ +partial def sortConsts (classes : List MutConst) : CompileM (List (List MutConst)) := + go [List.sortBy (compare ·.name ·.name) classes] +where + go (cs : List (List MutConst)) : CompileM (List (List MutConst)) := do + let ctx := MutConst.ctx cs + let cs' ← cs.mapM fun ds => + match ds with + | [] => throw (.invalidMutualBlock "empty class in sortConsts") + | [d] => pure [[d]] + | ds => ds.sortByM (compareConst ctx) >>= List.groupByM (eqConst ctx) + let cs' := cs'.flatten.map (List.sortBy (compare ·.name ·.name)) + if cs == cs' then pure cs' else go cs' + +/-! ## Constant Building -/ + +/-- Count Share references in an expression (for debugging). -/ +partial def countShareRefs : Ixon.Expr → Nat + | .share _ => 1 + | .prj _ _ val => countShareRefs val + | .app f a => countShareRefs f + countShareRefs a + | .lam ty body => countShareRefs ty + countShareRefs body + | .all ty body => countShareRefs ty + countShareRefs body + | .letE _ ty val body => countShareRefs ty + countShareRefs val + countShareRefs body + | _ => 0 + +/-- Update recursor rules with rewritten expressions starting at given index. + Returns updated rules and next index. 
-/ +def updateRecursorRules (rules : Array Ixon.RecursorRule) (rewrittenExprs : Array Ixon.Expr) (startIdx : Nat) + : Array Ixon.RecursorRule × Nat := Id.run do + let mut result := rules + let mut idx := startIdx + for i in [:rules.size] do + if let some rhs := rewrittenExprs[idx]? then + result := result.set! i { result[i]! with rhs } + idx := idx + 1 + (result, idx) + +/-- Update inductive constructor types with rewritten expressions starting at given index. + Returns updated constructors and next index. -/ +def updateConstructorTypes (ctors : Array Ixon.Constructor) (rewrittenExprs : Array Ixon.Expr) (startIdx : Nat) + : Array Ixon.Constructor × Nat := Id.run do + let mut result := ctors + let mut idx := startIdx + for i in [:ctors.size] do + if let some ctorTyp := rewrittenExprs[idx]? then + result := result.set! i { result[i]! with typ := ctorTyp } + idx := idx + 1 + (result, idx) + +/-- Update Ixon MutConsts with rewritten expressions. -/ +def updateMutConsts (ms : Array Ixon.MutConst) (rewrittenExprs : Array Ixon.Expr) + : Array Ixon.MutConst := Id.run do + let mut idx := 0 + let mut result := ms + for i in [:ms.size] do + match ms[i]! with + | .indc ind => + let typ := rewrittenExprs[idx]?.getD ind.typ + idx := idx + 1 + let (ctors, nextIdx) := updateConstructorTypes ind.ctors rewrittenExprs idx + idx := nextIdx + result := result.set! i (.indc { ind with typ, ctors }) + | .defn d => + let typ := rewrittenExprs[idx]?.getD d.typ + let value := rewrittenExprs[idx + 1]?.getD d.value + idx := idx + 2 + result := result.set! i (.defn { d with typ, value }) + | .recr r => + let typ := rewrittenExprs[idx]?.getD r.typ + idx := idx + 1 + let (rules, nextIdx) := updateRecursorRules r.rules rewrittenExprs idx + idx := nextIdx + result := result.set! i (.recr { r with typ, rules }) + result + +/-- Apply sharing analysis to expressions and build a Constant. 
-/ +def buildConstantWithSharing (info : Ixon.ConstantInfo) (rootExprs : Array Ixon.Expr) + (refs : Array Address) (univs : Array Ixon.Univ) (dbg : Bool := false) : Ixon.Constant := Id.run do + let (rewrittenExprs, sharingVec) := Sharing.applySharing rootExprs dbg + -- Debug: count Share refs in rewritten expressions + if dbg && sharingVec.size > 0 then + let totalShareRefs := rewrittenExprs.foldl (fun acc e => acc + countShareRefs e) 0 + dbg_trace s!"[buildConstant] sharingVec.size={sharingVec.size}, totalShareRefs in rewritten={totalShareRefs}" + -- Update expressions in info with rewritten versions + let info' := match info with + | .defn d => + let typ := rewrittenExprs[0]?.getD d.typ + let value := rewrittenExprs[1]?.getD d.value + Ixon.ConstantInfo.defn { d with typ, value } + | .axio a => + let typ := rewrittenExprs[0]?.getD a.typ + Ixon.ConstantInfo.axio { a with typ } + | .quot q => + let typ := rewrittenExprs[0]?.getD q.typ + Ixon.ConstantInfo.quot { q with typ } + | .recr r => + let typ := rewrittenExprs[0]?.getD r.typ + let (rules, _) := updateRecursorRules r.rules rewrittenExprs 1 + Ixon.ConstantInfo.recr { r with typ, rules } + | .muts ms => + Ixon.ConstantInfo.muts (updateMutConsts ms rewrittenExprs) + | other => other + return { info := info', sharing := sharingVec, refs, univs } + +/-! ## Individual Constant Compilation -/ + +/-- Convert Lean DefinitionSafety to Ixon DefinitionSafety -/ +def convertSafety : Lean.DefinitionSafety → DefinitionSafety + | .unsafe => .unsaf + | .safe => .safe + | .partial => .part + +/-- Compile a definition to Ixon.Definition with metadata. 
-/ +def compileDefinition (d : DefinitionVal) : CompileM (Ixon.Definition × Ixon.ConstantMeta × Ixon.Expr × Ixon.Expr) := do + withUnivCtx d.cnst.levelParams.toList do + resetArena + let (typeExpr, typeRoot) ← compileExpr d.cnst.type + let (valueExpr, valueRoot) ← compileExpr d.value + let arena ← takeArena + clearExprCache + + -- Store name string components as blobs for deduplication + compileName d.cnst.name + for lvl in d.cnst.levelParams do compileName lvl + for n in d.all do compileName n + for (n, _) in (← getBlockEnv).mutCtx.toList do compileName n + + let nameAddr := d.cnst.name.getHash + let lvlAddrs := d.cnst.levelParams.map (·.getHash) + let allAddrs := d.all.map (·.getHash) + let ctxAddrs ← getMutCtxAddrs + + let defn : Ixon.Definition := { + kind := .defn + safety := convertSafety d.safety + lvls := d.cnst.levelParams.size.toUInt64 + typ := typeExpr + value := valueExpr + } + let constMeta := Ixon.ConstantMeta.defn nameAddr lvlAddrs d.hints allAddrs ctxAddrs arena typeRoot valueRoot + pure (defn, constMeta, typeExpr, valueExpr) + +/-- Compile a theorem to Ixon.Definition with metadata. 
-/ +def compileTheorem (d : TheoremVal) : CompileM (Ixon.Definition × Ixon.ConstantMeta × Ixon.Expr × Ixon.Expr) := do + withUnivCtx d.cnst.levelParams.toList do + resetArena + let (typeExpr, typeRoot) ← compileExpr d.cnst.type + let (valueExpr, valueRoot) ← compileExpr d.value + let arena ← takeArena + clearExprCache + + -- Store name string components as blobs for deduplication + compileName d.cnst.name + for lvl in d.cnst.levelParams do compileName lvl + for n in d.all do compileName n + for (n, _) in (← getBlockEnv).mutCtx.toList do compileName n + + let nameAddr := d.cnst.name.getHash + let lvlAddrs := d.cnst.levelParams.map (·.getHash) + let allAddrs := d.all.map (·.getHash) + let ctxAddrs ← getMutCtxAddrs + + let defn : Ixon.Definition := { + kind := .thm + safety := .safe + lvls := d.cnst.levelParams.size.toUInt64 + typ := typeExpr + value := valueExpr + } + let constMeta := Ixon.ConstantMeta.defn nameAddr lvlAddrs .opaque allAddrs ctxAddrs arena typeRoot valueRoot + pure (defn, constMeta, typeExpr, valueExpr) + +/-- Compile an opaque to Ixon.Definition with metadata. 
-/ +def compileOpaque (d : OpaqueVal) : CompileM (Ixon.Definition × Ixon.ConstantMeta × Ixon.Expr × Ixon.Expr) := do + withUnivCtx d.cnst.levelParams.toList do + resetArena + let (typeExpr, typeRoot) ← compileExpr d.cnst.type + let (valueExpr, valueRoot) ← compileExpr d.value + let arena ← takeArena + clearExprCache + + -- Store name string components as blobs for deduplication + compileName d.cnst.name + for lvl in d.cnst.levelParams do compileName lvl + for n in d.all do compileName n + for (n, _) in (← getBlockEnv).mutCtx.toList do compileName n + + let nameAddr := d.cnst.name.getHash + let lvlAddrs := d.cnst.levelParams.map (·.getHash) + let allAddrs := d.all.map (·.getHash) + let ctxAddrs ← getMutCtxAddrs + + let defn : Ixon.Definition := { + kind := .opaq + safety := if d.isUnsafe then .unsaf else .safe + lvls := d.cnst.levelParams.size.toUInt64 + typ := typeExpr + value := valueExpr + } + let constMeta := Ixon.ConstantMeta.defn nameAddr lvlAddrs .opaque allAddrs ctxAddrs arena typeRoot valueRoot + pure (defn, constMeta, typeExpr, valueExpr) + +/-- Compile an axiom to Ixon.Axiom with metadata. -/ +def compileAxiom (a : AxiomVal) : CompileM (Ixon.Axiom × Ixon.ConstantMeta × Ixon.Expr) := do + withUnivCtx a.cnst.levelParams.toList do + resetArena + let (typeExpr, typeRoot) ← compileExpr a.cnst.type + let arena ← takeArena + clearExprCache + + -- Store name string components as blobs for deduplication + compileName a.cnst.name + for lvl in a.cnst.levelParams do compileName lvl + + let nameAddr := a.cnst.name.getHash + let lvlAddrs := a.cnst.levelParams.map (·.getHash) + + let axio : Ixon.Axiom := { + isUnsafe := a.isUnsafe + lvls := a.cnst.levelParams.size.toUInt64 + typ := typeExpr + } + let constMeta := Ixon.ConstantMeta.axio nameAddr lvlAddrs arena typeRoot + pure (axio, constMeta, typeExpr) + +/-- Compile a quotient to Ixon.Quotient with metadata. 
-/ +def compileQuotient (q : QuotVal) : CompileM (Ixon.Quotient × Ixon.ConstantMeta × Ixon.Expr) := do + withUnivCtx q.cnst.levelParams.toList do + resetArena + let (typeExpr, typeRoot) ← compileExpr q.cnst.type + let arena ← takeArena + clearExprCache + + -- Store name string components as blobs for deduplication + compileName q.cnst.name + for lvl in q.cnst.levelParams do compileName lvl + + let nameAddr := q.cnst.name.getHash + let lvlAddrs := q.cnst.levelParams.map (·.getHash) + + let kind : QuotKind := match q.kind with + | .type => .type + | .ctor => .ctor + | .lift => .lift + | .ind => .ind + let quot : Ixon.Quotient := { + kind + lvls := q.cnst.levelParams.size.toUInt64 + typ := typeExpr + } + let constMeta := Ixon.ConstantMeta.quot nameAddr lvlAddrs arena typeRoot + pure (quot, constMeta, typeExpr) + +/-- Compile a recursor rule to Ixon, returning the ctor address and rhs expression. -/ +def compileRecursorRule (rule : RecursorRule) : CompileM (Ixon.RecursorRule × Address × UInt64) := do + let (rhs, ruleRoot) ← compileExpr rule.rhs + let ctorAddr := rule.ctor.getHash + pure ({ fields := rule.nfields.toUInt64, rhs }, ctorAddr, ruleRoot) + +/-- Compile a recursor to Ixon.Recursor with metadata. 
-/ +def compileRecursor (r : RecursorVal) : CompileM (Ixon.Recursor × Ixon.ConstantMeta × Ixon.Expr) := do + withUnivCtx r.cnst.levelParams.toList do + resetArena + let (typeExpr, typeRoot) ← compileExpr r.cnst.type + + let mut rules : Array Ixon.RecursorRule := #[] + let mut ruleAddrs : Array Address := #[] + let mut ruleRoots : Array UInt64 := #[] + for rule in r.rules do + let (ixonRule, ctorAddr, ruleRoot) ← compileRecursorRule rule + rules := rules.push ixonRule + ruleAddrs := ruleAddrs.push ctorAddr + ruleRoots := ruleRoots.push ruleRoot + + let arena ← takeArena + clearExprCache + + -- Store name string components as blobs for deduplication + compileName r.cnst.name + for lvl in r.cnst.levelParams do compileName lvl + for n in r.all do compileName n + for (n, _) in (← getBlockEnv).mutCtx.toList do compileName n + for rule in r.rules do compileName rule.ctor + + let nameAddr := r.cnst.name.getHash + let lvlAddrs := r.cnst.levelParams.map (·.getHash) + let allAddrs := r.all.map (·.getHash) + let ctxAddrs ← getMutCtxAddrs + + let recursor : Ixon.Recursor := { + k := r.k + isUnsafe := r.isUnsafe + lvls := r.cnst.levelParams.size.toUInt64 + params := r.numParams.toUInt64 + indices := r.numIndices.toUInt64 + motives := r.numMotives.toUInt64 + minors := r.numMinors.toUInt64 + typ := typeExpr + rules := rules + } + let constMeta := Ixon.ConstantMeta.recr nameAddr lvlAddrs ruleAddrs allAddrs ctxAddrs arena typeRoot ruleRoots + pure (recursor, constMeta, typeExpr) + +/-- Compile a constructor to Ixon.Constructor with metadata (ConstantMeta.ctor). 
-/ +def compileConstructor (c : ConstructorVal) : CompileM (Ixon.Constructor × Ixon.ConstantMeta × Ixon.Expr) := do + resetArena + let (typeExpr, typeRoot) ← compileExpr c.cnst.type + let arena ← takeArena + clearExprCache + + -- Store name string components as blobs for deduplication + compileName c.cnst.name + for lvl in c.cnst.levelParams do compileName lvl + + let nameAddr := c.cnst.name.getHash + let lvlAddrs := c.cnst.levelParams.map (·.getHash) + + let ctor : Ixon.Constructor := { + isUnsafe := c.isUnsafe + lvls := c.cnst.levelParams.size.toUInt64 + cidx := c.cidx.toUInt64 + params := c.numParams.toUInt64 + fields := c.numFields.toUInt64 + typ := typeExpr + } + let ctorMeta := Ixon.ConstantMeta.ctor nameAddr lvlAddrs c.induct.getHash arena typeRoot + pure (ctor, ctorMeta, typeExpr) + +/-- Compile an inductive to Ixon.Inductive with metadata. + Takes the inductive and its constructors (looked up from Ix.Environment). + Returns (inductive, indc meta, ctor metas with names, all exprs). 
-/ +def compileInductive (i : InductiveVal) (ctorVals : Array ConstructorVal) + : CompileM (Ixon.Inductive × Ixon.ConstantMeta × Array (Name × Ixon.ConstantMeta) × Array Ixon.Expr) := do + withUnivCtx i.cnst.levelParams.toList do + resetArena + let (typeExpr, typeRoot) ← compileExpr i.cnst.type + let arena ← takeArena + clearExprCache + + let mut ctors : Array Ixon.Constructor := #[] + let mut ctorMetaPairs : Array (Name × Ixon.ConstantMeta) := #[] + let mut ctorNameAddrs : Array Address := #[] + let mut ctorExprs : Array Ixon.Expr := #[typeExpr] + for ctorVal in ctorVals do + let (c, cm, e) ← compileConstructor ctorVal + ctors := ctors.push c + ctorMetaPairs := ctorMetaPairs.push (ctorVal.cnst.name, cm) + ctorNameAddrs := ctorNameAddrs.push ctorVal.cnst.name.getHash + ctorExprs := ctorExprs.push e + + -- Store name string components as blobs for deduplication + compileName i.cnst.name + for lvl in i.cnst.levelParams do compileName lvl + for n in i.all do compileName n + for (n, _) in (← getBlockEnv).mutCtx.toList do compileName n + + let nameAddr := i.cnst.name.getHash + let lvlAddrs := i.cnst.levelParams.map (·.getHash) + let allAddrs := i.all.map (·.getHash) + let ctxAddrs ← getMutCtxAddrs + + let ind : Ixon.Inductive := { + recr := i.isRec + refl := i.isReflexive + isUnsafe := i.isUnsafe + lvls := i.cnst.levelParams.size.toUInt64 + params := i.numParams.toUInt64 + indices := i.numIndices.toUInt64 + nested := i.numNested.toUInt64 + typ := typeExpr + ctors := ctors + } + let constMeta := Ixon.ConstantMeta.indc nameAddr lvlAddrs ctorNameAddrs allAddrs ctxAddrs arena typeRoot + pure (ind, constMeta, ctorMetaPairs, ctorExprs) + +/-! ## Internal compilation helpers for mutual blocks -/ + +/-- Compile definition data for a Def structure (from Mutual.lean). 
-/ +def compileDefinitionData (d : Def) : CompileM (Ixon.Definition × Ixon.ConstantMeta × Ixon.Expr × Ixon.Expr) := do + withUnivCtx d.levelParams.toList do + resetArena + let (typeExpr, typeRoot) ← compileExpr d.type + let (valueExpr, valueRoot) ← compileExpr d.value + let arena ← takeArena + clearExprCache + + -- Store name components for deduplication + compileName d.name + for lvl in d.levelParams do compileName lvl + for n in d.all do compileName n + for (n, _) in (← getBlockEnv).mutCtx.toList do compileName n + + let nameAddr := d.name.getHash + let lvlAddrs := d.levelParams.map (·.getHash) + let allAddrs := d.all.map (·.getHash) + let ctxAddrs ← getMutCtxAddrs + + let defn : Ixon.Definition := { + kind := d.kind + safety := d.safety + lvls := d.levelParams.size.toUInt64 + typ := typeExpr + value := valueExpr + } + let hints := match d.kind with + | .defn => d.hints + | .thm => .opaque + | .opaq => .opaque + let constMeta := Ixon.ConstantMeta.defn nameAddr lvlAddrs hints allAddrs ctxAddrs arena typeRoot valueRoot + pure (defn, constMeta, typeExpr, valueExpr) + +/-- Compile inductive data for an Ind structure (from Mutual.lean). + Returns (inductive, indc meta, ctor metas with names, all exprs). 
-/ +def compileInductiveData (i : Ind) + : CompileM (Ixon.Inductive × Ixon.ConstantMeta × Array (Name × Ixon.ConstantMeta) × Array Ixon.Expr) := do + withUnivCtx i.levelParams.toList do + resetArena + let (typeExpr, typeRoot) ← compileExpr i.type + let arena ← takeArena + clearExprCache + + let mut ctors : Array Ixon.Constructor := #[] + let mut ctorMetaPairs : Array (Name × Ixon.ConstantMeta) := #[] + let mut ctorNameAddrs : Array Address := #[] + let mut ctorExprs : Array Ixon.Expr := #[typeExpr] + for ctorVal in i.ctors do + let (c, cm, e) ← compileConstructor ctorVal + ctors := ctors.push c + ctorMetaPairs := ctorMetaPairs.push (ctorVal.cnst.name, cm) + ctorNameAddrs := ctorNameAddrs.push ctorVal.cnst.name.getHash + ctorExprs := ctorExprs.push e + + -- Store name components for deduplication + compileName i.name + for lvl in i.levelParams do compileName lvl + for n in i.all do compileName n + for (n, _) in (← getBlockEnv).mutCtx.toList do compileName n + + let nameAddr := i.name.getHash + let lvlAddrs := i.levelParams.map (·.getHash) + let allAddrs := i.all.map (·.getHash) + let ctxAddrs ← getMutCtxAddrs + + let ind : Ixon.Inductive := { + recr := i.isRec + refl := i.isReflexive + isUnsafe := i.isUnsafe + lvls := i.levelParams.size.toUInt64 + params := i.numParams.toUInt64 + indices := i.numIndices.toUInt64 + nested := i.numNested.toUInt64 + typ := typeExpr + ctors := ctors + } + let constMeta := Ixon.ConstantMeta.indc nameAddr lvlAddrs ctorNameAddrs allAddrs ctxAddrs arena typeRoot + pure (ind, constMeta, ctorMetaPairs, ctorExprs) + +/-- Compile recursor data for a RecursorVal. 
-/ +def compileRecursorData (r : RecursorVal) : CompileM (Ixon.Recursor × Ixon.ConstantMeta × Ixon.Expr) := do + withUnivCtx r.cnst.levelParams.toList do + resetArena + let (typeExpr, typeRoot) ← compileExpr r.cnst.type + + let mut rules : Array Ixon.RecursorRule := #[] + let mut ruleAddrs : Array Address := #[] + let mut ruleRoots : Array UInt64 := #[] + for rule in r.rules do + let (ixonRule, ctorAddr, ruleRoot) ← compileRecursorRule rule + rules := rules.push ixonRule + ruleAddrs := ruleAddrs.push ctorAddr + ruleRoots := ruleRoots.push ruleRoot + + let arena ← takeArena + clearExprCache + + -- Store name string components as blobs for deduplication + compileName r.cnst.name + for lvl in r.cnst.levelParams do compileName lvl + for n in r.all do compileName n + for (n, _) in (← getBlockEnv).mutCtx.toList do compileName n + for rule in r.rules do compileName rule.ctor + + let nameAddr := r.cnst.name.getHash + let lvlAddrs := r.cnst.levelParams.map (·.getHash) + let allAddrs := r.all.map (·.getHash) + let ctxAddrs ← getMutCtxAddrs + + let recursor : Ixon.Recursor := { + k := r.k + isUnsafe := r.isUnsafe + lvls := r.cnst.levelParams.size.toUInt64 + params := r.numParams.toUInt64 + indices := r.numIndices.toUInt64 + motives := r.numMotives.toUInt64 + minors := r.numMinors.toUInt64 + typ := typeExpr + rules := rules + } + let constMeta := Ixon.ConstantMeta.recr nameAddr lvlAddrs ruleAddrs allAddrs ctxAddrs arena typeRoot ruleRoots + pure (recursor, constMeta, typeExpr) + +/-! ## Mutual Block Compilation -/ + +/-- Compile sorted equivalence classes of mutual constants. + Returns compiled constants, all root expressions, and metadata for each constant. 
-/ +def compileMutConsts (classes : List (List MutConst)) + : CompileM (Array Ixon.MutConst × Array Ixon.Expr × Array (Name × Ixon.ConstantMeta)) := do + let mut dat : Array Ixon.MutConst := #[] + let mut allExprs : Array Ixon.Expr := #[] + let mut allMetas : Array (Name × Ixon.ConstantMeta) := #[] + + -- Only push one representative per equivalence class into dat, + -- since alpha-equivalent constants compile to identical data and share + -- the same class index in MutConst.ctx. for constClass in classes do - let mut classData := #[] - -- compile each constant in a class + let mut representativePushed := false for const in constClass do match const with - | .indc x => do - let (i, m) <- .withCurrent x.name <| compileIndc x - classData := classData.push (.indc i) - met := met.union m - | .defn x => do - let (d, m) <- .withCurrent x.name <| compileDefn x - classData := classData.push (.defn d) - met := met.insert (<- compileName x.name) (<- storeMeta m) - | .recr x => do - let (r, m) <- .withCurrent x.name <| compileRecr x - classData := classData.push (.recr r) - met := met.insert (<- compileName x.name) (<- storeMeta m) - -- make sure we have no empty classes and all defs in a class are equal - match classData.toList with - | [] => throw (.badMutualBlock classes) - | [x] => dat := dat.push x - | x::xs => - if xs.foldr (fun y acc => (y == x) && acc) true - then dat := dat.push x - else throw (.badMutualBlock classes) - pure (.muts dat.toList, met) - -def compileMutual : MutConst -> CompileM (Ixon × Ixon) -| const => do - --dbg_trace s!"compileMutual {const.name}" - let all := (<- read).all ---dbg_trace s!"compileMutual {const.name} all: {repr all}" - if all == {const.name} && - (const matches .defn _ || const matches .recr _) then do + | .indc i => do + let (ind, constMeta, ctorMetaPairs, exprs) ← withCurrent i.name (compileInductiveData i) + if !representativePushed then + dat := dat.push (Ixon.MutConst.indc ind) + for e in exprs do + allExprs := allExprs.push e + 
representativePushed := true + allMetas := allMetas.push (i.name, constMeta) + for (ctorName, ctorMeta) in ctorMetaPairs do + allMetas := allMetas.push (ctorName, ctorMeta) + | .defn d => do + let (defn, constMeta, tExpr, vExpr) ← withCurrent d.name (compileDefinitionData d) + if !representativePushed then + dat := dat.push (Ixon.MutConst.defn defn) + allExprs := allExprs.push tExpr + allExprs := allExprs.push vExpr + representativePushed := true + allMetas := allMetas.push (d.name, constMeta) + | .recr r => do + let (recursor, constMeta, tExpr) ← withCurrent r.cnst.name (compileRecursorData r) + if !representativePushed then + dat := dat.push (Ixon.MutConst.recr recursor) + allExprs := allExprs.push tExpr + for rule in recursor.rules do + allExprs := allExprs.push rule.rhs + representativePushed := true + allMetas := allMetas.push (r.cnst.name, constMeta) + + pure (dat, allExprs, allMetas) + +/-- Compile a mutual block and create projections for each constant. + Returns the Muts block constant and projections for each name with metadata. 
-/ +def compileMutualBlock (classes : List (List MutConst)) + : CompileM BlockResult := do + let mutCtx := MutConst.ctx classes + withMutCtx mutCtx do + let (mutConsts, allExprs, allMetas) ← compileMutConsts classes + let cache ← getBlockState + let block := buildConstantWithSharing (.muts mutConsts) allExprs cache.refs cache.univs + + -- Serialize once and compute block address + let blockBytes := Ixon.ser block + let blockAddr := Address.blake3 blockBytes + + -- Build a lookup map from name to metadata + let metaMap : Std.HashMap Name Ixon.ConstantMeta := allMetas.foldl (init := {}) fun m (n, constMeta) => m.insert n constMeta + + -- Create projections for each constant + let mut projections : Array (Name × Ixon.Constant × Ixon.ConstantMeta) := #[] + let mut idx : UInt64 := 0 + for constClass in classes do + for const in constClass do + let projInfo : Ixon.ConstantInfo := match const with + | .defn _ => .dPrj ⟨idx, blockAddr⟩ + | .indc _ => .iPrj ⟨idx, blockAddr⟩ + | .recr _ => .rPrj ⟨idx, blockAddr⟩ + let proj : Ixon.Constant := ⟨projInfo, #[], #[], #[]⟩ + let constMeta := metaMap.get? const.name |>.getD .empty + projections := projections.push (const.name, proj, constMeta) + + -- For inductives, also create constructor projections + if let .indc i := const then + let mut cidx : UInt64 := 0 + for ctor in i.ctors do + let ctorProjInfo : Ixon.ConstantInfo := .cPrj ⟨idx, cidx, blockAddr⟩ + let ctorProj : Ixon.Constant := ⟨ctorProjInfo, #[], #[], #[]⟩ + let ctorMeta := metaMap.get? ctor.cnst.name |>.getD .empty + projections := projections.push (ctor.cnst.name, ctorProj, ctorMeta) + cidx := cidx + 1 + idx := idx + 1 + + pure ⟨block, blockBytes, blockAddr, .empty, projections⟩ + +/-! ## Main Compilation Entry Points -/ + +/-- Build mutCtx for an inductive: includes the inductive and all its constructors. 
-/ +def buildInductiveMutCtx (i : InductiveVal) (ctorVals : Array ConstructorVal) : Ix.MutCtx := Id.run do + let mut ctx : Ix.MutCtx := Batteries.RBMap.empty + -- Inductive at index 0 + ctx := ctx.insert i.cnst.name 0 + -- Constructors at indices 1, 2, ... + for (ctor, idx) in ctorVals.zipIdx do + ctx := ctx.insert ctor.cnst.name (idx + 1) + return ctx + +/-- Build a BlockResult from a block constant, serializing once. -/ +def BlockResult.mk' (block : Ixon.Constant) (blockMeta : Ixon.ConstantMeta := .empty) + (projections : Array (Name × Ixon.Constant × Ixon.ConstantMeta) := #[]) : BlockResult := + let blockBytes := Ixon.ser block + let blockAddr := Address.blake3 blockBytes + ⟨block, blockBytes, blockAddr, blockMeta, projections⟩ + +/-- Compile a single Ix.ConstantInfo directly (singleton, non-mutual). + Returns BlockResult with the constant and any projections needed. -/ +def compileConstantInfo (const : ConstantInfo) : CompileM BlockResult := do + let name := const.getCnst.name + let mutCtx : Ix.MutCtx := Batteries.RBMap.empty.insert name 0 + withMutCtx mutCtx do match const with - | .defn d => do - let (dat, met) <- .withMutCtx (.single d.name 0) <| compileDefn d - pure (.defn dat, .meta met) - | .recr r => do - let (dat, met) <- .withMutCtx (.single r.name 0) <| compileRecr r - pure (.recr dat, .meta met) - | _ => unreachable! 
+ | .defnInfo d => + let (defn, constMeta, tExpr, vExpr) ← compileDefinition d + let cache ← getBlockState + let block := buildConstantWithSharing (.defn defn) #[tExpr, vExpr] cache.refs cache.univs + pure (BlockResult.mk' block constMeta) + + | .thmInfo d => + let (defn, constMeta, tExpr, vExpr) ← compileTheorem d + let cache ← getBlockState + let block := buildConstantWithSharing (.defn defn) #[tExpr, vExpr] cache.refs cache.univs + pure (BlockResult.mk' block constMeta) + + | .opaqueInfo d => + let (defn, constMeta, tExpr, vExpr) ← compileOpaque d + let cache ← getBlockState + let block := buildConstantWithSharing (.defn defn) #[tExpr, vExpr] cache.refs cache.univs + pure (BlockResult.mk' block constMeta) + + | .axiomInfo a => + let (axio, constMeta, typeExpr) ← compileAxiom a + let cache ← getBlockState + let block := buildConstantWithSharing (.axio axio) #[typeExpr] cache.refs cache.univs + pure (BlockResult.mk' block constMeta) + + | .quotInfo q => + let (quot, constMeta, typeExpr) ← compileQuotient q + let cache ← getBlockState + let block := buildConstantWithSharing (.quot quot) #[typeExpr] cache.refs cache.univs + pure (BlockResult.mk' block constMeta) + + | .recInfo r => + let (recursor, constMeta, tExpr) ← compileRecursor r + let mut allExprs : Array Ixon.Expr := #[tExpr] + for rule in recursor.rules do + allExprs := allExprs.push rule.rhs + let cache ← getBlockState + let block := buildConstantWithSharing (.recr recursor) allExprs cache.refs cache.univs + pure (BlockResult.mk' block constMeta) + + | .inductInfo i => + -- Look up constructor values from environment + let mut ctorVals : Array ConstructorVal := #[] + for ctorName in i.ctors do + let ctorConst ← findConst ctorName + match ctorConst with + | .ctorInfo c => ctorVals := ctorVals.push c + | _ => throw (.invalidMutualBlock s!"Expected constructor for {ctorName}") + -- Build mutCtx with all names in the inductive family + let indMutCtx := buildInductiveMutCtx i ctorVals + withMutCtx indMutCtx do 
+ let (ind, indMeta, ctorMetaPairs, ctorExprs) ← compileInductive i ctorVals + let cache ← getBlockState + -- Wrap single inductive in muts for consistency + let block := buildConstantWithSharing (.muts #[.indc ind]) ctorExprs cache.refs cache.univs + -- Compute block address for projections + let blockBytes := Ixon.ser block + let blockAddr := Address.blake3 blockBytes + -- Create projections for inductive and constructors + let mut projections : Array (Name × Ixon.Constant × Ixon.ConstantMeta) := #[] + -- Inductive projection (index 0) + let indProjInfo : Ixon.ConstantInfo := .iPrj ⟨0, blockAddr⟩ + let indProj : Ixon.Constant := ⟨indProjInfo, #[], #[], #[]⟩ + projections := projections.push (i.cnst.name, indProj, indMeta) + -- Constructor projections from ctorMetaPairs + for ((ctorName, ctorMeta), cidx) in ctorMetaPairs.zipIdx do + let ctorProjInfo : Ixon.ConstantInfo := .cPrj ⟨0, cidx.toUInt64, blockAddr⟩ + let ctorProj : Ixon.Constant := ⟨ctorProjInfo, #[], #[], #[]⟩ + projections := projections.push (ctorName, ctorProj, ctorMeta) + pure ⟨block, blockBytes, blockAddr, .empty, projections⟩ + + | .ctorInfo c => + -- Constructors are compiled by compiling their parent inductive + let parentConst ← findConst c.induct + match parentConst with + | .inductInfo i => + let mut ctorVals : Array ConstructorVal := #[] + for ctorName in i.ctors do + let ctorConst ← findConst ctorName + match ctorConst with + | .ctorInfo cv => ctorVals := ctorVals.push cv + | _ => throw (.invalidMutualBlock s!"Expected constructor") + -- Build mutCtx with all names in the inductive family + let indMutCtx := buildInductiveMutCtx i ctorVals + withMutCtx indMutCtx do + let (ind, indMeta, ctorMetaPairs, ctorExprs) ← compileInductive i ctorVals + let cache ← getBlockState + let block := buildConstantWithSharing (.muts #[.indc ind]) ctorExprs cache.refs cache.univs + let blockBytes := Ixon.ser block + let blockAddr := Address.blake3 blockBytes + let mut projections : Array (Name × Ixon.Constant × 
Ixon.ConstantMeta) := #[] + let indProjInfo : Ixon.ConstantInfo := .iPrj ⟨0, blockAddr⟩ + let indProj : Ixon.Constant := ⟨indProjInfo, #[], #[], #[]⟩ + projections := projections.push (i.cnst.name, indProj, indMeta) + for ((ctorName, ctorMeta), cidx) in ctorMetaPairs.zipIdx do + let ctorProjInfo : Ixon.ConstantInfo := .cPrj ⟨0, cidx.toUInt64, blockAddr⟩ + let ctorProj : Ixon.Constant := ⟨ctorProjInfo, #[], #[], #[]⟩ + projections := projections.push (ctorName, ctorProj, ctorMeta) + pure ⟨block, blockBytes, blockAddr, .empty, projections⟩ + | _ => throw (.invalidMutualBlock s!"Constructor has non-inductive parent") + +/-- Compile a constant by name (looks it up in the environment). + Uses the block's `all` set from BlockEnv (populated from SCC analysis). -/ +def compileConstant (name : Name) : CompileM BlockResult := do + let const ← findConst name + let blockEnv ← getBlockEnv + -- Use the block's all set from SCC analysis + let all := blockEnv.all + + -- Handle singleton non-mutual constants + if all.size == 1 then + compileConstantInfo const else - let mut consts := #[] - for name in all do - match <- findLeanConst name with - | .inductInfo val => do consts := consts.push (<- MutConst.mkIndc val) - | .defnInfo val => consts := consts.push (MutConst.mkDefn val) - | .opaqueInfo val => consts := consts.push (MutConst.mkOpaq val) - | .thmInfo val => consts := consts.push (MutConst.mkTheo val) - | .recInfo val => consts := consts.push (MutConst.recr val) + -- Multi-constant mutual block + let mut consts : Array MutConst := #[] + for n in all do + match ← findConst n with + | .inductInfo val => consts := consts.push (← MutConst.mkIndc val) + | .defnInfo val => consts := consts.push (MutConst.fromDefinitionVal val) + | .opaqueInfo val => consts := consts.push (MutConst.fromOpaqueVal val) + | .thmInfo val => consts := consts.push (MutConst.fromTheoremVal val) + | .recInfo val => consts := consts.push (.recr val) | _ => continue - --dbg_trace s!"compileMutual {const.name} 
consts: {repr <| consts.map (·.name)}" - -- sort MutConsts into equivalence classes - let mutConsts <- sortConsts consts.toList - --dbg_trace s!"compileMutual {const.name} mutConsts: {repr <| mutConsts.map (·.map (·.name))}" - let mutCtx := MutConst.ctx mutConsts - --dbg_trace s!"compileMutual {const.name} mutCtx: {repr mutCtx}" - let mutMeta <- mutConsts.mapM fun m => m.mapM <| fun c => compileName c.name - -- compile each constant with the mutCtx - let (data, metas) <- .withMutCtx mutCtx (compileMutConsts mutConsts) - -- add top-level mutual block to our state - let ctx <- mutCtx.toList.mapM fun (n, i) => do - pure (<- compileName n, <- storeNat i) - let block: MetaAddress := - ⟨<- storeIxon data, <- storeMeta ⟨[.muts mutMeta, .map ctx, .map metas.toList]⟩⟩ - modify fun stt => { stt with blocks := stt.blocks.insert block } - -- then add all projections, returning the inductive we started with - let mut ret? : Option (Ixon × Ixon) := none - for const' in consts do - let idx <- do match mutCtx.find? const'.name with - | some idx => pure idx - | none => throw $ .cantFindMutIndex const'.name mutCtx - let n <- compileName const'.name - let «meta» <- do match metas.find? n with - | some «meta» => pure ⟨[.link block.meta, .link «meta»]⟩ - | none => throw $ .cantFindMutMeta const'.name metas - let data := match const with - | .defn _ => .dprj ⟨idx, block.data⟩ - | .indc _ => .iprj ⟨idx, block.data⟩ - | .recr _ => .rprj ⟨idx, block.data⟩ - let addr := ⟨<- storeIxon data, <- storeMeta «meta»⟩ - modify fun stt => { stt with - constCache := stt.constCache.insert const'.name addr + + let mutConsts ← sortConsts consts.toList + compileMutualBlock mutConsts + +/-! ## Block Compilation Entry Point -/ + +/-- Compile a single block purely, returning the block result and state. 
-/ +def compileBlockPure (compileEnv : CompileEnv) (all : Set Name) (lo : Name) + : Except CompileError (BlockResult × BlockState) := + let blockEnv : BlockEnv := { + all := all + current := lo + mutCtx := default + univCtx := [] + } + CompileM.run compileEnv blockEnv {} (compileConstant lo) + +/-! ## Main Compilation Entry Point -/ + +/-- Compile an Ix.Environment purely (sequential, no IO). + Returns the compiled Ixon.Env and total serialized bytes. + Pass `dbg := true` to enable progress tracing via dbg_trace. -/ +def compileEnv (env : Ix.Environment) (blocks : Ix.CondensedBlocks) (dbg : Bool := false) + : Except String (Ixon.Env × Nat) := Id.run do + -- Initialize compilation state + let mut compileEnv := CompileEnv.new env + let mut blockNames : Std.HashMap Address Ix.Name := {} + + -- Build work queue data structures + let totalBlocks := blocks.blocks.size + + -- blockInfo: lo → (all names in block, remaining dep count) + let mut blockInfo : Std.HashMap Name (Set Name × Nat) := {} + -- reverseDeps: constant name → list of block lowlinks that depend on it + let mut reverseDeps : Std.HashMap Name (Array Name) := {} + + for (lo, all) in blocks.blocks do + let deps := blocks.blockRefs.get! lo + blockInfo := blockInfo.insert lo (all, deps.size) + -- Register reverse dependencies + for depName in deps do + reverseDeps := reverseDeps.alter depName fun + | some arr => some (arr.push lo) + | none => some #[lo] + + -- Initialize ready queue with blocks that have no dependencies + let mut readyQueue : Array (Name × Set Name) := #[] + for (lo, (all, depCount)) in blockInfo do + if depCount == 0 then + readyQueue := readyQueue.push (lo, all) + + -- Compile blocks in dependency order + let mut blocksCompiled : Nat := 0 + let mut lastPct : Nat := 0 + + while !readyQueue.isEmpty do + -- Pop from ready queue + let (lo, all) := readyQueue.back! 
+ readyQueue := readyQueue.pop + + match compileBlockPure compileEnv all lo with + | Except.ok (result, cache) => + -- Use pre-computed serialized bytes and address + let blockBytes := result.blockBytes + let blockAddr := result.blockAddr + compileEnv := { compileEnv with + totalBytes := compileEnv.totalBytes + blockBytes.size + constants := compileEnv.constants.insert blockAddr result.block + blobs := cache.blockBlobs.fold (fun m k v => m.insert k v) compileEnv.blobs } - if const'.name == const.name then ret? := some (data, .meta «meta») - for ctor in const'.ctors do - let cdata := .cprj ⟨idx, ctor.cidx, block.data⟩ - let cn <- compileName ctor.name - let cmeta <- do match metas.find? cn with - | some «meta» => pure ⟨[.link block.meta, .link «meta»]⟩ - | none => throw $ .cantFindMutMeta const'.name metas - let caddr := ⟨<- storeIxon cdata, <- storeMeta cmeta⟩ - modify fun stt => { stt with - constCache := stt.constCache.insert ctor.name caddr - } - match ret? with - | some ret => return ret - | none => throw $ .mutualBlockMissingProjection const.name - - -def compileConstant (name: Lean.Name): CompileM MetaAddress := do - --dbg_trace "compileConstant {name}" - match (<- get).constCache.find? 
name with - | some x => pure x - | none => do - let c <- findLeanConst name - let maddr <- .withCurrent name <| go c - modifyGet fun stt => (maddr, { stt with - constCache := stt.constCache.insert name maddr - }) - where - store: Ixon × Ixon -> CompileM MetaAddress - | (d, m) => do pure ⟨<- storeIxon d, <- storeIxon m⟩ - go : Lean.ConstantInfo -> CompileM MetaAddress - | .defnInfo val => compileMutual (MutConst.mkDefn val) >>= store - | .thmInfo val => compileMutual (MutConst.mkTheo val) >>= store - | .opaqueInfo val => compileMutual (MutConst.mkOpaq val) >>= store - | .inductInfo val => MutConst.mkIndc val >>= compileMutual >>= store - | .ctorInfo val => do match <- findLeanConst val.induct with - | .inductInfo ind => do - let _ <- MutConst.mkIndc ind >>= compileMutual - match (<- get).constCache.find? val.name with - | some maddr => do pure maddr - | none => throw <| .mutualBlockMissingProjection val.name - | c => throw <| .invalidConstantKind c.name "inductive" c.ctorName - | .recInfo val => compileMutual (MutConst.recr val) >>= store - | .axiomInfo ⟨⟨name, lvls, type⟩, isUnsafe⟩ => .withLevels lvls do - let n <- compileName name - let ls <- lvls.mapM compileName - let t <- compileExpr type - let dat := .axio ⟨isUnsafe, lvls.length, t.data⟩ - let met := .meta ⟨[.link n, .links ls, .link t.meta]⟩ - store (dat, met) - | .quotInfo ⟨⟨name, lvls, type⟩, kind⟩ => .withLevels lvls do - let n <- compileName name - let ls <- lvls.mapM compileName - let t <- compileExpr type - let dat := .quot ⟨kind, lvls.length, t.data⟩ - let met := .meta ⟨[.link n, .links ls, .link t.meta]⟩ - store (dat, met) - ---partial def makeLeanDef --- (name: Lean.Name) (levelParams: List Lean.Name) (type value: Lean.Expr) --- : Lean.DefinitionVal := --- { name, levelParams, type, value, hints := .opaque, safety := .safe } --- ---partial def tryAddLeanDef (defn: Lean.DefinitionVal) : CompileM Unit := do --- match (<- get).env.constants.find? 
defn.name with --- | some _ => pure () --- | none => do --- let env <- (·.env) <$> get --- let maxHeartBeats <- (·.maxHeartBeats) <$> get --- let decl := Lean.Declaration.defnDecl defn --- match Lean.Environment.addDeclCore env maxHeartBeats decl .none with --- | .ok e => do --- modify fun stt => { stt with env := e } --- return () --- | .error e => throw $ .kernelException e --- ---partial def addDef (lvls: List Lean.Name) (typ val: Lean.Expr) : CompileM MetaAddress := do --- --let typ' <- compileExpr typ --- --let val' <- compileExpr val --- let anon := .defnInfo ⟨⟨.anonymous, lvls, typ⟩, val, .opaque, .safe, []⟩ --- let (data, «meta») <- compileConstant anon --- let anonAddr := ⟨<- storeIxon data, <- storeIxon «meta»⟩ --- let name := anonAddr.data.toUniqueName --- let const := .defnInfo ⟨⟨name, lvls, typ⟩, val, .opaque, .safe, []⟩ --- let (data, «meta») <- compileConstant const --- let addr := ⟨<- storeIxon data, <- storeIxon «meta»⟩ --- if addr.data != anonAddr.data then --- throw <| .alphaInvarianceFailure anon anonAddr const addr --- else --- tryAddLeanDef (makeLeanDef name lvls typ val) --- return addr --- ---partial def commitConst (addr: MetaAddress) (secret: Address) : CompileM MetaAddress := do --- let comm := Ixon.comm ⟨secret, addr.data⟩ --- let commAddr <- storeIxon comm --- let commMeta := Ixon.comm ⟨secret, addr.meta⟩ --- let commMetaAddr <- storeIxon commMeta --- let addr' := ⟨commAddr, commMetaAddr⟩ --- modify fun stt => { stt with --- comms := stt.comms.insert commAddr.toUniqueName addr' --- } --- return addr' --- ---partial def commitDef (lvls: List Lean.Name) (typ val: Lean.Expr) (secret: Address): CompileM MetaAddress := do --- let addr <- addDef lvls typ val --- let addr' <- commitConst addr secret --- tryAddLeanDef (makeLeanDef addr'.data.toUniqueName lvls typ val) --- --tryAddLeanDecl (makeLeanDef ca.toUniqueName lvls typ (mkConst a.toUniqueName [])) --- return addr' --- ---partial def storeLevel (lvls: Nat) (secret: Option Address): 
CompileM Address := do --- let addr <- storeNat lvls --- match secret with --- | some secret => do --- let comm := .comm ⟨secret, addr⟩ --- let commAddr <- storeIxon comm --- modify fun stt => { stt with --- comms := stt.comms.insert commAddr.toUniqueName ⟨commAddr, commAddr⟩ --- } --- return commAddr --- | none => return addr --- ---partial def checkClaim --- (const: Lean.Name) --- (type: Lean.Expr) --- (sort: Lean.Expr) --- (lvls: List Lean.Name) --- (secret: Option Address) --- : CompileM (Claim × Address × Address) := do --- let leanConst <- findLeanConst const --- let valAddr <- compileConst leanConst >>= comm --- let typeAddr <- addDef lvls sort type >>= comm --- let lvls <- packLevel lvls.length commit --- return (Claim.checks (CheckClaim.mk lvls type value), typeMeta, valMeta) --- where --- commit (c: Ix.Const) : MetaAddress := do --- match commit with --- | none => dematerializeConst c --- | some secret => --- --- if commit then commitConst (Prod.fst a) (Prod.snd a) else pure a - --- ---partial def evalClaim --- (lvls: List Lean.Name) --- (input: Lean.Expr) --- (output: Lean.Expr) --- (type: Lean.Expr) --- (sort: Lean.Expr) --- (commit: Bool) --- : CompileM (Claim × Address × Address × Address) := do --- let (input, inputMeta) <- addDef lvls type input >>= comm --- let (output, outputMeta) <- addDef lvls type output >>= comm --- let (type, typeMeta) <- addDef lvls sort type >>= comm --- let lvlsAddr <- packLevel lvls.length commit --- return (Claim.evals (EvalClaim.mk lvlsAddr input output type), inputMeta, outputMeta, typeMeta) --- where --- comm a := if commit then commitConst (Prod.fst a) (Prod.snd a) else pure a - ---/-- ---Content-addresses the "delta" of an environment, that is, the content that is ---added on top of the imports. --- ---Important: constants with open references in their expressions are filtered out. ---Open references are variables that point to names which aren't present in the ---`Lean.ConstMap`. 
----/ ---def compileDelta (delta : Lean.PersistentHashMap Lean.Name Lean.ConstantInfo) --- : CompileM Unit := delta.forM fun n _ => discard $ compileConstName n --- -----def compileEnv (env: Lean.Environment) ----- : CompileM Unit := do ----- compileDelta env.getDelta ----- env.getConstMap.forM fun n _ => if !c.isUnsafe then discard $ compileConstName n else pure () --- - -instance : Nonempty (Task (CompileM.Result MetaAddress)) := - ⟨Task.pure (.ok default, default)⟩ - -structure ScheduleEnv where - env: Lean.Environment - blocks: CondensedBlocks - comms: Map Lean.Name MetaAddress - -structure ScheduleState where - constTasks: Map Lean.Name (Task (Except IO.Error MetaAddress)) - blockTasks: Map Lean.Name (Task (Except IO.Error (Map Lean.Name MetaAddress))) - -abbrev ScheduleM := ReaderT ScheduleEnv <| StateT ScheduleState IO - -def ScheduleM.run (env: ScheduleEnv) (stt: ScheduleState) (c : ScheduleM α) - : IO (α × ScheduleState) - := StateT.run (ReaderT.run c env) stt - -structure ScheduleStats where - constWaiting: Nat - constRunning: Nat - constFinished: Nat - blockWaiting: Nat - blockRunning: Nat - blockfinished: Nat -deriving Repr - -partial def ScheduleState.stats : ScheduleState -> IO ScheduleStats -| ⟨constTasks, blockTasks⟩ => do - let mut constWaiting := 0 - let mut constRunning := 0 - let mut constFinished := 0 - let mut blockWaiting := 0 - let mut blockRunning := 0 - let mut blockFinished := 0 - for (_, t) in constTasks do - match <- IO.getTaskState t with - | .waiting => constWaiting := constWaiting + 1 - | .running => constRunning := constRunning + 1 - | .finished => constFinished := constFinished + 1 - for (_, t) in blockTasks do - match <- IO.getTaskState t with - | .waiting => blockWaiting := blockWaiting + 1 - | .running => blockRunning := blockRunning + 1 - | .finished => blockFinished := blockFinished + 1 - return ⟨constWaiting, constRunning, constFinished, blockWaiting, blockRunning, blockFinished⟩ - -mutual - -partial def ScheduleM.block (lo: 
Lean.Name) - : ScheduleM (Task (Except IO.Error (Map Lean.Name MetaAddress))) := do - if let some task := (<- get).blockTasks.get? lo then - return task - else - let mut depTasks := [] - let all := (<- read).blocks.blocks.get! lo - let comms := (<- read).comms - let allRefs := (<- read).blocks.blockRefs.get! lo - for ref in allRefs.filter (!all.contains ·) do - let refTask <- ScheduleM.const ref - depTasks := (ref, refTask)::depTasks - let env := (<- read).env - let task <- bindDeps {} env comms all lo depTasks - modify fun stt => { stt with blockTasks := stt.blockTasks.insert lo task } - return task - where - bindDeps - (acc: Map Lean.Name MetaAddress) - (env: Lean.Environment) - (comms: Map Lean.Name MetaAddress) - (all: Set Lean.Name) - (n: Lean.Name) - : List (Lean.Name × Task (Except IO.Error MetaAddress)) - -> IO (Task (Except IO.Error (Map Lean.Name MetaAddress))) - | [] => IO.asTask <| do - let (res, stt) <- CompileM.run - (.init env acc comms all n) .init (compileConstant n) - match res with - | .ok _ => pure <| stt.constCache.filter (fun n _ => all.contains n) - | .error e => do throw (IO.userError (<- e.pretty)) - | (ref, task) :: rest => IO.bindTask task (fun result => - match result with - | .ok addr => bindDeps (acc.insert ref addr) env comms all n rest - | .error e => do throw e - ) - -partial def ScheduleM.const (n: Lean.Name) - : ScheduleM (Task (Except IO.Error MetaAddress)) := do - if let some task := (<- get).constTasks.get? n then - return task - else - let lo := (<- read).blocks.lowLinks.get! n - let blockTask <- match (<- get).blockTasks.get? lo with - | some bt => pure bt - | none => ScheduleM.block lo - let task <- IO.bindTask blockTask (fun res => match res with - | .ok map => match map.get? 
n with - | .some x => IO.asTask <| pure x - | .none => do - throw (IO.userError (<- (CompileError.unknownConstant lo n).pretty)) - | .error e => throw e - ) - modify fun stt => { stt with constTasks := stt.constTasks.insert n task } - return task - -end - -partial def ScheduleM.env : ScheduleM Unit := do - let env := (<- read).env - let envSize := env.constants.fold (fun x _ _=> x + 1) 0 - let mut i := 1 - for (n,_) in (<- read).env.constants do - --let stt <- get - dbg_trace s!"scheduling {i}/{envSize} {n}" - let _ <- ScheduleM.const n - i := i + 1 - return () - - -structure CompiledEnv where - consts: Map Lean.Name MetaAddress - refs : Map Lean.Name (Set Lean.Name) - blocks: CondensedBlocks - -partial def CompileM.envTopological - (env: Lean.Environment) - (comms: Map Lean.Name MetaAddress) - : IO CompiledEnv := do - let refs: Map Lean.Name (Set Lean.Name) := GraphM.env env - dbg_trace s!"constants: {refs.size}" - let blocks := CondenseM.run env refs - dbg_trace s!"lowlinks: {blocks.lowLinks.size}, blocks: {blocks.blocks.size}" - - let mut consts: Map Lean.Name MetaAddress := {} - let mut remaining: Set Lean.Name := {} + blockNames := cache.blockNames.fold (fun m k v => m.insert k v) blockNames + + -- If there are projections, store them and map names to projection addresses + if result.projections.isEmpty then + -- No projections: map lowlink name directly to block + compileEnv := { compileEnv with nameToNamed := compileEnv.nameToNamed.insert lo ⟨blockAddr, result.blockMeta⟩ } + else + -- Store each projection and map name to projection address + for (name, proj, constMeta) in result.projections do + let projBytes := Ixon.ser proj + let projAddr := Address.blake3 projBytes + compileEnv := { compileEnv with + totalBytes := compileEnv.totalBytes + projBytes.size + constants := compileEnv.constants.insert projAddr proj + nameToNamed := compileEnv.nameToNamed.insert name ⟨projAddr, constMeta⟩ + } + + -- Decrement dep counts for blocks that depend on constants in 
this block + for name in all do + if let some dependents := reverseDeps.get? name then + for dependentLo in dependents do + if let some (depAll, depCount) := blockInfo.get? dependentLo then + let newCount := depCount - 1 + blockInfo := blockInfo.insert dependentLo (depAll, newCount) + -- If dep count reaches 0, add to ready queue + if newCount == 0 then + readyQueue := readyQueue.push (dependentLo, depAll) + + blocksCompiled := blocksCompiled + 1 + if dbg then + let pct := (blocksCompiled * 100) / totalBlocks + if pct >= lastPct + 10 then + dbg_trace s!" [Compile] {pct}% ({blocksCompiled}/{totalBlocks})" + lastPct := pct + | Except.error e => + if dbg then + dbg_trace s!" [Compile ERROR] {lo}: {e}" + dbg_trace s!" [Compile] nameToNamed has {compileEnv.nameToNamed.size} entries" + return .error s!"Compilation error in {lo}: {e}" + + -- Check that all blocks were compiled + if blocksCompiled != totalBlocks then + return .error s!"Only compiled {blocksCompiled}/{totalBlocks} blocks - circular dependency?" + + -- Build reverse index and names map, storing name string components as blobs + -- Seed with blockNames collected during compilation (binder names, level params, etc.) + let (addrToNameMap, namesMap, nameBlobs) := + compileEnv.nameToNamed.fold (init := ({}, blockNames, {})) fun (addrMap, namesMap, blobs) name named => + let addrMap := addrMap.insert named.addr name + let (namesMap, blobs) := Ixon.RawEnv.addNameComponentsWithBlobs namesMap blobs name + (addrMap, namesMap, blobs) + + -- Merge name string blobs into the main blobs map + let allBlobs := nameBlobs.fold (fun m k v => m.insert k v) compileEnv.blobs + + let ixonEnv : Ixon.Env := { + consts := compileEnv.constants + named := compileEnv.nameToNamed + blobs := allBlobs + names := namesMap + comms := {} + addrToName := addrToNameMap + } + + return .ok (ixonEnv, compileEnv.totalBytes) + +/-! ## Parallel Compilation with Work-Stealing -/ + +/-- Reference to Rust compilation results for incremental comparison. 
-/ +structure RustRef where + /-- Map from constant name to compiled address -/ + nameToAddr : Std.HashMap Name Address + +/-- A single constant's mismatch info -/ +structure ConstMismatch where + name : Name + leanAddr : Address + rustAddr : Address + leanBytes : ByteArray + leanConst : Ixon.Constant + deriving Inhabited + +/-- Mismatch error with all info needed for debugging -/ +structure MismatchError where + /-- The block's lowlink name -/ + blockName : Name + /-- The main block constant (mutual definitions) -/ + mainBlock : Ixon.Constant + /-- Serialized bytes of the main block -/ + mainBlockBytes : ByteArray + /-- Address of the main block -/ + mainBlockAddr : Address + /-- All projection constants in the block with their info -/ + projections : Array ConstMismatch + /-- The specific constant that triggered the mismatch -/ + failedConst : ConstMismatch + /-- Optional system error message (for non-mismatch errors) -/ + systemError : Option String := none + +/-- Create a system error (not a mismatch) -/ +def MismatchError.system (msg : String) : MismatchError := + { blockName := default, mainBlock := default, mainBlockBytes := default, mainBlockAddr := default, + projections := #[], failedConst := ⟨default, default, default, default, default⟩, systemError := some msg } + +/-- Result of compiling a single block. -/ +structure BlockCompileResult where + /-- Lowlink name of the block -/ + lo : Name + /-- All names in the block -/ + all : Set Name + /-- The compiled block constant -/ + block : Ixon.Constant + /-- Block address -/ + blockAddr : Address + /-- Projections: name → (projection constant, projection address, metadata) -/ + projections : Array (Name × Ixon.Constant × Address × Ixon.ConstantMeta) + /-- Blobs collected during compilation -/ + blobs : Std.HashMap Address ByteArray + /-- Total serialized bytes -/ + totalBytes : Nat + +/-- Shared state for parallel compilation. Protected by mutex. 
-/ +structure ParallelState where + /-- Map from constant name to Named (address + metadata) -/ + nameToNamed : Std.HashMap Name Ixon.Named + /-- Compiled constants storage -/ + constants : Std.HashMap Address Ixon.Constant + /-- Blob storage -/ + blobs : Std.HashMap Address ByteArray + /-- Total bytes compiled -/ + totalBytes : Nat + /-- Block dependency counts (remaining deps) -/ + blockDepCounts : Std.HashMap Name Nat + /-- Blocks compiled so far -/ + blocksCompiled : Nat + /-- First error encountered (if any) -/ + firstError : Option String + /-- Mismatches found during incremental comparison -/ + mismatches : Array (Name × Address × Address) -- (name, lean addr, rust addr) + /-- Last printed percentage (for progress tracking) -/ + lastPrintedPct : Nat + +/-- Result of compiling a single block in a wave. -/ +structure WaveBlockResult where + lo : Name + all : Set Name + block : Ixon.Constant + blockAddr : Address + projections : Array (Name × Ixon.Constant × Address × Ixon.ConstantMeta) + blobs : Std.HashMap Address ByteArray + names : Std.HashMap Address Ix.Name + totalBytes : Nat + +/-- Work item for a worker thread -/ +structure WorkItem where + lo : Name + all : Set Name + compileEnv : CompileEnv + rustRef : Option RustRef + +instance : Inhabited WorkItem where + default := { lo := default, all := {}, compileEnv := default, rustRef := none } + +instance : Inhabited (Except MismatchError WaveBlockResult) where + default := .error { blockName := default, mainBlock := default, mainBlockBytes := default, + mainBlockAddr := default, projections := #[], + failedConst := ⟨default, default, default, default, default⟩ } + +/-- Compile an Ix.Environment in parallel using dedicated workers. + Workers are created once and reused across waves. + Each wave compiles all blocks whose dependencies are satisfied. + Optionally compares results against Rust incrementally - fails fast on first mismatch. 
+ Returns the compiled Ixon.Env and total bytes, or a MismatchError on first discrepancy. -/ +def compileEnvParallel (env : Ix.Environment) (blocks : Ix.CondensedBlocks) + (rustRef : Option RustRef := none) (numWorkers : Nat := 32) (dbg : Bool := false) + : IO (Except MismatchError (Ixon.Env × Nat)) := do + let totalBlocks := blocks.blocks.size + + -- Create channels for work distribution (using Sync for blocking operations) + let workChan ← Std.CloseableChannel.Sync.new (α := WorkItem) + let resultChan ← Std.CloseableChannel.Sync.new (α := Except MismatchError WaveBlockResult) + + -- Worker function: receive work, compile, send result + let worker (_workerId : Nat) : IO Unit := do + while true do + match ← workChan.recv with + | none => break -- Channel closed, exit + | some item => + let result : Except MismatchError WaveBlockResult := Id.run do + match compileBlockPure item.compileEnv item.all item.lo with + | Except.error e => + return .error <| .system s!"Compilation error in {item.lo}: {e}" + | Except.ok (blockResult, cache) => + -- Use pre-computed serialized bytes and address + let blockBytes := blockResult.blockBytes + let blockAddr := blockResult.blockAddr + let mut projections : Array (Name × Ixon.Constant × Address × ByteArray × Ixon.ConstantMeta) := #[] + let mut projBytes := blockBytes.size + + if blockResult.projections.isEmpty then + projections := #[(item.lo, blockResult.block, blockAddr, blockBytes, blockResult.blockMeta)] + else + for (name, proj, constMeta) in blockResult.projections do + let pBytes := Ixon.ser proj + let pAddr := Address.blake3 pBytes + projections := projections.push (name, proj, pAddr, pBytes, constMeta) + projBytes := projBytes + pBytes.size + + -- Check against Rust reference - fail fast on first mismatch + if let some rust := item.rustRef then + -- Build full block info for all projection constants + let projMismatches : Array ConstMismatch := projections.map fun (name, const, leanAddr, bytes, _) => + let rustAddr := 
rust.nameToAddr.get? name |>.getD default + ⟨name, leanAddr, rustAddr, bytes, const⟩ + + -- Check for any mismatch + for cm in projMismatches do + if let some rustAddr := rust.nameToAddr.get? cm.name then + if cm.leanAddr != rustAddr then + return .error { + blockName := item.lo + mainBlock := blockResult.block + mainBlockBytes := blockBytes + mainBlockAddr := blockAddr + projections := projMismatches + failedConst := { cm with rustAddr } + } + + -- Convert projections to the format without bytes for the result + let projsNoBytes := projections.map fun (n, c, a, _, m) => (n, c, a, m) + + return .ok { + lo := item.lo + all := item.all + block := blockResult.block + blockAddr + projections := projsNoBytes + blobs := cache.blockBlobs + names := cache.blockNames + totalBytes := projBytes + } + discard <| resultChan.send result + + -- Spawn dedicated worker threads + let mut workerTasks : Array (Task (Except IO.Error Unit)) := #[] + for i in [:numWorkers] do + let task ← IO.asTask (prio := .dedicated) (worker i) + workerTasks := workerTasks.push task + + -- Track compiled constants and remaining blocks + let mut nameToNamed : Std.HashMap Name Ixon.Named := {} + let mut constants : Std.HashMap Address Ixon.Constant := {} + let mut blobs : Std.HashMap Address ByteArray := {} + let mut blockNames : Std.HashMap Address Ix.Name := {} + let mut totalBytes : Nat := 0 + + let mut remaining : Set Name := {} for (lo, _) in blocks.blocks do remaining := remaining.insert lo - let numBlocks := remaining.size - let mut i := 0 + let baseCompileEnv := CompileEnv.new env + + if dbg then + IO.println s!" 
[Lean Compile] {totalBlocks} blocks, {numWorkers} workers" + + let mut waveNum := 0 + let mut compiled := 0 - dbg_trace s!"compiling {numBlocks} blocks" while !remaining.isEmpty do - i := i + 1 - let pct := ((Float.ofNat remaining.size) / Float.ofNat numBlocks) - dbg_trace s!"Wave {i}, {(1 - pct) * 100}%: {remaining.size}/{numBlocks} blocks remaining" + waveNum := waveNum + 1 - let mut ready : Array (Lean.Name × Set Lean.Name) := #[] - for r in remaining do - let lo := blocks.lowLinks.get! r + -- Find all blocks ready to compile (all deps satisfied) + let mut ready : Array (Name × Set Name) := #[] + for lo in remaining do let all := blocks.blocks.get! lo - let allDeps : Set Lean.Name := blocks.blockRefs.get! lo - if allDeps.all (consts.contains ·) then - ready := ready.push (r, all) - else - continue + let deps := blocks.blockRefs.get! lo + if deps.all (nameToNamed.contains ·) then + ready := ready.push (lo, all) + + if ready.isEmpty then + discard <| workChan.close + return .error <| .system s!"Circular dependency detected: {remaining.size} blocks remaining but none ready" + + if dbg then + let pct := (compiled * 100) / totalBlocks + IO.println s!" 
[Lean Compile] Wave {waveNum}: {ready.size} blocks ready, {pct}% ({compiled}/{totalBlocks})" - let mut tasks := #[] + -- Create compileEnv for this wave (with current nameToNamed) + let compileEnv := { baseCompileEnv with nameToNamed } + -- Send all ready blocks to workers for (lo, all) in ready do - --dbg_trace s!"Wave {i}: scheduling {lo}" - let task <- IO.asTask <| CompileM.run - (.init env consts comms all lo) .init (compileConstant lo) - tasks := tasks.push task - - for task in tasks do - match <- IO.wait task with - | .ok (.ok _, stt) => consts := consts.union stt.constCache - | .ok (.error e, _) => throw (IO.userError (<- e.pretty)) - | .error e => throw e - - for (r, _) in ready do - remaining := remaining.erase r - - return ⟨consts, refs, blocks⟩ - -partial def CompileM.envScheduler - (env: Lean.Environment) - (comms: Map Lean.Name MetaAddress) - : IO CompiledEnv := do - let refs: Map Lean.Name (Set Lean.Name) := GraphM.env env - println! s!"constants: {refs.size}" - let blocks := CondenseM.run env refs - println! s!"lowlinks: {blocks.lowLinks.size}, blocks: {blocks.blocks.size}" - - let (_, stt) <- ScheduleM.run ⟨env, blocks, comms⟩ ⟨{}, {}⟩ ScheduleM.env - - let mut consts := {} - - let tasksSize := stt.constTasks.size - let mut i := 1 - - while true do - let stats <- ScheduleState.stats stt - if stats.blockWaiting > 0 then - println! s!"waiting {repr <| <- ScheduleState.stats stt}" - continue - else - break - - for (n, task) in stt.constTasks do - println! 
s!"waiting {i}/{tasksSize}" - i := i + 1 - match (<- IO.wait task) with - | .ok addr => consts := consts.insert n addr - | .error e => throw e - - return ⟨consts, refs, blocks⟩ - ---partial def CompileM.const --- (name: Lean.Name) (env: Lean.Environment) (comms: Map Lean.Name MetaAddress) --- : Except CompileError CompiledEnv := Id.run do --- let refs: Map Lean.Name (Set Lean.Name) := GraphM.env env --- let blocks := CondenseM.run env refs --- --- let (_, stt) <- ScheduleM.run ⟨env, blocks, comms, refs⟩ ⟨{}⟩ (scheduleConst name) --- --- let mut consts: Map Lean.Name MetaAddress := {} --- let mut store: Map Address ByteArray := {} --- let mut axioms: Set Lean.Name := {} --- --- for (n, task) in stt.tasks do --- dbg_trace "compiling {n}" --- match task.get with --- | .error e _ => return .error e --- | .ok addr stt => --- consts := consts.insert n addr --- store := store.union stt.store --- axioms := axioms.union stt.axioms --- --- return .ok ⟨consts, store, axioms, refs, blocks.count, blocks.alls⟩ - ---def CompileM.runIO' (c : CompileM α) --- (stt: CompileState) --- : IO (α × CompileState) := do --- match <- c.run .init stt with --- | (.ok a, stt) => return (a, stt) --- | (.error e, _) => throw (IO.userError (<- e.pretty)) --- ---def compileEnvIO (env: Lean.Environment) : IO (CompileState):= do --- Prod.snd <$> (compileDelta env.getDelta).runIO env - ---end Ix + discard <| workChan.send { lo, all, compileEnv, rustRef } + + -- Collect results for this wave + for _ in [:ready.size] do + match ← resultChan.recv with + | none => + discard <| workChan.close + return .error <| .system "Result channel closed unexpectedly" + | some (.error e) => + discard <| workChan.close + return .error e + | some (.ok result) => + -- Store block constant + constants := constants.insert result.blockAddr result.block + -- Store projections and update nameToNamed + for (name, proj, addr, constMeta) in result.projections do + constants := constants.insert addr proj + nameToNamed := 
nameToNamed.insert name ⟨addr, constMeta⟩ + -- Store blobs and names + blobs := result.blobs.fold (fun m k v => m.insert k v) blobs + blockNames := result.names.fold (fun m k v => m.insert k v) blockNames + totalBytes := totalBytes + result.totalBytes + compiled := compiled + 1 + + -- Remove completed blocks from remaining + for (lo, _) in ready do + remaining := remaining.erase lo + + -- Close work channel to signal workers to exit + discard <| workChan.close + + if dbg then + IO.println s!" [Lean Compile] All {waveNum} waves finished, {compiled} blocks compiled" + + -- Check all blocks compiled + if compiled != totalBlocks then + return .error <| .system s!"Only compiled {compiled}/{totalBlocks} blocks - circular dependency?" + + -- Build reverse index and names map, storing name string components as blobs + -- Seed with blockNames collected during compilation (binder names, level params, etc.) + let (addrToNameMap, namesMap, nameBlobs) := + nameToNamed.fold (init := ({}, blockNames, {})) fun (addrMap, namesMap, nameBlobs) name named => + let addrMap := addrMap.insert named.addr name + let (namesMap, nameBlobs) := Ixon.RawEnv.addNameComponentsWithBlobs namesMap nameBlobs name + (addrMap, namesMap, nameBlobs) + + -- Merge name string blobs into the main blobs map + let blockBlobCount := blobs.size + let nameBlobCount := nameBlobs.size + let allBlobs := nameBlobs.fold (fun m k v => m.insert k v) blobs + let finalBlobCount := allBlobs.size + let overlapCount := blockBlobCount + nameBlobCount - finalBlobCount + + if dbg then + IO.println s!" [Lean Compile] Blobs: {blockBlobCount} from blocks, {nameBlobCount} from names, {overlapCount} overlap, {finalBlobCount} final" + + let ixonEnv : Ixon.Env := { + consts := constants + named := nameToNamed + blobs := allBlobs + names := namesMap + comms := {} + addrToName := addrToNameMap + } + + return .ok (ixonEnv, totalBytes) + +/-! 
## Rust Compilation FFI -/ + +/-- FFI: Compile a Lean environment to serialized Ixon.Env bytes using Rust. -/ +@[extern "rs_compile_env"] +opaque rsCompileEnvBytesFFI : @& List (Lean.Name × Lean.ConstantInfo) → IO ByteArray + +/-- Compile a Lean environment to Ixon.Env bytes using the Rust compiler. -/ +def rsCompileEnvBytes (leanEnv : Lean.Environment) : IO ByteArray := do + let constList := leanEnv.constants.toList + rsCompileEnvBytesFFI constList + +-- Re-export RawEnv types from Ixon for backwards compatibility +export Ixon (RawConst RawNamed RawBlob RawComm RawEnv) + +/-- FFI: Compile a Lean environment to RawEnv (structured Lean objects) using Rust. -/ +@[extern "rs_compile_env_to_ixon"] +opaque rsCompileEnvFFI : @& List (Lean.Name × Lean.ConstantInfo) → IO Ixon.RawEnv + +/-! ## Combined Compile Phases FFI -/ + +/-- Raw FFI type returned from Rust's rs_compile_phases. + Contains all compilation phases in array-based format for FFI compatibility. -/ +structure RustCompilePhases where + rawEnv : Ix.RawEnvironment -- Array-based canonicalized constants + condensed : RustCondensedBlocks -- Array-based SCC data + compileEnv : RawEnv -- Ixon raw type (RawConst, RawNamed, etc.) + deriving Inhabited, Repr + +/-- Nice Lean type with proper data structures. + Converted from RustCompilePhases for ergonomic use in Lean. -/ +structure CompilePhases where + rawEnv : Ix.Environment -- HashMap-based canonicalized constants + condensed : CondensedBlocks -- Map/Set-based SCC data + compileEnv : Ixon.Env -- HashMap-based Ixon environment + +/-- FFI: Run all compilation phases in Rust and return structured data. -/ +@[extern "rs_compile_phases"] +opaque rsCompilePhasesFFI : @& List (Lean.Name × Lean.ConstantInfo) → IO RustCompilePhases + +/-- Run all compilation phases using Rust and convert to Lean-friendly types. + This is the main entry point for getting Rust compilation results. 
-/ +def rsCompilePhases (leanEnv : Lean.Environment) : IO CompilePhases := do + let constList := leanEnv.constants.toList + let raw ← rsCompilePhasesFFI constList + + -- Convert RawEnvironment to Environment + let rawEnv := raw.rawEnv.toEnvironment + + -- Convert RustCondensedBlocks to CondensedBlocks + let condensed := raw.condensed.toCondensedBlocks + + -- Convert RawEnv to Ixon.Env + let compileEnv := raw.compileEnv.toEnv + + pure { rawEnv, condensed, compileEnv } + +/-- Compile a Lean environment to Ixon.Env using the Rust compiler. + Uses the direct FFI that returns structured Lean objects. -/ +def rsCompileEnv (leanEnv : Lean.Environment) : IO Ixon.Env := do + let constList := leanEnv.constants.toList + let rawEnv ← rsCompileEnvFFI constList + pure rawEnv.toEnv + +end Ix.CompileM diff --git a/Ix/CondenseM.lean b/Ix/CondenseM.lean index 43908f13..8a1188df 100644 --- a/Ix/CondenseM.lean +++ b/Ix/CondenseM.lean @@ -1,84 +1,130 @@ +/- + # CondenseM: Strongly Connected Component Condensation + + Implements Tarjan's SCC algorithm to condense a dependency graph of Ix.Names + into its strongly connected components (mutual blocks). Each SCC becomes a + single node in the condensed DAG. + + The algorithm assigns each node a numeric `id` (discovery order) and tracks + a `lowLink` — the smallest id reachable from that node via back-edges. When + `lowLink[id] == id`, the node is the root of an SCC, and all nodes on the + stack down to it form the component. 
+ + The output `CondensedBlocks` provides: + - `lowLinks`: maps each constant to its SCC representative + - `blocks`: maps each SCC representative to all members + - `blockRefs`: maps each SCC representative to its external references +-/ + import Lean import Ix.Common +import Ix.Environment namespace Ix structure CondenseEnv where - env : Lean.Environment - outRefs: Map Lean.Name (Set Lean.Name) + /-- Set of valid names (names that exist in the environment) -/ + validNames: Set Ix.Name + /-- Reference graph: map from Ix.Name to the set of names it references -/ + outRefs: Map Ix.Name (Set Ix.Name) structure CondenseState where - names: Map Lean.Name UInt64 - ids: Map UInt64 Lean.Name + /-- Map from name to its discovery id -/ + names: Map Ix.Name UInt64 + /-- Reverse map from discovery id to name -/ + ids: Map UInt64 Ix.Name + /-- Smallest reachable discovery id for each node (determines SCC roots) -/ lowLink: Map UInt64 UInt64 + /-- Tarjan's working stack of node ids being processed -/ stack: Array UInt64 + /-- Set of ids currently on the stack (for O(1) membership checks) -/ onStack: Set UInt64 + /-- Next discovery id to assign -/ id : UInt64 def CondenseState.init : CondenseState := ⟨{}, {}, {}, #[], {}, 0⟩ abbrev CondenseM := ReaderT CondenseEnv <| StateT CondenseState Id -partial def visit : Lean.Name -> CondenseM Unit -| name => do match (<- read).env.constants.find? name with - | .none => return () - | .some _ => do - let refs := match (<- read).outRefs.find? name with +/-- Tarjan's DFS visit. Assigns a discovery `id`, pushes onto the SCC stack, + then recurses on neighbors. After visiting neighbors, if `lowLink[id] == id` + this node is the root of an SCC — pop the stack to collect all members. -/ +partial def visit : Ix.Name -> CondenseM Unit +| name => do + if !(<- read).validNames.contains name then return () + let refs := match (<- read).outRefs.get? 
name with | .some x => x | .none => {} - let id := (<- get).id - modify fun stt => { stt with - names := stt.names.insert name id - ids := stt.ids.insert id name - stack := stt.stack.push id - onStack := stt.onStack.insert id - lowLink := stt.lowLink.insert id id - id := id + 1 - } - for ref in refs do - match (<- read).env.constants.find? ref with - | none => continue - | some _ => do match (<- get).names.get? ref with - | .none => do - visit ref - modify fun stt => - let ll := stt.lowLink.get! id - let rll := stt.lowLink.get! (stt.names.get! ref) - { stt with lowLink := stt.lowLink.insert id (min ll rll) } - | .some id' => if (<- get).onStack.contains id' then - modify fun stt => - let ll := stt.lowLink.get! id - { stt with lowLink := stt.lowLink.insert id (min ll id') } - if id == (<- get).lowLink.get! id then - let mut stack := (<- get).stack - if !stack.isEmpty then - while true do - let top := stack.back! - stack := stack.pop - modify fun stt => { stt with - lowLink := stt.lowLink.insert top id - onStack := stt.onStack.erase top - } - if top == id then break - modify fun stt => { stt with stack := stack } + -- Assign discovery id and initialize lowLink to self + let id := (<- get).id + modify fun stt => { stt with + names := stt.names.insert name id + ids := stt.ids.insert id name + stack := stt.stack.push id + onStack := stt.onStack.insert id + lowLink := stt.lowLink.insert id id + id := id + 1 + } + for ref in refs do + if !(<- read).validNames.contains ref then continue + match (<- get).names.get? ref with + | .none => do + -- Tree edge: recurse, then propagate lowLink upward + visit ref + modify fun stt => + -- SAFETY: `id` was inserted into lowLink at line 65 before this loop + let ll := stt.lowLink.get! id + -- SAFETY: `ref` was just visited (tree edge), so `names` and `lowLink` contain it + let rll := stt.lowLink.get! (stt.names.get! 
ref) + { stt with lowLink := stt.lowLink.insert id (min ll rll) } + | .some id' => if (<- get).onStack.contains id' then + -- Back edge: update lowLink to the earlier discovery id + modify fun stt => + -- SAFETY: `id` was inserted into lowLink at line 65 before this loop + let ll := stt.lowLink.get! id + { stt with lowLink := stt.lowLink.insert id (min ll id') } + -- If lowLink equals our own id, we are the root of an SCC. + -- Pop the stack until we reach ourselves to collect all SCC members. + -- SAFETY: `id` was inserted into lowLink at line 65; may have been updated but is always present + if id == (<- get).lowLink.get! id then + let mut stack := (<- get).stack + if !stack.isEmpty then + while true do + let top := stack.back! + stack := stack.pop + modify fun stt => { stt with + lowLink := stt.lowLink.insert top id + onStack := stt.onStack.erase top + } + if top == id then break + modify fun stt => { stt with stack := stack } structure CondensedBlocks where - lowLinks: Map Lean.Name Lean.Name -- map constants to their lowlinks - blocks: Map Lean.Name (Set Lean.Name) -- map lowlinks to blocks - blockRefs: Map Lean.Name (Set Lean.Name) -- map lowlinks to block out-references + lowLinks: Map Ix.Name Ix.Name -- map constants to their lowlinks + blocks: Map Ix.Name (Set Ix.Name) -- map lowlinks to blocks + blockRefs: Map Ix.Name (Set Ix.Name) -- map lowlinks to block out-references deriving Inhabited, Nonempty -def condense: CondenseM CondensedBlocks := do - let mut idx := 0 - for (name,_) in (<- read).env.constants do +def condense (dbg : Bool) (total : Nat): CondenseM CondensedBlocks := do + let mut idx : Nat := 0 + let mut lastPct : Nat := 0 + -- Iterate over all names in the ref graph + for (name, _) in (<- read).outRefs do idx := idx + 1 + if dbg && total > 0 then + let pct := (idx * 100) / total + if pct >= lastPct + 10 then + dbg_trace s!" [Condense] {pct}% ({idx}/{total})" + lastPct := pct match (<- get).names.get? 
name with | .some _ => continue | .none => visit name - let mut blocks : Map Lean.Name (Set Lean.Name) := {} + let mut blocks : Map Ix.Name (Set Ix.Name) := {} let mut lowLinks := {} for (i, low) in (<- get).lowLink do + -- SAFETY: every id `i` in lowLink was assigned via `ids.insert id name` at line 62 let name := (<- get).ids.get! i + -- SAFETY: `low` is always a valid id — either the node's own id or one reached via back-edges let lowName := (<- get).ids.get! low lowLinks := lowLinks.insert name lowName blocks := blocks.alter lowName fun x => match x with @@ -87,15 +133,38 @@ def condense: CondenseM CondensedBlocks := do let mut blockRefs := {} let refs := (<- read).outRefs for (lo, all) in blocks do - let mut rs: Set Lean.Name := {} + let mut rs: Set Ix.Name := {} for a in all do + -- SAFETY: `a` is a member of an SCC block, so it exists as a key in the outRefs graph rs := rs.union (refs.get! a) rs := rs.filter (!all.contains ·) blockRefs := blockRefs.insert lo rs return ⟨lowLinks, blocks, blockRefs⟩ -def CondenseM.run (env: Lean.Environment) (refs: Map Lean.Name (Set Lean.Name)) - : CondensedBlocks := - Id.run (StateT.run (ReaderT.run condense ⟨env, refs⟩) CondenseState.init).1 +/-- Run SCC condensation on dependency graph (Ix.Name-based). + Takes the reference graph (map from Ix.Name to set of referenced Ix.Names). + Pass `dbg := true` and `total` (constant count) to enable progress tracing. -/ +def CondenseM.run (refs: Map Ix.Name (Set Ix.Name)) + (dbg : Bool := false) (total : Nat := 0) : CondensedBlocks := + -- Build the set of valid names from the ref graph keys + let validNames : Set Ix.Name := refs.fold (init := {}) fun acc k _ => acc.insert k + Id.run (StateT.run (ReaderT.run (condense dbg total) ⟨validNames, refs⟩) CondenseState.init).1 + +/-- Rust's CondensedBlocks structure (mirroring Rust's output format). + Used for FFI round-tripping with array-based representation. 
-/ +structure RustCondensedBlocks where + lowLinks : Array (Ix.Name × Ix.Name) + blocks : Array (Ix.Name × Array Ix.Name) + blockRefs : Array (Ix.Name × Array Ix.Name) + deriving Inhabited, Nonempty, Repr + +/-- Convert Rust's array-based format to Lean's map-based CondensedBlocks. -/ +def RustCondensedBlocks.toCondensedBlocks (rust : RustCondensedBlocks) : CondensedBlocks := + let lowLinks := rust.lowLinks.foldl (init := {}) fun m (k, v) => m.insert k v + let blocks := rust.blocks.foldl (init := {}) fun m (k, v) => + m.insert k (v.foldl (init := {}) fun s n => s.insert n) + let blockRefs := rust.blockRefs.foldl (init := {}) fun m (k, v) => + m.insert k (v.foldl (init := {}) fun s n => s.insert n) + { lowLinks, blocks, blockRefs } end Ix diff --git a/Ix/DecompileM.lean b/Ix/DecompileM.lean index 3617cf4a..d22fb8f7 100644 --- a/Ix/DecompileM.lean +++ b/Ix/DecompileM.lean @@ -1,704 +1,814 @@ ---import Std.Data.HashMap ---import Ix.Ixon ---import Ix.Common ---import Ix.Store ---import Ix.CompileM ---import Init.Data.List.Control --- ---open Ix.CompileM ---open Ixon hiding Substring --- ---namespace Ix --- ---/- the current Ix constant being decompiled -/ ---structure Named where --- name: Lean.Name --- addr: MetaAddress ---deriving Inhabited, Repr --- ---instance : ToString Named where --- toString n := s!"{n.addr}.{n.name}" --- ---/- The local environment for the Ix -> Lean4 decompiler -/ ---structure DecompileEnv where --- names : Map Lean.Name MetaAddress --- store : Map Address ByteArray --- univCtx : List Lean.Name --- bindCtx : List Lean.Name --- mutCtx : Std.HashMap Lean.Name Nat --- current : Named --- deriving Repr, Inhabited --- ---/- initialize from an Ixon store and a name-index to the store -/ ---def DecompileEnv.init --- (names : Map Lean.Name MetaAddress) --- (store : Map Address ByteArray) --- : DecompileEnv --- := ⟨names, store, default, default, default, default⟩ --- ---/- A Block is one of the possible sets of Lean constants we could generate --- 
from a pair of content and metadata Ix addresses -/ ---inductive Block where ---| prim : Block ---| defn : Def -> Block ---| recr : Lean.RecursorVal -> Block ---| axio : Lean.AxiomVal -> Block ---| quot : Lean.QuotVal -> Block ---| muts : List (List MutConst) -> Block ---deriving Repr, Inhabited, Nonempty --- ---def Block.contains (name: Lean.Name) : Block -> Bool ---| .prim => name == .mkSimple "_obj" || name == .mkSimple "_neutral" || name == .mkSimple "_unreachable" ---| .defn val => val.name == name ---| .recr val => val.name == name ---| .axio val => val.name == name ---| .quot val => val.name == name ---| .muts consts => consts.any (·.any (·.contains name)) --- ---def Block.consts : Block -> Set Lean.ConstantInfo ---| .prim => {} ---| .defn val => {defn val} ---| .recr val => {.recInfo val} ---| .axio val => {.axiomInfo val} ---| .quot val => {.quotInfo val} ---| .muts consts => consts.foldr (fun m set => set.union (m.foldr --- (fun m' set' => set'.union (mutConst m')) {})) {} ---where --- defn: Def -> Lean.ConstantInfo --- | ⟨name, lvls, type, kind, value, hints, safety, all⟩ => match kind with --- | .«definition» => .defnInfo ⟨⟨name, lvls, type⟩, value, hints, safety, all⟩ --- | .«opaque» => .opaqueInfo ⟨⟨name, lvls, type⟩, value, safety == .unsafe, all⟩ --- | .«theorem» => .thmInfo ⟨⟨name, lvls, type⟩, value, all⟩ --- mutConst: MutConst -> Set Lean.ConstantInfo --- | .defn d => {defn d} --- | .recr r => {.recInfo r} --- | .indc i => (Std.HashSet.ofList (.ctorInfo <$> i.ctors)).insert <| --- .inductInfo (⟨⟨i.name, i.levelParams, i.type⟩, i.numParams, --- i.numIndices, i.all, (·.name) <$> i.ctors, i.numNested, i.isRec, --- i.isReflexive, i.isUnsafe⟩) --- ---structure DecompileState where --- constants: Map Lean.Name Lean.ConstantInfo --- constCache: Map MetaAddress (Lean.Name × Set Lean.Name) --- exprCache: Map MetaAddress Lean.Expr --- univCache: Map MetaAddress Lean.Level --- synCache: Map Address Lean.Syntax --- nameCache: Map Address Lean.Name --- 
deriving Inhabited --- ---inductive DecompileError ---| freeLevel (curr: Named) (ctx: List Lean.Name) (lvl: Lean.Name) (idx: Nat) ---| mismatchedLevelName --- (curr: Named) (ctx: List Lean.Name) (got: Lean.Name) --- (exp: Lean.Name) (idx: Nat) ---| mismatchedName (curr: Named) (n m: Lean.Name) ---| mismatchedNameSet (curr: Named) (n: Lean.Name) (ms: List Lean.Name) ---| invalidBVarIndex (curr: Named) (ctx: List Lean.Name) (idx: Nat) ---| mismatchedUnivArgs (curr: Named) (d m : List Address) ---| mismatchedLevels (curr: Named) (n: Nat) (ls: List Address) ---| mismatchedRules (curr: Named) (rs: List RecursorRule) (ms: List (Address × Address)) ---| mismatchedCtors (curr: Named) (cs: List Constructor) (ms: List Address) ---| mismatchedMutIdx --- (curr: Named) (ctx: Std.HashMap Lean.Name Nat) (exp: Lean.Name) --- (idx: Nat) (got: Nat) ---| unknownMutual --- (curr: Named) (ctx: Std.HashMap Lean.Name Nat) (exp: Lean.Name) (idx: Nat) -----| transport (curr: Named) (err: TransportError) (cont meta: Address) -----| unknownName (curr: Named) (name: Lean.Name) ---| badDeserialization (addr: Address) (exp: String) (str: String) ---| unknownStoreAddress (curr: Named) (addr: Address) ---| badName (curr: Named) (ixon: Ixon) ---| badLevel (curr: Named) (data «meta»: Ixon) ---| badKVMap (curr: Named) (ixon: Ixon) ---| badKVMapMetadatum (curr: Named) («meta»: Metadatum) ---| badExpr (curr: Named) (data «meta»: Ixon) ---| badDef (curr: Named) (d: Ixon.Definition) («meta»: Metadata) ---| badRecr (curr: Named) (d: Ixon.Recursor) («meta»: Metadata) ---| badCtor (curr: Named) (ctor: Ixon.Constructor) («meta»: Ixon) ---| badIndc (curr: Named) (ctor: Ixon.Inductive) («meta»: Metadata) ---| badMuts (curr: Named) (data «meta»: Ixon) ---| badConst (curr: Named) (data «meta»: Ixon) ---| badProj (curr: Named) (block: Block) (msg: String) ---| badProjMeta (curr: Named) («meta»: Ixon) (msg: String) ---| badCache (name: Lean.Name) (set: Set Lean.Name) -----| expectedIxonMetadata (curr: Named) 
(exp: Address) (got: Ixon) -----| badProjection ----- (curr: Named) (name: Lean.Name) (cont meta: Address) (msg: String) -----| nonCongruentInductives (curr: Named) (x y: Ix.Inductive) -----| nameNotInBlockNames ----- (curr: Named) (block: BlockNames) (name: Lean.Name) (cont meta: Address) -----| nameNotInBlock ----- (curr: Named) (block: Block) (name: Lean.Name) (cont meta: Address) -----| mismatchedName ----- (curr: Named) (exp: Lean.Name) (got: Lean.Name) (cont meta: Address) -----| expectedNameInBlock ----- (curr: Named) (exp: Lean.Name) (got: BlockNames) (cont meta: Address) -----| expectedDefnBlock (curr: Named) (exp: Lean.Name) (got: Block) (cont meta: Address) -----| expectedMutDefBlock (curr: Named) (got: BlockNames) (cont meta: Address) -----| expectedMutIndBlock (curr: Named) (got: BlockNames) (cont meta: Address) -----| expectedMutIndConst (curr: Named) (got: Ix.Const) (cont meta: Address) -----| expectedMutDefConst (curr: Named) (got: Ix.Const) (cont meta: Address) -----| overloadedConstants (curr: Named) (x y: Lean.ConstantInfo) ---| todo ---deriving Repr --- ---def DecompileError.pretty : DecompileError -> String ---| .freeLevel c lvls n i => s!"Free level {n} at {i} with ctx {repr lvls} @ {c}" ---| .mismatchedLevelName c ctx n' n i => --- s!"Expected level name {n} at index {i} but got {n'} with context {repr ctx} @ {c}" ---| .mismatchedName c n m => --- s!"Expected name {n} got {m} @ {c}" ---| .mismatchedNameSet c n ms => --- s!"Expected name {n} in {ms} @ {c}" ---| .invalidBVarIndex c ctx i => --- s!"Bound variable {i} escapes context {ctx} @ {c}" ---| .mismatchedUnivArgs c d m => s!"mismatched univ args in {repr d} {repr m} @ {c}" ---| .mismatchedLevels c n ls => s!"mismatched levels {n} {ls} @ {c}" ---| .mismatchedRules c ras rms => s!"mismatched rules {repr ras} {rms} @ {c}" ---| .mismatchedCtors c cs ms => s!"mismatched rules {repr cs} {ms} @ {c}" ---| .mismatchedMutIdx c ctx n i i' => --- s!"expected mutual recusion index {i} at name {n} but 
got {i'} with context {repr ctx} @ {c}" ---| .unknownMutual c ctx n i => --- s!"DecompileError: unknown mutual name {n} with expected index {i} with context {repr ctx} @ {c}" -----| .transport curr e c m => s!"decompiler transport error {e} at {c} {m} @ {curr}" -----| .unknownName c n => s!"unknown name {n} @ {c}" ---| .badDeserialization a e s => s!"DecompileError: bad deserialization at {a}, expected {e}, error: {s}" ---| .unknownStoreAddress c x => s!"DecompileError: unknown store address {x} @ {c}" ---| .badName c i => s!"expected Name, got {repr i} @ {c}" ---| .badLevel c d m => s!"expected Level, got {repr d} {repr m} @ {c}" ---| .badKVMap c m => s!"expected KVMap, got {repr m} @ {c}" ---| .badKVMapMetadatum c m => s!"expected KVMapMetadatum, got {repr m} @ {c}" ---| .badExpr c d m => s!"expected Expr, got {repr d} {repr m} @ {c}" ---| .badDef c d m => s!"expected Def, got {repr d} {repr m} @ {c}" ---| .badRecr c d m => s!"expected Recr, got {repr d} {repr m} @ {c}" ---| .badCtor c d m => s!"expected Ctor, got {repr d} {repr m} @ {c}" ---| .badIndc c d m => s!"expected Indc, got {repr d} {repr m} @ {c}" ---| .badMuts c d m => s!"expected Muts, got {repr d} {repr m} @ {c}" ---| .badConst c d m => s!"expected const, got {repr d} {repr m} @ {c}" ---| .badProj c b n => s!"bad Block projection of {repr b}, with {n} @ {c}" ---| .badProjMeta c m n => s!"bad Block projection metadata {repr m} with {n} @ {c}" ---| .badCache n s => s!"bad cache entry, expected {n} in {repr s}" -----| .expectedIxonMetadata c x ixon => s!"expected metadata at address {x}, got {repr ixon} @ {c}" -----| .badProjection curr n c m s => s!"bad projection {n} at address {c}:{m}, {s} @ {curr}" -----| .nonCongruentInductives c x y => s!"noncongruent inductives {repr x} {repr y} @ {c}" -----| .nameNotInBlockNames curr b n c m => s!"expected block names {repr b} at {c}:{m} to contain {n} @ {curr}" -----| .nameNotInBlock curr b n c m => s!"expected block {repr b} at {c}:{m} to contain {n} @ {curr}" 
-----| .mismatchedName curr e g c m => ----- s!"expected name {e}, got {g} at address {c} {m} @ {curr}" -----| .expectedNameInBlock curr e b c m => ----- s!"expected name {e} in block {repr b} at address {c} {m} @ {curr}" -----| .expectedDefnBlock curr e g c m => ----- s!"expected definition named {e}, got {repr g} at address {c} {m} @ {curr}" -----| .expectedMutDefBlock curr g c m => ----- s!"expected mutual definition block, got {repr g} at address {c} {m} @ {curr}" -----| .expectedMutIndBlock curr g c m => ----- s!"expected mutual inductive block, got {repr g} at address {c} {m} @ {curr}" -----| .expectedMutIndConst curr g c m => ----- s!"expected mutual inductive constant, got {repr g} at address {c} {m} @ {curr}" -----| .expectedMutDefConst curr g c m => ----- s!"expected mutual definition constant, got {repr g} at address {c} {m} @ {curr}" -----| .overloadedConstants curr x y => ----- s!"overloaded constants, tried to overwrite {repr y} with {repr x} @ {curr}" ---| .todo => s!"todo" --- ---abbrev DecompileM := ReaderT DecompileEnv <| EStateM DecompileError DecompileState --- ---def DecompileM.run (env: DecompileEnv) (stt: DecompileState) (c : DecompileM α) --- : EStateM.Result DecompileError DecompileState α --- := EStateM.run (ReaderT.run c env) stt --- ----- add binding name to local context ---def DecompileM.withBinder (name: Lean.Name) : DecompileM α -> DecompileM α := --- withReader $ fun c => { c with bindCtx := name :: c.bindCtx } --- ----- add levels to local context ---def DecompileM.withLevels (lvls : List Lean.Name) : DecompileM α -> DecompileM α := --- withReader $ fun c => { c with univCtx := lvls } --- ----- add mutual recursion info to local context ---def DecompileM.withMutCtx (mutCtx : Std.HashMap Lean.Name Nat) --- : DecompileM α -> DecompileM α := --- withReader $ fun c => { c with mutCtx := mutCtx } --- ---def withNamed (name: Lean.Name) (cont «meta»: Address) --- : DecompileM α -> DecompileM α := --- withReader $ fun c => { c with current 
:= ⟨name, cont, «meta»⟩ } --- ----- reset local context ---def DecompileM.resetCtx (name: Lean.Name) («meta»: MetaAddress) --- : DecompileM α -> DecompileM α := --- withReader $ fun c => { c with --- univCtx := [], bindCtx := [], mutCtx := {}, current := ⟨name, «meta»⟩ --- } --- ---def readStore [Serialize A] (addr: Address) (exp: String): DecompileM A := do --- --dbg_trace "readStore {addr}" --- match (<- read).store.find? addr with --- | some bytes => match Ixon.de bytes with --- | .ok ixon => pure ixon --- | .error e => throw <| .badDeserialization addr exp e --- | none => throw <| .unknownStoreAddress (<- read).current addr --- ---def readNat (addr: Address) : DecompileM Nat := do --- --dbg_trace "readNat {addr}" --- match (<- read).store.find? addr with --- | some bytes => return Nat.fromBytesLE bytes.data --- | none => throw <| .unknownStoreAddress (<- read).current addr --- ---def readString (addr: Address): DecompileM String := do --- --dbg_trace "readString {addr}" --- match (<- read).store.find? addr with --- | some bytes => match String.fromUTF8? bytes with --- | .some s => pure s --- | .none => throw <| .badDeserialization addr "UTF8" "" --- | none => throw <| .unknownStoreAddress (<- read).current addr --- ---def readIxon (addr: Address) : DecompileM Ixon := do --- --dbg_trace "readIxon {addr}" --- match (<- read).store.find? addr with --- | some bytes => match Ixon.de bytes with --- | .ok ixon => pure ixon --- | .error e => throw <| .badDeserialization addr "Ixon" e --- | none => throw <| .unknownStoreAddress (<- read).current addr --- ---partial def decompileName (addr: Address) : DecompileM Lean.Name := do --- match (<- get).nameCache.find? 
addr with --- | some name => --- --dbg_trace "decompileName {(<- read).current.name} {addr} {name}" --- pure name --- | none => do --- let name <- go (<- readIxon addr) --- --dbg_trace "decompileName {(<- read).current.name} {addr} {name}" --- modifyGet fun stt => (name, { stt with --- nameCache := stt.nameCache.insert addr name --- }) --- where --- go : Ixon -> DecompileM Lean.Name --- | .nanon => return Lean.Name.anonymous --- | .nstr n s => do --- let n' <- decompileName n --- let s' <- readString s --- return Lean.Name.str n' s' --- | .nnum n i => do --- let n' <- decompileName n --- let i' <- readNat i --- return Lean.Name.num n' i' --- | ixon => do throw <| .badName (<- read).current ixon --- ---partial def decompileLevel (addr: MetaAddress): DecompileM Lean.Level := do --- --dbg_trace s!"decompileLevel" --- match (<- get).univCache.find? addr with --- | some x => pure x --- | none => do --- let level <- go (<- readIxon addr.data) (<- readIxon addr.meta) --- modifyGet fun stt => (level, { stt with --- univCache := stt.univCache.insert addr level --- }) --- where --- go : Ixon -> Ixon -> DecompileM Lean.Level --- | .uzero, .meta ⟨[]⟩ => return .zero --- | .usucc a, .meta ⟨[.link m]⟩ => do --- let x <- decompileLevel ⟨a, m⟩ --- return .succ x --- | .umax xa ya, .meta ⟨[.link xm, .link ym]⟩ => do --- let x <- decompileLevel ⟨xa, xm⟩ --- let y <- decompileLevel ⟨ya, ym⟩ --- return .max x y --- | .uimax xa ya, .meta ⟨[.link xm, .link ym]⟩ => do --- let x <- decompileLevel ⟨xa, xm⟩ --- let y <- decompileLevel ⟨ya, ym⟩ --- return .imax x y --- | .uvar i, .meta ⟨[.link n]⟩ => do --- let name <- decompileName n --- match (<- read).univCtx[i]? 
with --- | some name' => do --- if name' == name then pure (.param name) --- else throw <| .mismatchedLevelName (<- read).current (<- read).univCtx name name' i --- | none => do throw <| .freeLevel (<- read).current (<- read).univCtx name i --- | d , m => do throw <| .badLevel (<- read).current d m --- ---def decompileSubstring : Ixon.Substring -> DecompileM Substring ---| ⟨s, startPos, stopPos⟩ => do pure ⟨<- readString s, ⟨startPos⟩, ⟨stopPos⟩⟩ --- ---def decompileSourceInfo : Ixon.SourceInfo -> DecompileM Lean.SourceInfo ---| .original l p t e => do --- let l' <- decompileSubstring l --- let t' <- decompileSubstring t --- pure <| .original l' ⟨p⟩ t' ⟨e⟩ ---| .synthetic p e c => pure <| .synthetic ⟨p⟩ ⟨e⟩ c ---| .none => pure .none --- ---def decompilePreresolved : Ixon.Preresolved -> DecompileM Lean.Syntax.Preresolved ---| .namespace ns => .namespace <$> decompileName ns ---| .decl n fs => .decl <$> decompileName n <*> (fs.mapM readString) --- ---partial def decompileSyntax (addr: Address): DecompileM Lean.Syntax := do --- match (<- get).synCache.find? 
addr with --- | some x => pure x --- | none => do --- let syn' <- go (<- readStore addr "Syntax") --- modifyGet fun stt => (syn', { stt with --- synCache := stt.synCache.insert addr syn' --- }) --- where --- go : Ixon.Syntax -> DecompileM Lean.Syntax --- | .missing => pure .missing --- | .node info kind args => do --- let info' <- decompileSourceInfo info --- let kind' <- decompileName kind --- let args' <- args.mapM decompileSyntax --- pure <| .node info' kind' ⟨args'⟩ --- | .atom info val => do --- let info' <- decompileSourceInfo info --- let val' <- readString val --- pure <| .atom info' val' --- | .ident info rawVal val preresolved => do --- let info' <- decompileSourceInfo info --- let rawVal' <- decompileSubstring rawVal --- let val' <- decompileName val --- let ps' <- preresolved.mapM decompilePreresolved --- pure <| .ident info' rawVal' val' ps' --- ---partial def decompileDataValue: Ixon.DataValue -> DecompileM Lean.DataValue ---| .ofString s => .ofString <$> readString s ---| .ofBool b => pure (.ofBool b) ---| .ofName n => .ofName <$> decompileName n ---| .ofNat i => .ofNat <$> readNat i ---| .ofInt i => .ofInt <$> readStore i "Int" ---| .ofSyntax s => .ofSyntax <$> (decompileSyntax s) --- ---partial def decompileKVMaps (addr: Address) : DecompileM (List Lean.KVMap) := do --- match (<- readIxon addr) with --- | .meta ⟨ms⟩ => ms.mapM go --- | x => throw <| .badKVMap (<- read).current x --- where --- go : Metadatum -> DecompileM Lean.KVMap --- | .kvmap xs => do --- let mut kv := {} --- for (n, d) in xs do --- let n <- decompileName n --- let d <- decompileDataValue d --- kv := kv.insert n d --- return kv --- | x => do throw <| .badKVMapMetadatum (<- read).current x --- ---partial def insertBlock (block: Block): DecompileM (Set Lean.Name) := do --- let mut set := {} --- for c in block.consts do --- modify fun stt => { stt with constants := stt.constants.insert c.name c } --- set := set.insert c.name --- return set --- ---def namesEqPatch (x y: Lean.Name) : 
Bool := --- let cs2 := Lean.Name.mkSimple "_cstage2" --- x == y || x == y.append cs2 || x.append cs2 == y --- ---def matchNames (x y: Lean.Name) : DecompileM α -> DecompileM α ---| a => do --- if namesEqPatch x y then a --- else throw <| .mismatchedName (<- read).current x y --- ---partial def matchBlock (n: Lean.Name) (idx: Nat) (block: Block) --- : DecompileM MutConst := go block --- where --- go : Block -> DecompileM MutConst --- | .muts mss => match mss[idx]? with --- | .some ms => match ms.find? (fun m => namesEqPatch n m.name) with --- | .some m => return m --- | .none => do throw <| .todo --- | .none => do throw <| .todo --- | _ => do throw <| .todo --- ---mutual --- ---partial def decompileExpr (addr: MetaAddress): DecompileM Lean.Expr := do --- --dbg_trace s!"decompileExpr {addr}" --- match (<- get).exprCache.find? addr with --- | some x => pure x --- | none => do --- let level <- go (<- readIxon addr.data) (<- readIxon addr.meta) --- modifyGet fun stt => (level, { stt with --- exprCache := stt.exprCache.insert addr level --- }) --- where --- mdata : List Lean.KVMap -> Lean.Expr -> Lean.Expr --- | [], x => x --- | kv::kvs, x => .mdata kv (mdata kvs x) --- univs (as ms: List Address): DecompileM (List Lean.Level) := do --- if as.length != ms.length --- then throw <| .mismatchedUnivArgs (<- read).current as ms --- else (as.zip ms).mapM fun (ua, um) => decompileLevel ⟨ua, um⟩ --- go : Ixon -> Ixon -> DecompileM Lean.Expr --- | .evar i, .meta ⟨[.link md]⟩ => do --- --dbg_trace s!"decompileExpr evar" --- let kvs <- decompileKVMaps md --- match (<- read).bindCtx[i]? 
with --- | some _ => return mdata kvs (.bvar i) --- | none => throw <| .invalidBVarIndex (<-read).current (<-read).bindCtx i --- | .esort ua, .meta ⟨[.link md, .link um]⟩ => do --- --dbg_trace s!"decompileExpr esort" --- let kvs <- decompileKVMaps md --- let u <- decompileLevel ⟨ua, um⟩ --- return mdata kvs (.sort u) --- | .erec idx uas, .meta ⟨[.link md, .link n, .links ums]⟩ => do --- let kvs <- decompileKVMaps md --- let us <- univs uas ums --- let name <- decompileName n --- --dbg_trace s!"decompileExpr {(<- read).current} erec {idx} {name}, md: {md}, us: {repr us}, n: {n}" --- match (<- read).mutCtx.get? name with --- | some idx' => do --- if idx' == idx then return mdata kvs (.const name us) --- else throw <| .mismatchedMutIdx (<- read).current (<- read).mutCtx name idx idx' --- | none => do throw <| .unknownMutual (<- read).current (<- read).mutCtx name idx --- | .eref rd uas, .meta ⟨[.link md, .link n, .link rm, .links ums]⟩ => do --- let name <- decompileName n --- --dbg_trace s!"decompileExpr {(<- read).current} eref {name}" --- let kvs <- decompileKVMaps md --- let us <- univs uas ums --- let (name', _) <- decompileNamedConst name ⟨rd, rm⟩ --- return mdata kvs (.const name' us) --- | .eapp fa aa, .meta ⟨[.link md, .link fm, .link am]⟩ => do --- --dbg_trace s!"decompileExpr eapp" --- let kvs <- decompileKVMaps md --- let f <- decompileExpr ⟨fa, fm⟩ --- let a <- decompileExpr ⟨aa, am⟩ --- return mdata kvs (.app f a) --- | .elam ta ba, .meta ⟨[.link md, .link n, .info i, .link tm, .link bm]⟩ => do --- --dbg_trace s!"decompileExpr elam" --- let name <- decompileName n --- let kvs <- decompileKVMaps md --- let t <- decompileExpr ⟨ta, tm⟩ --- let b <- .withBinder name (decompileExpr ⟨ba, bm⟩) --- return mdata kvs (.lam name t b i) --- | .eall ta ba, .meta ⟨[.link md, .link n, .info i, .link tm, .link bm]⟩ => do --- --dbg_trace s!"decompileExpr eall" --- let name <- decompileName n --- let kvs <- decompileKVMaps md --- let t <- decompileExpr ⟨ta, tm⟩ --- let b 
<- .withBinder name (decompileExpr ⟨ba, bm⟩) --- return mdata kvs (.forallE name t b i) --- | .elet nD ta va ba, .meta ⟨[.link md, .link n, .link tm, .link vm, .link bm]⟩ => do --- --dbg_trace s!"decompileExpr elet" --- let name <- decompileName n --- let kvs <- decompileKVMaps md --- let t <- decompileExpr ⟨ta, tm⟩ --- let v <- decompileExpr ⟨va, vm⟩ --- let b <- .withBinder name (decompileExpr ⟨ba, bm⟩) --- return mdata kvs (.letE name t v b nD) --- | .enat n, .meta ⟨[.link md]⟩ => do --- --dbg_trace s!"decompileExpr enat" --- let kvs <- decompileKVMaps md --- let n <- readNat n --- return mdata kvs (.lit (.natVal n)) --- | .estr n, .meta ⟨[.link md]⟩ => do --- --dbg_trace s!"decompileExpr estr" --- let kvs <- decompileKVMaps md --- let n <- readString n --- return mdata kvs (.lit (.strVal n)) --- | .eprj ta idx sa, .meta ⟨[.link md, .link n, .link tm, .link sm]⟩ => do --- --dbg_trace s!"decompileExpr eprj" --- let kvs <- decompileKVMaps md --- let name <- decompileName n --- let (name', _) <- decompileNamedConst name ⟨ta, tm⟩ --- let s <- decompileExpr ⟨sa, sm⟩ --- return mdata kvs (.proj name' idx s) --- | d , m => do throw <| .badExpr (<- read).current d m --- ---partial def decompileLevels (n: Nat) (ls: List Address) --- : DecompileM (List Lean.Name) := do --- --dbg_trace "decompileLevels" --- if ls.length != n then throw <| .mismatchedLevels (<- read).current n ls --- else ls.mapM decompileName --- ---partial def decompileDef: Ixon.Definition -> Metadata -> DecompileM Def ---| d, ⟨[.link n, .links ls, .hints h, .link tm, .link vm, .links as]⟩ => do --- let name <- decompileName n -----dbg_trace s!"decompileDef {(<- read).current} {name} {repr (<- read).mutCtx}" --- let lvls <- decompileLevels d.lvls ls --- .withLevels lvls <| do --- let t <- decompileExpr ⟨d.type, tm⟩ --- let v <- decompileExpr ⟨d.value, vm⟩ --- let all <- as.mapM decompileName --- return ⟨name, lvls, t, d.kind, v, h, d.safety, all⟩ ---| d, m => do throw <| .badDef (<- read).current d m --- 
---partial def decompileRules (rs: List RecursorRule) (ms: List (Address × Address)) --- : DecompileM (List Lean.RecursorRule) := do --- if rs.length != ms.length --- then throw <| .mismatchedRules (<- read).current rs ms --- else (List.zip rs ms).mapM <| fun (r, n, rm) => do --- let n <- decompileName n --- let rhs <- decompileExpr ⟨r.rhs, rm⟩ --- return ⟨n, r.fields, rhs⟩ --- ---partial def decompileRecr: Ixon.Recursor -> Metadata -> DecompileM Rec ---| r, ⟨[.link n, .links ls, .link tm, .map rs, .links as]⟩ => do --- let name <- decompileName n -----dbg_trace s!"decompileRecr {(<- read).current} {name} {repr (<- read).mutCtx}" --- let lvls <- decompileLevels r.lvls ls --- .withLevels lvls <| do --- let t <- decompileExpr ⟨r.type, tm⟩ --- let all <- as.mapM decompileName --- let rs <- decompileRules r.rules rs --- return ⟨⟨name, lvls, t⟩, all, r.params, r.indices, r.motives, --- r.minors, rs, r.k, r.isUnsafe⟩ ---| r, m => do throw <| .badRecr (<- read).current r m --- ---partial def decompileCtors (cs: List Ixon.Constructor) (ms: List Address) --- : DecompileM (List Lean.ConstructorVal) := do --- if cs.length != ms.length --- then throw <| .mismatchedCtors (<- read).current cs ms --- else (List.zip cs ms).mapM <| fun (c, m) => do go c (<- readIxon m) --- where --- go : Ixon.Constructor -> Ixon -> DecompileM Lean.ConstructorVal --- | c, .meta ⟨[.link n, .links ls, .link tm, .link i]⟩ => do --- let name <- decompileName n --- let induct <- decompileName i --- let lvls <- decompileLevels c.lvls ls --- let type <- decompileExpr ⟨c.type, tm⟩ --- return ⟨⟨name, lvls, type⟩, induct, c.cidx, c.params, c.fields, c.isUnsafe⟩ --- | c, m => do throw <| .badCtor (<- read).current c m --- ---partial def decompileIndc: Ixon.Inductive -> Metadata -> DecompileM Ind ---| i, ⟨[.link n, .links ls, .link tm, .links cs, .links as]⟩ => do --- let name <- decompileName n -----dbg_trace s!"decompileIndc {(<- read).current} {name} {repr (<- read).mutCtx}" --- let lvls <- decompileLevels 
i.lvls ls --- .withLevels lvls <| do --- let t <- decompileExpr ⟨i.type, tm⟩ --- let all <- as.mapM decompileName --- let ctors <- decompileCtors i.ctors cs --- return ⟨name, lvls, t, i.params, i.indices, all, ctors, --- i.nested, i.recr, i.refl, i.isUnsafe⟩ ---| i, m => do throw <| .badIndc (<- read).current i m --- ---partial def decompileConst (addr: MetaAddress) --- : DecompileM (Lean.Name × Set Lean.Name) := do -----dbg_trace s!"decompileConst {(<- read).current} {addr} {repr (<- read).mutCtx}" --- match (<- get).constCache.find? addr with --- | some x => pure x --- | none => do --- let (name, block) <- go (<- readIxon addr.data) (<- readIxon addr.meta) --- let blockNames <- insertBlock block --- modifyGet fun stt => ((name, blockNames), { stt with --- constCache := stt.constCache.insert addr (name, blockNames) --- }) --- where --- go : Ixon -> Ixon -> DecompileM (Lean.Name × Block) --- | .defn d, .meta m@⟨(.link n)::_⟩ => do --- --dbg_trace s!"decompileConst defn" --- let name <- decompileName n --- let d <- .withMutCtx {(name, 0)} <| decompileDef d m --- return (d.name, .defn d) --- | .axio a, .meta ⟨[.link n, .links ls, .link tm]⟩ => do --- --dbg_trace s!"decompileConst axio" --- let name <- decompileName n --- let lvls <- decompileLevels a.lvls ls --- let t <- decompileExpr ⟨a.type, tm⟩ --- return (name, .axio ⟨⟨name, lvls, t⟩, a.isUnsafe⟩) --- | .quot q, .meta ⟨[.link n, .links ls, .link tm]⟩ => do --- --dbg_trace s!"decompileConst quot" --- let name <- decompileName n --- let lvls <- decompileLevels q.lvls ls --- let t <- decompileExpr ⟨q.type, tm⟩ --- return (name, .quot ⟨⟨name, lvls, t⟩, q.kind⟩) --- | .recr r, .meta m@⟨(.link n)::_⟩ => do --- --dbg_trace s!"decompileConst recr" --- let name <- decompileName n --- let r <- .withMutCtx {(name, 0)} <| decompileRecr r m --- return (r.name, .recr r) --- | .dprj ⟨idx, bd⟩, .meta ⟨[.link bm, .link m]⟩ => do --- match (<- readIxon m) with --- | .meta ⟨[.link n, .links _, .hints _, .link _, .link _, .links _]⟩ 
=> do --- let name <- decompileName n --- let block <- decompileMuts (<- readIxon bd) (<- readIxon bm) --- match (<- matchBlock name idx block) with --- | .defn _ => pure (name, block) --- | e => throw <| .badProj (<- read).current block s!"malformed dprj at {idx} of {repr e}" --- | m => do throw <| .badProjMeta (<- read).current m "dprj" --- | .rprj ⟨idx, bd⟩, .meta ⟨[.link bm, .link m]⟩ => do --- match (<- readIxon m) with --- | .meta ⟨[.link n, .links _, .link _, .map _, .links _]⟩ => do --- let name <- decompileName n --- let block <- decompileMuts (<- readIxon bd) (<- readIxon bm) --- match (<- matchBlock name idx block) with --- | .recr _ => (pure (name, block)) --- | e => throw <| .badProj (<- read).current block s!"malformed rprj at {idx} of {repr e}" --- | m => do throw <| .badProjMeta (<- read).current m "rprj" --- | .iprj ⟨idx, bd⟩, .meta ⟨[.link bm, .link m]⟩ => do --- match (<- readIxon m) with --- | .meta ⟨[.link n, .links _, .link _, .links _, .links _]⟩ => do --- let name <- decompileName n --- let block <- decompileMuts (<- readIxon bd) (<- readIxon bm) --- match (<- matchBlock name idx block) with --- | .indc _ => (pure (name, block)) --- | e => throw <| .badProj (<- read).current block s!"malformed iprj at {idx} of {repr e}" --- | m => do throw <| .badProjMeta (<- read).current m "iprj" --- | .cprj ⟨idx, cidx, bd⟩, .meta ⟨[.link bm, .link m]⟩ => do --- match (<- readIxon m) with --- | .meta ⟨[.link n, .links _, .link _, .link i]⟩ => do --- let name <- decompileName n --- let induct <- decompileName i --- let block <- decompileMuts (<- readIxon bd) (<- readIxon bm) --- match (<- matchBlock induct idx block) with --- | .indc i => match i.ctors[cidx]? 
with --- | .some c => matchNames name c.name (pure (name, block)) --- | .none => do throw <| .badProj (<- read).current block s!"malformed cprj ctor index {cidx}" --- | e => throw <| .badProj (<- read).current block s!"malformed cprj at {idx} of {repr e}" --- | m => do throw <| .badProjMeta (<- read).current m "cprj" --- | .prim .obj, .meta ⟨[]⟩ => do --- --dbg_trace s!"decompileConst prim _obj" --- return (.mkSimple "_obj", .prim) --- | .prim .neutral, .meta ⟨[]⟩ => do --- --dbg_trace s!"decompileConst prim _neutral" --- return (.mkSimple "_neutral", .prim) --- | .prim .unreachable, .meta ⟨[]⟩ => do --- --dbg_trace s!"decompileConst prim _unreachable" --- return (.mkSimple "_unreachable", .prim) --- | d, m => do throw <| .badConst (<- read).current d m --- ---partial def decompileNamedConst (name: Lean.Name) (addr: MetaAddress) --- : DecompileM (Lean.Name × Set Lean.Name) := do --- --dbg_trace s!"decompileNamedConst {name} {addr}" -----dbg_trace s!"decompileNamedConst {name} {addr} {repr (<- read).mutCtx}" --- let (n, set) <- .resetCtx name addr <| decompileConst addr --- matchNames n name (pure (n, set)) --- ---partial def decompileMutConst : Ixon.MutConst -> Metadata -> DecompileM MutConst ---| .defn d, m => .defn <$> decompileDef d m ---| .recr r, m => .recr <$> decompileRecr r m ---| .indc i, m => .indc <$> decompileIndc i m --- ---partial def decompileMuts: Ixon -> Ixon -> DecompileM Block ---| ms@(.muts cs), m@(.meta ⟨[.muts names, .map ctx, .map metaMap]⟩) => do --- --dbg_trace s!"decompileMuts {(<- read).current} {repr (<- read).mutCtx}" --- if cs.length != names.length then throw <| .badMuts (<- read).current ms m --- else --- let mut map : Map Lean.Name Metadata := {} --- for (name, «meta») in metaMap do --- map := map.insert (<- decompileName name) (<- readStore «meta» "Metadata") --- let mut mutClasses := #[] --- let mut mutCtx := {} --- for (n, i) in ctx do --- mutCtx := mutCtx.insert (<- decompileName n) (<- readNat i) --- --dbg_trace 
s!"decompileMuts {(<- read).current} inner mutCtx {repr mutCtx}" --- for (const, names) in cs.zip names do --- let mut mutClass := #[] --- for n in names do --- let name <- decompileName n --- --dbg_trace s!"decompileMuts {(<- read).current} inner loop {name} {repr mutCtx}" --- let const' <- match map.get? name with --- | .some «meta» => .withMutCtx mutCtx <| decompileMutConst const «meta» --- | .none => do throw <| .badMuts (<- read).current ms m --- mutClass := mutClass.push const' --- mutClasses := mutClasses.push mutClass --- return .muts (Array.toList <$> mutClasses).toList ---| ms, m => do throw <| .badMuts (<- read).current ms m --- ---end --- ---end Ix --- -----def decompileEnv : DecompileM Unit := do ----- for (n, (anon, meta)) in (<- read).names do ----- let _ <- ensureBlock n anon meta +/- + DecompileM: Decompilation from the Ixon format to Ix types. + + This module decompiles the Ixon format (with indirection tables, sharing, + and per-expression metadata arenas) back to Ix expressions and constants. + It is the inverse of the compilation pipeline. + + The output is Ix.Expr / Ix.ConstantInfo (with content hashes), NOT Lean.Expr. + Conversion from Ix.Expr → Lean.Expr (decanonicalization) is a separate trivial step. + This design enables cheap hash-based comparison of decompiled results. +-/ + +import Std.Data.HashMap +import Ix.Ixon +import Ix.Address +import Ix.Environment +import Ix.Common + +namespace Ix.DecompileM + +open Ixon + +/-! ## Name Helpers -/ + +/-- Convert Ix.Name to Lean.Name by stripping embedded hashes. -/ +def ixNameToLean : Ix.Name → Lean.Name + | .anonymous _ => .anonymous + | .str parent s _ => .str (ixNameToLean parent) s + | .num parent n _ => .num (ixNameToLean parent) n + +/-- Resolve an address to Ix.Name from the names table. -/ +def resolveIxName (names : Std.HashMap Address Ix.Name) (addr : Address) : Option Ix.Name := + names.get? addr + +/-! ## Error Type -/ + +/-- Decompilation error type. 
Variant order matches Rust DecompileError (tags 0–10). -/ +inductive DecompileError where + | invalidRefIndex (idx : UInt64) (refsLen : Nat) (constant : String) + | invalidUnivIndex (idx : UInt64) (univsLen : Nat) (constant : String) + | invalidShareIndex (idx : UInt64) (max : Nat) (constant : String) + | invalidRecIndex (idx : UInt64) (ctxSize : Nat) (constant : String) + | invalidUnivVarIndex (idx : UInt64) (max : Nat) (constant : String) + | missingAddress (addr : Address) + | missingMetadata (addr : Address) + | blobNotFound (addr : Address) + | badBlobFormat (addr : Address) (expected : String) + | badConstantFormat (msg : String) + | serializeError (err : Ixon.SerializeError) + deriving Repr, BEq + +def DecompileError.toString : DecompileError → String + | .invalidRefIndex idx len c => s!"Invalid ref index {idx} in '{c}': refs table has {len} entries" + | .invalidUnivIndex idx len c => s!"Invalid univ index {idx} in '{c}': univs table has {len} entries" + | .invalidShareIndex idx max c => s!"Invalid share index {idx} in '{c}': sharing vector has {max} entries" + | .invalidRecIndex idx sz c => s!"Invalid rec index {idx} in '{c}': mutual context has {sz} entries" + | .invalidUnivVarIndex idx max c => s!"Invalid univ var index {idx} in '{c}': only {max} level params" + | .missingAddress addr => s!"Missing address: {addr}" + | .missingMetadata addr => s!"Missing metadata for: {addr}" + | .blobNotFound addr => s!"Blob not found at: {addr}" + | .badBlobFormat addr expected => s!"Bad blob format at {addr}, expected {expected}" + | .badConstantFormat msg => s!"Bad constant format: {msg}" + | .serializeError err => s!"Serialization error: {err}" + +instance : ToString DecompileError := ⟨DecompileError.toString⟩ + +/-! ## Context and State Structures -/ + +/-- Global decompilation environment (reader, immutable). 
-/ +structure DecompileEnv where + ixonEnv : Ixon.Env + deriving Inhabited + +/-- Per-block context for decompiling a single constant (reader, immutable per-block). -/ +structure BlockCtx where + refs : Array Address + univs : Array Ixon.Univ + sharing : Array Ixon.Expr + mutCtx : Array Ix.Name -- mutual context: index = Rec index + univParams : Array Ix.Name -- universe parameter names + arena : ExprMetaArena + deriving Inhabited + +/-- Per-block mutable state (caches). -/ +structure BlockState where + exprCache : Std.HashMap (UInt64 × UInt64) Ix.Expr := {} + univCache : Std.HashMap UInt64 Ix.Level := {} + deriving Inhabited + +/-! ## DecompileM Monad -/ + +abbrev DecompileM := ReaderT (DecompileEnv × BlockCtx) (ExceptT DecompileError (StateT BlockState Id)) + +def DecompileM.run (env : DecompileEnv) (ctx : BlockCtx) (stt : BlockState) + (m : DecompileM α) : Except DecompileError (α × BlockState) := + match StateT.run (ExceptT.run (ReaderT.run m (env, ctx))) stt with + | (Except.ok a, stt') => Except.ok (a, stt') + | (Except.error e, _) => Except.error e + +def getEnv : DecompileM DecompileEnv := (·.1) <$> read +def getCtx : DecompileM BlockCtx := (·.2) <$> read + +def withBlockCtx (ctx : BlockCtx) (m : DecompileM α) : DecompileM α := + fun (env, _) => m (env, ctx) + +/-! ## Lookup Helpers -/ + +/-- Resolve Address → Ix.Name via names table, or throw. -/ +def lookupNameAddr (addr : Address) : DecompileM Ix.Name := do + match (← getEnv).ixonEnv.names.get? addr with + | some n => pure n + | none => throw (.missingAddress addr) + +/-- Resolve Address → Ix.Name via names table, or anonymous. -/ +def lookupNameAddrOrAnon (addr : Address) : DecompileM Ix.Name := do + match (← getEnv).ixonEnv.names.get? addr with + | some n => pure n + | none => pure Ix.Name.mkAnon + +/-- Resolve constant Address → Ix.Name via addrToName. -/ +def lookupConstName (addr : Address) : DecompileM Ix.Name := do + match (← getEnv).ixonEnv.addrToName.get? 
addr with + | some n => pure n + | none => throw (.missingAddress addr) + +def lookupBlob (addr : Address) : DecompileM ByteArray := do + match (← getEnv).ixonEnv.blobs.get? addr with + | some blob => pure blob + | none => throw (.blobNotFound addr) + +def getRef (idx : UInt64) : DecompileM Address := do + let ctx ← getCtx + match ctx.refs[idx.toNat]? with + | some addr => pure addr + | none => throw (.invalidRefIndex idx ctx.refs.size "") + +def getMutName (idx : UInt64) : DecompileM Ix.Name := do + let ctx ← getCtx + match ctx.mutCtx[idx.toNat]? with + | some name => pure name + | none => throw (.invalidRecIndex idx ctx.mutCtx.size "") + +def readNatBlob (blob : ByteArray) : Nat := Nat.fromBytesLE blob.data + +def readStringBlob (blob : ByteArray) : DecompileM String := + match String.fromUTF8? blob with + | some s => pure s + -- TODO: pass actual blob address instead of empty for better error diagnostics + | none => throw (.badBlobFormat ⟨ByteArray.empty⟩ "UTF-8 string") + +/-! ## Universe Decompilation → Ix.Level -/ + +partial def decompileUniv (u : Ixon.Univ) : DecompileM Ix.Level := do + let ctx ← getCtx + match u with + | .zero => pure Ix.Level.mkZero + | .succ inner => Ix.Level.mkSucc <$> decompileUniv inner + | .max a b => Ix.Level.mkMax <$> decompileUniv a <*> decompileUniv b + | .imax a b => Ix.Level.mkIMax <$> decompileUniv a <*> decompileUniv b + | .var idx => + match ctx.univParams[idx.toNat]? with + | some name => pure (Ix.Level.mkParam name) + | none => throw (.invalidUnivVarIndex idx ctx.univParams.size "") + +def getUniv (idx : UInt64) : DecompileM Ix.Level := do + let stt ← get + if let some cached := stt.univCache.get? idx then return cached + let ctx ← getCtx + match ctx.univs[idx.toNat]? 
with + | some u => + let lvl ← decompileUniv u + modify fun s => { s with univCache := s.univCache.insert idx lvl } + pure lvl + | none => throw (.invalidUnivIndex idx ctx.univs.size "") + +def decompileUnivIndices (indices : Array UInt64) : DecompileM (Array Ix.Level) := + indices.mapM getUniv + +/-! ## DataValue and KVMap Decompilation → Ix types -/ + +def deserializeInt (bytes : ByteArray) : DecompileM Ix.Int := + if bytes.size == 0 then throw (.badConstantFormat "deserialize_int: empty") + else + let tag := bytes.get! 0 + let rest := bytes.extract 1 bytes.size + let n := Nat.fromBytesLE rest.data + if tag == 0 then pure (.ofNat n) + else if tag == 1 then pure (.negSucc n) + else throw (.badConstantFormat "deserialize_int: invalid tag") + +/-! ### Blob cursor helpers -/ + +structure BlobCursor where + bytes : ByteArray + pos : Nat + deriving Inhabited + +def BlobCursor.readByte (c : BlobCursor) : DecompileM (UInt8 × BlobCursor) := + if c.pos < c.bytes.size then + pure (c.bytes.get! c.pos, { c with pos := c.pos + 1 }) + else throw (.badConstantFormat "BlobCursor: unexpected EOF") + +def BlobCursor.readTag0 (c : BlobCursor) : DecompileM (UInt64 × BlobCursor) := do + let (head, c) ← c.readByte + if head < 128 then pure (head.toUInt64, c) + else + let extraBytes := (head % 128).toNat + 1 + if c.pos + extraBytes > c.bytes.size then + throw (.badConstantFormat "BlobCursor.readTag0: need more bytes") + let mut val : UInt64 := 0 + let mut cur := c + for i in [:extraBytes] do + let (b, c') ← cur.readByte + val := val ||| (b.toUInt64 <<< (i * 8).toUInt64) + cur := c' + pure (val, cur) + +def BlobCursor.readAddr (c : BlobCursor) : DecompileM (Address × BlobCursor) := + if c.pos + 32 ≤ c.bytes.size then + pure (⟨c.bytes.extract c.pos (c.pos + 32)⟩, { c with pos := c.pos + 32 }) + else throw (.badConstantFormat "BlobCursor.readAddr: need 32 bytes") + +def resolveNameFromBlob (addr : Address) : DecompileM Ix.Name := + lookupNameAddrOrAnon addr + +def resolveStringFromBlob 
(addr : Address) : DecompileM String := do + lookupBlob addr >>= readStringBlob + +/-! ### Syntax deserialization → Ix.Syntax -/ + +def deserializeSubstring (c : BlobCursor) : DecompileM (Ix.Substring × BlobCursor) := do + let (strAddr, c) ← c.readAddr + let s ← resolveStringFromBlob strAddr + let (startPos, c) ← c.readTag0 + let (stopPos, c) ← c.readTag0 + pure (⟨s, startPos.toNat, stopPos.toNat⟩, c) + +def deserializeSourceInfo (c : BlobCursor) : DecompileM (Ix.SourceInfo × BlobCursor) := do + let (tag, c) ← c.readByte + match tag with + | 0 => + let (leading, c) ← deserializeSubstring c + let (leadingPos, c) ← c.readTag0 + let (trailing, c) ← deserializeSubstring c + let (trailingPos, c) ← c.readTag0 + pure (.original leading leadingPos.toNat trailing trailingPos.toNat, c) + | 1 => + let (start, c) ← c.readTag0 + let (stop, c) ← c.readTag0 + let (canonical, c) ← c.readByte + pure (.synthetic start.toNat stop.toNat (canonical != 0), c) + | 2 => pure (.none, c) + | _ => throw (.badConstantFormat s!"deserializeSourceInfo: invalid tag {tag}") + +def deserializePreresolved (c : BlobCursor) : DecompileM (Ix.SyntaxPreresolved × BlobCursor) := do + let (tag, c) ← c.readByte + match tag with + | 0 => + let (nameAddr, c) ← c.readAddr + let name ← resolveNameFromBlob nameAddr + pure (.namespace name, c) + | 1 => + let (nameAddr, c) ← c.readAddr + let name ← resolveNameFromBlob nameAddr + let (count, c) ← c.readTag0 + let mut fields : Array String := #[] + let mut cur := c + for _ in [:count.toNat] do + let (fieldAddr, c') ← cur.readAddr + let field ← resolveStringFromBlob fieldAddr + fields := fields.push field + cur := c' + pure (.decl name fields, cur) + | _ => throw (.badConstantFormat s!"deserializePreresolved: invalid tag {tag}") + +partial def deserializeSyntax (c : BlobCursor) : DecompileM (Ix.Syntax × BlobCursor) := do + let (tag, c) ← c.readByte + match tag with + | 0 => pure (.missing, c) + | 1 => + let (info, c) ← deserializeSourceInfo c + let (kindAddr, c) ← 
c.readAddr + let kind ← resolveNameFromBlob kindAddr + let (argCount, c) ← c.readTag0 + let mut args : Array Ix.Syntax := #[] + let mut cur := c + for _ in [:argCount.toNat] do + let (arg, c') ← deserializeSyntax cur + args := args.push arg + cur := c' + pure (.node info kind args, cur) + | 2 => + let (info, c) ← deserializeSourceInfo c + let (valAddr, c) ← c.readAddr + let val ← resolveStringFromBlob valAddr + pure (.atom info val, c) + | 3 => + let (info, c) ← deserializeSourceInfo c + let (rawVal, c) ← deserializeSubstring c + let (valAddr, c) ← c.readAddr + let val ← resolveNameFromBlob valAddr + let (prCount, c) ← c.readTag0 + let mut preresolved : Array Ix.SyntaxPreresolved := #[] + let mut cur := c + for _ in [:prCount.toNat] do + let (pr, c') ← deserializePreresolved cur + preresolved := preresolved.push pr + cur := c' + pure (.ident info rawVal val preresolved, cur) + | _ => throw (.badConstantFormat s!"deserializeSyntax: invalid tag {tag}") + +def deserializeSyntaxBlob (blob : ByteArray) : DecompileM Ix.Syntax := do + let (syn, _) ← deserializeSyntax ⟨blob, 0⟩ + pure syn + +/-- Decompile an Ixon DataValue to an Ix DataValue. -/ +def decompileDataValue (dv : Ixon.DataValue) : DecompileM Ix.DataValue := + match dv with + | .ofString addr => do pure (.ofString (← lookupBlob addr >>= readStringBlob)) + | .ofBool b => pure (.ofBool b) + | .ofName addr => do pure (.ofName (← lookupNameAddr addr)) + | .ofNat addr => do pure (.ofNat (readNatBlob (← lookupBlob addr))) + | .ofInt addr => do pure (.ofInt (← lookupBlob addr >>= deserializeInt)) + | .ofSyntax addr => do pure (.ofSyntax (← lookupBlob addr >>= deserializeSyntaxBlob)) + +/-- Decompile an Ixon KVMap to Ix mdata format. 
-/ +def decompileKVMap (kvm : Ixon.KVMap) : DecompileM (Array (Ix.Name × Ix.DataValue)) := do + let mut result : Array (Ix.Name × Ix.DataValue) := #[] + for (keyAddr, dataVal) in kvm do + let keyName ← lookupNameAddr keyAddr + let val ← decompileDataValue dataVal + result := result.push (keyName, val) + pure result + +/-! ## Mdata Application -/ + +/-- Apply collected mdata layers to an Ix.Expr (outermost-first). -/ +def applyMdata (expr : Ix.Expr) (layers : Array (Array (Ix.Name × Ix.DataValue))) : Ix.Expr := + layers.foldr (init := expr) fun mdata e => Ix.Expr.mkMData mdata e + +/-! ## Expression Decompilation → Ix.Expr -/ + +def getArenaNode (idx : UInt64) : DecompileM ExprMetaData := do + pure ((← getCtx).arena.nodes[idx.toNat]?.getD .leaf) + +/-- Decompile an expression to Ix.Expr with arena-based metadata. -/ +partial def decompileExpr (e : Ixon.Expr) (arenaIdx : UInt64) : DecompileM Ix.Expr := do + -- 1. Expand Share transparently + match e with + | .share idx => + let ctx ← getCtx + match ctx.sharing[idx.toNat]? with + | some sharedExpr => decompileExpr sharedExpr arenaIdx + | none => throw (.invalidShareIndex idx ctx.sharing.size "") + | _ => + + -- Check cache + let cacheKey := (hash e, arenaIdx) + if let some cached := (← get).exprCache.get? cacheKey then return cached + + -- 2. Follow mdata chain + let mut currentIdx := arenaIdx + let mut mdataLayers : Array (Array (Ix.Name × Ix.DataValue)) := #[] + let mut done := false + while !done do + match ← getArenaNode currentIdx with + | .mdata kvmaps child => + for kvm in kvmaps do + mdataLayers := mdataLayers.push (← decompileKVMap kvm) + currentIdx := child + | _ => done := true + + let node ← getArenaNode currentIdx + + -- 3. 
Match (arenaNode, ixonExpr) → Ix.Expr + let result ← match node, e with + | _, .var idx => + pure (applyMdata (Ix.Expr.mkBVar idx.toNat) mdataLayers) + + | _, .sort univIdx => do + pure (applyMdata (Ix.Expr.mkSort (← getUniv univIdx)) mdataLayers) + + | _, .nat refIdx => do + let blob ← getRef refIdx >>= lookupBlob + pure (applyMdata (Ix.Expr.mkLit (.natVal (readNatBlob blob))) mdataLayers) + + | _, .str refIdx => do + let blob ← getRef refIdx >>= lookupBlob + let s ← readStringBlob blob + pure (applyMdata (Ix.Expr.mkLit (.strVal s)) mdataLayers) + + -- Ref with arena metadata + | .ref nameAddr, .ref refIdx univIndices => do + let name ← match (← getEnv).ixonEnv.names.get? nameAddr with + | some n => pure n + | none => getRef refIdx >>= lookupConstName + let lvls ← decompileUnivIndices univIndices + pure (applyMdata (Ix.Expr.mkConst name lvls) mdataLayers) + + -- Ref without arena metadata + | _, .ref refIdx univIndices => do + let name ← getRef refIdx >>= lookupConstName + let lvls ← decompileUnivIndices univIndices + pure (applyMdata (Ix.Expr.mkConst name lvls) mdataLayers) + + -- Rec with arena metadata + | .ref nameAddr, .recur recIdx univIndices => do + let name ← match (← getEnv).ixonEnv.names.get? 
nameAddr with + | some n => pure n + | none => getMutName recIdx + let lvls ← decompileUnivIndices univIndices + pure (applyMdata (Ix.Expr.mkConst name lvls) mdataLayers) + + -- Rec without arena metadata + | _, .recur recIdx univIndices => do + let name ← getMutName recIdx + let lvls ← decompileUnivIndices univIndices + pure (applyMdata (Ix.Expr.mkConst name lvls) mdataLayers) + + -- App with arena metadata + | .app funIdx argIdx, .app fn arg => do + let fnExpr ← decompileExpr fn funIdx + let argExpr ← decompileExpr arg argIdx + pure (applyMdata (Ix.Expr.mkApp fnExpr argExpr) mdataLayers) + + | _, .app fn arg => do + let fnExpr ← decompileExpr fn UInt64.MAX + let argExpr ← decompileExpr arg UInt64.MAX + pure (applyMdata (Ix.Expr.mkApp fnExpr argExpr) mdataLayers) + + -- Lam with arena metadata + | .binder nameAddr info tyChild bodyChild, .lam ty body => do + let binderName ← lookupNameAddrOrAnon nameAddr + let tyExpr ← decompileExpr ty tyChild + let bodyExpr ← decompileExpr body bodyChild + pure (applyMdata (Ix.Expr.mkLam binderName tyExpr bodyExpr info) mdataLayers) + + | _, .lam ty body => do + let tyExpr ← decompileExpr ty UInt64.MAX + let bodyExpr ← decompileExpr body UInt64.MAX + pure (applyMdata (Ix.Expr.mkLam Ix.Name.mkAnon tyExpr bodyExpr .default) mdataLayers) + + -- ForallE with arena metadata + | .binder nameAddr info tyChild bodyChild, .all ty body => do + let binderName ← lookupNameAddrOrAnon nameAddr + let tyExpr ← decompileExpr ty tyChild + let bodyExpr ← decompileExpr body bodyChild + pure (applyMdata (Ix.Expr.mkForallE binderName tyExpr bodyExpr info) mdataLayers) + + | _, .all ty body => do + let tyExpr ← decompileExpr ty UInt64.MAX + let bodyExpr ← decompileExpr body UInt64.MAX + pure (applyMdata (Ix.Expr.mkForallE Ix.Name.mkAnon tyExpr bodyExpr .default) mdataLayers) + + -- Let with arena metadata + | .letBinder nameAddr tyChild valChild bodyChild, .letE nonDep ty val body => do + let letName ← lookupNameAddrOrAnon nameAddr + let tyExpr ← 
decompileExpr ty tyChild + let valExpr ← decompileExpr val valChild + let bodyExpr ← decompileExpr body bodyChild + pure (applyMdata (Ix.Expr.mkLetE letName tyExpr valExpr bodyExpr nonDep) mdataLayers) + + | _, .letE nonDep ty val body => do + let tyExpr ← decompileExpr ty UInt64.MAX + let valExpr ← decompileExpr val UInt64.MAX + let bodyExpr ← decompileExpr body UInt64.MAX + pure (applyMdata (Ix.Expr.mkLetE Ix.Name.mkAnon tyExpr valExpr bodyExpr nonDep) mdataLayers) + + -- Prj with arena metadata + | .prj structNameAddr child, .prj _typeRefIdx fieldIdx val => do + let typeName ← lookupNameAddr structNameAddr + let valExpr ← decompileExpr val child + pure (applyMdata (Ix.Expr.mkProj typeName fieldIdx.toNat valExpr) mdataLayers) + + | _, .prj typeRefIdx fieldIdx val => do + let typeName ← getRef typeRefIdx >>= lookupConstName + let valExpr ← decompileExpr val UInt64.MAX + pure (applyMdata (Ix.Expr.mkProj typeName fieldIdx.toNat valExpr) mdataLayers) + + | _, .share _ => throw (.badConstantFormat "unexpected Share in decompileExpr") + + modify fun s => { s with exprCache := s.exprCache.insert cacheKey result } + pure result + +/-! ## Type Conversion Helpers -/ + +def toIxSafety : DefinitionSafety → Lean.DefinitionSafety + | .unsaf => .unsafe | .safe => .safe | .part => .partial + +def toIxQuotKind : QuotKind → Lean.QuotKind + | .type => .type | .ctor => .ctor | .lift => .lift | .ind => .ind + +/-! ## ConstantMeta Extraction Helpers -/ + +def getNameAddr : ConstantMeta → Option Address + | .defn name .. => some name | .axio name .. => some name + | .quot name .. => some name | .indc name .. => some name + | .ctor name .. => some name | .recr name .. => some name + | .empty => none + +def getLvlAddrs : ConstantMeta → Array Address + | .defn _ lvls .. => lvls | .axio _ lvls .. => lvls + | .quot _ lvls .. => lvls | .indc _ lvls .. => lvls + | .ctor _ lvls .. => lvls | .recr _ lvls .. 
=> lvls + | .empty => #[] + +def getArenaAndTypeRoot : ConstantMeta → ExprMetaArena × UInt64 + | .defn _ _ _ _ _ arena typeRoot _ => (arena, typeRoot) + | .axio _ _ arena typeRoot => (arena, typeRoot) + | .quot _ _ arena typeRoot => (arena, typeRoot) + | .indc _ _ _ _ _ arena typeRoot => (arena, typeRoot) + | .ctor _ _ _ arena typeRoot => (arena, typeRoot) + | .recr _ _ _ _ _ arena typeRoot _ => (arena, typeRoot) + | .empty => ({}, 0) + +def getAllAddrs : ConstantMeta → Array Address + | .defn _ _ _ all .. => all | .indc _ _ _ all .. => all + | .recr _ _ _ all .. => all | _ => #[] + +def getCtxAddrs : ConstantMeta → Array Address + | .defn _ _ _ _ ctx .. => ctx | .indc _ _ _ _ ctx .. => ctx + | .recr _ _ _ _ ctx .. => ctx | _ => #[] + +/-- Resolve name from ConstantMeta. -/ +def decompileMetaName (cMeta : ConstantMeta) : DecompileM Ix.Name := + match getNameAddr cMeta with + | some addr => lookupNameAddr addr + | none => throw (.badConstantFormat "empty metadata, no name") + +/-- Resolve level param names from ConstantMeta. -/ +def decompileMetaLevels (cMeta : ConstantMeta) : DecompileM (Array Ix.Name) := + (getLvlAddrs cMeta).mapM lookupNameAddr + +/-- Resolve all names from ConstantMeta. -/ +def decompileMetaAll (cMeta : ConstantMeta) (fallback : Ix.Name) : DecompileM (Array Ix.Name) := do + let addrs := getAllAddrs cMeta + if addrs.isEmpty then return #[fallback] + let mut names : Array Ix.Name := #[] + for addr in addrs do + match (← getEnv).ixonEnv.names.get? addr with + | some n => names := names.push n + | none => pure () + return if names.isEmpty then #[fallback] else names + +/-- Resolve ctx names from ConstantMeta. -/ +def decompileMetaCtx (cMeta : ConstantMeta) : DecompileM (Array Ix.Name) := do + let env ← getEnv + pure <| (getCtxAddrs cMeta).filterMap fun addr => env.ixonEnv.names.get? addr + +/-- Build a BlockCtx from a Constant. 
-/ +def mkBlockCtx (cnst : Constant) (mutCtx : Array Ix.Name) + (univParams : Array Ix.Name) (arena : ExprMetaArena) : BlockCtx := + { refs := cnst.refs, univs := cnst.univs, sharing := cnst.sharing, mutCtx, univParams, arena } + +/-- Run with fresh block context and state. -/ +def withFreshBlock (cnst : Constant) (mutCtx : Array Ix.Name) + (univParams : Array Ix.Name) (arena : ExprMetaArena) + (m : DecompileM α) : DecompileM α := do + let env ← getEnv + match DecompileM.run env (mkBlockCtx cnst mutCtx univParams arena) {} m with + | .ok (a, _) => pure a + | .error e => throw e + +/-! ## Constant Decompilers → Ix.ConstantInfo -/ + +def decompileDefinition (d : Ixon.Definition) (cnst : Constant) (cMeta : ConstantMeta) + : DecompileM Ix.ConstantInfo := do + let name ← decompileMetaName cMeta + let univParams ← decompileMetaLevels cMeta + let allNames ← decompileMetaAll cMeta name + let mutCtx ← decompileMetaCtx cMeta + let (hints, valueRoot) := match cMeta with + | .defn _ _ hints _ _ _ _ valueRoot => (hints, valueRoot) + | _ => (.opaque, (0 : UInt64)) + let (arena, typeRoot) := getArenaAndTypeRoot cMeta + withFreshBlock cnst mutCtx univParams arena do + let typeExpr ← decompileExpr d.typ typeRoot + let valueExpr ← decompileExpr d.value valueRoot + let cv : Ix.ConstantVal := { name, levelParams := univParams, type := typeExpr } + match d.kind with + | .defn => pure (.defnInfo { cnst := cv, value := valueExpr, hints, safety := toIxSafety d.safety, all := allNames }) + | .thm => pure (.thmInfo { cnst := cv, value := valueExpr, all := allNames }) + | .opaq => pure (.opaqueInfo { cnst := cv, value := valueExpr, isUnsafe := d.safety == .unsaf, all := allNames }) + +def decompileAxiom (a : Ixon.Axiom) (cnst : Constant) (cMeta : ConstantMeta) + : DecompileM Ix.ConstantInfo := do + let name ← decompileMetaName cMeta + let univParams ← decompileMetaLevels cMeta + let (arena, typeRoot) := getArenaAndTypeRoot cMeta + withFreshBlock cnst #[] univParams arena do + let typeExpr ← 
decompileExpr a.typ typeRoot + pure (.axiomInfo { cnst := { name, levelParams := univParams, type := typeExpr }, isUnsafe := a.isUnsafe }) + +def decompileQuotient (q : Ixon.Quotient) (cnst : Constant) (cMeta : ConstantMeta) + : DecompileM Ix.ConstantInfo := do + let name ← decompileMetaName cMeta + let univParams ← decompileMetaLevels cMeta + let (arena, typeRoot) := getArenaAndTypeRoot cMeta + withFreshBlock cnst #[] univParams arena do + let typeExpr ← decompileExpr q.typ typeRoot + pure (.quotInfo { cnst := { name, levelParams := univParams, type := typeExpr }, kind := toIxQuotKind q.kind }) + +def decompileConstructor (ctor : Ixon.Constructor) (cnst : Constant) + (cMeta : ConstantMeta) (inductName : Ix.Name) + : DecompileM Ix.ConstructorVal := do + let name ← decompileMetaName cMeta + let univParams ← decompileMetaLevels cMeta + let (arena, typeRoot) := getArenaAndTypeRoot cMeta + withFreshBlock cnst #[] univParams arena do + let typeExpr ← decompileExpr ctor.typ typeRoot + pure { cnst := { name, levelParams := univParams, type := typeExpr }, + induct := inductName, cidx := ctor.cidx.toNat, + numParams := ctor.params.toNat, numFields := ctor.fields.toNat, + isUnsafe := ctor.isUnsafe } + +def decompileRecursor (rec : Ixon.Recursor) (cnst : Constant) (cMeta : ConstantMeta) + : DecompileM Ix.ConstantInfo := do + let name ← decompileMetaName cMeta + let univParams ← decompileMetaLevels cMeta + let allNames ← decompileMetaAll cMeta name + let mutCtx ← decompileMetaCtx cMeta + let (ruleRoots, ruleAddrs) := match cMeta with + | .recr _ _ rules _ _ _ _ ruleRoots => (ruleRoots, rules) + | _ => (#[], #[]) + let (arena, typeRoot) := getArenaAndTypeRoot cMeta + withFreshBlock cnst mutCtx univParams arena do + let typeExpr ← decompileExpr rec.typ typeRoot + let ruleNames ← ruleAddrs.mapM lookupNameAddr + let mut rules : Array Ix.RecursorRule := #[] + for h : i in [:rec.rules.size] do + let rule := rec.rules[i] + let rhsRoot := ruleRoots[i]?.getD 0 + let rhs ← decompileExpr 
rule.rhs rhsRoot + let ctorName := ruleNames[i]?.getD Ix.Name.mkAnon + rules := rules.push { ctor := ctorName, nfields := rule.fields.toNat, rhs } + pure (.recInfo { cnst := { name, levelParams := univParams, type := typeExpr }, + all := allNames, numParams := rec.params.toNat, + numIndices := rec.indices.toNat, numMotives := rec.motives.toNat, + numMinors := rec.minors.toNat, rules, k := rec.k, isUnsafe := rec.isUnsafe }) + +def decompileInductive (ind : Ixon.Inductive) (cnst : Constant) (cMeta : ConstantMeta) + : DecompileM (Ix.InductiveVal × Array Ix.ConstructorVal) := do + let name ← decompileMetaName cMeta + let univParams ← decompileMetaLevels cMeta + let allNames ← decompileMetaAll cMeta name + let mutCtx ← decompileMetaCtx cMeta + let ctorNameAddrs := match cMeta with + | .indc _ _ ctors .. => ctors | _ => #[] + let (arena, typeRoot) := getArenaAndTypeRoot cMeta + let typeExpr ← withFreshBlock cnst mutCtx univParams arena do + decompileExpr ind.typ typeRoot + let env ← getEnv + let mut ctors : Array Ix.ConstructorVal := #[] + let mut ctorNames : Array Ix.Name := #[] + for h : i in [:ind.ctors.size] do + let ctor := ind.ctors[i] + let ctorMeta : ConstantMeta := + if let some ctorAddr := ctorNameAddrs[i]? then + if let some ctorIxName := env.ixonEnv.names.get? ctorAddr then + env.ixonEnv.named.fold (init := ConstantMeta.empty) fun acc ixN named => + if ixN == ctorIxName then named.constMeta else acc + else .empty + else .empty + let ctorVal ← decompileConstructor ctor cnst ctorMeta name + ctorNames := ctorNames.push ctorVal.cnst.name + ctors := ctors.push ctorVal + let indVal : Ix.InductiveVal := { + cnst := { name, levelParams := univParams, type := typeExpr }, + numParams := ind.params.toNat, numIndices := ind.indices.toNat, + all := allNames, ctors := ctorNames, + numNested := ind.nested.toNat, isRec := ind.recr, + isUnsafe := ind.isUnsafe, isReflexive := ind.refl } + pure (indVal, ctors) + +/-! 
## Projection Handling -/ + +def decompileProjection (cnst : Constant) (cMeta : ConstantMeta) + (mutuals : Array MutConst) + (blockSharing : Array Ixon.Expr) (blockRefs : Array Address) (blockUnivs : Array Ixon.Univ) + : DecompileM (Array (Ix.Name × Ix.ConstantInfo)) := do + let bc : Constant := { info := cnst.info, sharing := blockSharing, refs := blockRefs, univs := blockUnivs } + match cnst.info with + | .dPrj proj => + match mutuals[proj.idx.toNat]? with + | some (.defn d) => + let info ← decompileDefinition d bc cMeta + pure #[(info.getCnst.name, info)] + | _ => throw (.badConstantFormat s!"dPrj index {proj.idx} not found") + | .iPrj proj => + match mutuals[proj.idx.toNat]? with + | some (.indc ind) => + let (indVal, ctorVals) ← decompileInductive ind bc cMeta + let mut results := #[(indVal.cnst.name, Ix.ConstantInfo.inductInfo indVal)] + for ctor in ctorVals do + results := results.push (ctor.cnst.name, .ctorInfo ctor) + pure results + | _ => throw (.badConstantFormat s!"iPrj index {proj.idx} not found") + | .rPrj proj => + match mutuals[proj.idx.toNat]? with + | some (.recr rec) => + let info ← decompileRecursor rec bc cMeta + pure #[(info.getCnst.name, info)] + | _ => throw (.badConstantFormat s!"rPrj index {proj.idx} not found") + | .cPrj _ => pure #[] + | _ => pure #[] + +/-! ## Main Entry Points -/ + +/-- Decompile a single named constant, purely. Returns Ix types. -/ +def decompileOne (env : DecompileEnv) (ixonEnv : Ixon.Env) + (_ixName : Ix.Name) (named : Ixon.Named) + : Except String (Array (Ix.Name × Ix.ConstantInfo)) := + match ixonEnv.consts.get? 
named.addr with + | none => .ok #[] + | some cnst => + let m : DecompileM (Array (Ix.Name × Ix.ConstantInfo)) := + match cnst.info with + | .defn d => do + let info ← decompileDefinition d cnst named.constMeta + pure #[(info.getCnst.name, info)] + | .axio ax => do + let info ← decompileAxiom ax cnst named.constMeta + pure #[(info.getCnst.name, info)] + | .quot q => do + let info ← decompileQuotient q cnst named.constMeta + pure #[(info.getCnst.name, info)] + | .recr rec => do + let info ← decompileRecursor rec cnst named.constMeta + pure #[(info.getCnst.name, info)] + | .dPrj proj => + match ixonEnv.consts.get? proj.block with + | some { info := .muts mutuals, sharing, refs, univs } => + decompileProjection cnst named.constMeta mutuals sharing refs univs + | _ => pure #[] + | .iPrj proj => + match ixonEnv.consts.get? proj.block with + | some { info := .muts mutuals, sharing, refs, univs } => + decompileProjection cnst named.constMeta mutuals sharing refs univs + | _ => pure #[] + | .rPrj proj => + match ixonEnv.consts.get? proj.block with + | some { info := .muts mutuals, sharing, refs, univs } => + decompileProjection cnst named.constMeta mutuals sharing refs univs + | _ => pure #[] + | .cPrj _ => pure #[] + | .muts _ => pure #[] + match DecompileM.run env default {} m with + | .ok (entries, _) => .ok entries + | .error err => .error (toString err) + +/-- Decompile a chunk of constants, purely. Returns results and errors. 
-/ +def decompileChunk (env : DecompileEnv) (ixonEnv : Ixon.Env) + (chunk : Array (Ix.Name × Ixon.Named)) + : Array (Ix.Name × Ix.ConstantInfo) × Array (Ix.Name × String) := Id.run do + let mut results : Array (Ix.Name × Ix.ConstantInfo) := #[] + let mut errors : Array (Ix.Name × String) := #[] + for (ixName, named) in chunk do + match decompileOne env ixonEnv ixName named with + | .ok entries => results := results ++ entries + | .error err => errors := errors.push (ixName, err) + (results, errors) + +/-- Decompile all constants in parallel using chunked pure Tasks. Returns Ix types. -/ +def decompileAllParallel (ixonEnv : Ixon.Env) (numWorkers : Nat := 32) + : Std.HashMap Ix.Name Ix.ConstantInfo × Array (Ix.Name × String) := Id.run do + let env : DecompileEnv := { ixonEnv } + -- Collect all named entries into an array + let mut allEntries : Array (Ix.Name × Ixon.Named) := #[] + for (ixName, named) in ixonEnv.named do + allEntries := allEntries.push (ixName, named) + let total := allEntries.size + let chunkSize := (total + numWorkers - 1) / numWorkers + -- Spawn one task per chunk + let mut tasks : Array (Task (Array (Ix.Name × Ix.ConstantInfo) × Array (Ix.Name × String))) := #[] + let mut offset := 0 + while offset < total do + let endIdx := min (offset + chunkSize) total + let chunk := allEntries[offset:endIdx] + let task := Task.spawn (prio := .dedicated) fun () => + decompileChunk env ixonEnv chunk.toArray + tasks := tasks.push task + offset := endIdx + -- Collect results + let mut result : Std.HashMap Ix.Name Ix.ConstantInfo := {} + let mut errors : Array (Ix.Name × String) := #[] + for task in tasks do + let (chunkResults, chunkErrors) := task.get + for (n, info) in chunkResults do + result := result.insert n info + errors := errors ++ chunkErrors + (result, errors) + +/-- Decompile all constants in parallel, with IO logging. 
-/ +def decompileAllParallelIO (ixonEnv : Ixon.Env) + : IO (Std.HashMap Ix.Name Ix.ConstantInfo × Array (Ix.Name × String)) := do + let total := ixonEnv.named.size + IO.println s!" [Decompile] {total} named constants, spawning tasks..." + let startTime ← IO.monoMsNow + let (result, errors) := decompileAllParallel ixonEnv + let elapsed := (← IO.monoMsNow) - startTime + IO.println s!" [Decompile] Done: {result.size} ok, {errors.size} errors in {elapsed}ms" + pure (result, errors) + +/-! ## Rust FFI Decompilation -/ + +@[extern "rs_decompile_env"] +opaque rsDecompileEnvFFI : @& Ixon.RawEnv → Except DecompileError (Array (Ix.Name × Ix.ConstantInfo)) + +/-- Decompile an Ixon.Env to Ix.ConstantInfo using Rust. -/ +def rsDecompileEnv (env : Ixon.Env) : Except DecompileError (Std.HashMap Ix.Name Ix.ConstantInfo) := do + let arr ← rsDecompileEnvFFI env.toRawEnv + return arr.foldl (init := {}) fun m (name, info) => m.insert name info + +end Ix.DecompileM diff --git a/Ix/Environment.lean b/Ix/Environment.lean new file mode 100644 index 00000000..7a6aa6b7 --- /dev/null +++ b/Ix/Environment.lean @@ -0,0 +1,619 @@ +/- + Canonical Lean types with embedded content-addressed hashes. + + Ix types mirror Lean's core types but include a Blake3 hash at each node, + enabling O(1) equality checks and content-addressed storage. +-/ + +import Lean +import Blake3 +import Std.Data.HashMap +import Batteries.Data.RBMap +import Ix.Address + +namespace Ix + +open Std (HashMap) + +/-! 
## LEON (Lean Objective Notation) Tags (must match Rust env.rs) -/ +def TAG_NANON : UInt8 := 0x00 +def TAG_NSTR : UInt8 := 0x01 +def TAG_NNUM : UInt8 := 0x02 +def TAG_UZERO : UInt8 := 0x03 +def TAG_USUCC : UInt8 := 0x04 +def TAG_UMAX : UInt8 := 0x05 +def TAG_UIMAX : UInt8 := 0x06 +def TAG_UPARAM : UInt8 := 0x10 +def TAG_UMVAR : UInt8 := 0x70 +def TAG_EVAR : UInt8 := 0x20 +def TAG_ESORT : UInt8 := 0x80 +def TAG_EREF : UInt8 := 0x30 +def TAG_EPRJ : UInt8 := 0x50 +def TAG_ESTR : UInt8 := 0x81 +def TAG_ENAT : UInt8 := 0x82 +def TAG_EAPP : UInt8 := 0x83 +def TAG_ELAM : UInt8 := 0x84 +def TAG_EALL : UInt8 := 0x85 +def TAG_ELET : UInt8 := 0x86 +def TAG_EFVAR : UInt8 := 0x72 +def TAG_EMVAR : UInt8 := 0x73 +def TAG_EMDATA : UInt8 := 0x74 +def TAG_DEFN : UInt8 := 0xA0 +def TAG_RECR : UInt8 := 0xA1 +def TAG_AXIO : UInt8 := 0xA2 +def TAG_QUOT : UInt8 := 0xA3 +def TAG_INDC : UInt8 := 0xA6 +def TAG_CTOR : UInt8 := 0xC0 +def TAG_THEO : UInt8 := 0xC1 +def TAG_OPAQ : UInt8 := 0xC2 +def TAG_MINT : UInt8 := 0xF1 +def TAG_MSSTR : UInt8 := 0xF2 +def TAG_MSINFO : UInt8 := 0xF3 +def TAG_MSPRE : UInt8 := 0xF4 +def TAG_MSYN : UInt8 := 0xF5 +def TAG_MDVAL : UInt8 := 0xF6 + +/-! ## Name -/ + +/-- Content-addressed hierarchical name. Mirrors `Lean.Name` but carries a Blake3 hash at each node for O(1) equality. -/ +inductive Name where + | anonymous (hash : Address) + | str (parent : Name) (s : String) (hash : Address) + | num (parent : Name) (i : Nat) (hash : Address) + deriving Repr, Nonempty + +namespace Name + +/-- Extract the Blake3 hash stored at the root of a `Name`. -/ +def getHash : Name → Address + | anonymous h => h + | str _ _ h => h + | num _ _ h => h + +instance : BEq Name where + beq a b := a.getHash == b.getHash + +instance : Hashable Name where + hash n := hash n.getHash -- Uses Address's Hashable (first 8 bytes as LE u64) + +/-- The anonymous (root) name with its canonical hash. 
-/ +def mkAnon : Name := .anonymous <| Address.blake3 (ByteArray.mk #[TAG_NANON]) + +instance : Inhabited Name where + default := mkAnon + +/-- Construct a string name component, hashing the tag, parent hash, and string bytes. -/ +def mkStr (pre: Name) (s: String): Name := Id.run <| do + let mut h := Blake3.Hasher.init () + h := h.update (ByteArray.mk #[TAG_NSTR]) + h := h.update pre.getHash.hash + h := h.update s.toUTF8 + .str pre s ⟨(h.finalizeWithLength 32).val⟩ + +/-- Construct a numeric name component, hashing the tag, parent hash, and little-endian nat bytes. -/ +def mkNat (pre: Name) (i: Nat): Name := Id.run <| do + let mut h := Blake3.Hasher.init () + h := h.update (ByteArray.mk #[TAG_NNUM]) + h := h.update pre.getHash.hash + h := h.update ⟨i.toBytesLE⟩ + .num pre i ⟨(h.finalizeWithLength 32).val⟩ + +partial def toStringAux : Name → String + | .anonymous _ => "" + | .str (.anonymous _) s _ => s + | .str parent s _ => s!"{toStringAux parent}.{s}" + | .num (.anonymous _) n _ => s!"«{n}»" + | .num parent n _ => s!"{toStringAux parent}.«{n}»" + +instance : ToString Name where + toString := toStringAux + +end Name + +/-- Compare Ix.Name by hash for ordered collections. -/ +def nameCompare (a b : Name) : Ordering := + compare a.getHash b.getHash + +instance : Ord Name where + compare := nameCompare + +/-! ## Level -/ + +/-- Content-addressed universe level. Mirrors `Lean.Level` with a Blake3 hash at each node. -/ +inductive Level where + | zero (hash : Address) + | succ (x : Level) (hash : Address) + | max (x y : Level) (hash : Address) + | imax (x y : Level) (hash : Address) + | param (n : Name) (hash : Address) + | mvar (n : Name) (hash : Address) + deriving Repr, Nonempty + +namespace Level + +/-- Extract the Blake3 hash stored at the root of a `Level`. 
-/ +def getHash : Level → Address + | zero h => h + | succ _ h => h + | max _ _ h => h + | imax _ _ h => h + | param _ h => h + | mvar _ h => h + +def mkZero : Level := .zero <| Address.blake3 (ByteArray.mk #[TAG_UZERO]) + +def mkSucc (x: Level) : Level := Id.run <| do + let mut h := Blake3.Hasher.init () + h := h.update (ByteArray.mk #[TAG_USUCC]) + h := h.update x.getHash.hash + .succ x ⟨(h.finalizeWithLength 32).val⟩ + +def mkMax (x y : Level) : Level := Id.run <| do + let mut h := Blake3.Hasher.init () + h := h.update (ByteArray.mk #[TAG_UMAX]) + h := h.update x.getHash.hash + h := h.update y.getHash.hash + .max x y ⟨(h.finalizeWithLength 32).val⟩ + +def mkIMax (x y : Level) : Level := Id.run <| do + let mut h := Blake3.Hasher.init () + h := h.update (ByteArray.mk #[TAG_UIMAX]) + h := h.update x.getHash.hash + h := h.update y.getHash.hash + .imax x y ⟨(h.finalizeWithLength 32).val⟩ + +def mkParam (n: Name) : Level := Id.run <| do + let mut h := Blake3.Hasher.init () + h := h.update (ByteArray.mk #[TAG_UPARAM]) + h := h.update n.getHash.hash + .param n ⟨(h.finalizeWithLength 32).val⟩ + +def mkMvar (n: Name) : Level := Id.run <| do + let mut h := Blake3.Hasher.init () + h := h.update (ByteArray.mk #[TAG_UMVAR]) + h := h.update n.getHash.hash + .mvar n ⟨(h.finalizeWithLength 32).val⟩ + +instance : BEq Level where + beq a b := a.getHash == b.getHash + +instance : Hashable Level where + hash l := hash l.getHash -- Uses Address's Hashable (first 8 bytes as LE u64) + +instance : Inhabited Level where + default := mkZero + +end Level + +/-! ## Auxiliary types for MData -/ + +/-- Ix-local integer type used within `MData` values (mirrors `Int` for serialization). 
-/ +inductive Int where + | ofNat (n : Nat) + | negSucc (n : Nat) + deriving BEq, Repr, Inhabited + +structure Substring where + str : String + startPos : Nat + stopPos : Nat + deriving BEq, Repr, Inhabited + +inductive SourceInfo where + | original (leading : Substring) (leadingPos : Nat) + (trailing : Substring) (trailingPos : Nat) + | synthetic (start : Nat) (stop : Nat) (canonical : Bool) + | none + deriving BEq, Repr, Inhabited + +inductive SyntaxPreresolved where + | namespace (name : Name) + | decl (name : Name) (aliases : Array String) + deriving BEq, Repr, Inhabited + +inductive Syntax where + | missing + | node (info : SourceInfo) (kind : Name) (args : Array Syntax) + | atom (info : SourceInfo) (val : String) + | ident (info : SourceInfo) (rawVal : Substring) (val : Name) + (preresolved : Array SyntaxPreresolved) + deriving BEq, Repr, Inhabited, Nonempty + +/-- A metadata value carried in an `mdata` expression node. -/ +inductive DataValue where + | ofString (s : String) + | ofBool (b : Bool) + | ofName (n : Name) + | ofNat (n : Nat) + | ofInt (i : Int) + | ofSyntax (s : Syntax) + deriving BEq, Repr, Inhabited + +/-! ## Expr -/ + +/-- Content-addressed expression. Mirrors `Lean.Expr` with a Blake3 hash at each node, + enabling O(1) structural equality and content-addressed storage. 
-/ +inductive Expr where + | bvar (idx : Nat) (hash : Address) + | fvar (name : Name) (hash : Address) + | mvar (name : Name) (hash : Address) + | sort (level : Level) (hash : Address) + | const (name : Name) (levels : Array Level) (hash : Address) + | app (fn arg : Expr) (hash : Address) + | lam (name : Name) (ty body : Expr) (bi : Lean.BinderInfo) (hash : Address) + | forallE (name : Name) (ty body : Expr) (bi : Lean.BinderInfo) (hash : Address) + | letE (name : Name) (ty val body : Expr) (nonDep : Bool) (hash : Address) + | lit (l : Lean.Literal) (hash : Address) + | mdata (data : Array (Name × DataValue)) (expr : Expr) (hash : Address) + | proj (typeName : Name) (idx : Nat) (struct : Expr) (hash : Address) + deriving Repr, Nonempty + +namespace Expr + +def binderInfoTag : Lean.BinderInfo → UInt8 + | .default => 0 + | .implicit => 1 + | .strictImplicit => 2 + | .instImplicit => 3 + +/-- Extract the Blake3 hash stored at the root of an `Expr`. -/ +def getHash : Expr → Address + | bvar _ h => h + | fvar _ h => h + | mvar _ h => h + | sort _ h => h + | const _ _ h => h + | app _ _ h => h + | lam _ _ _ _ h => h + | forallE _ _ _ _ h => h + | letE _ _ _ _ _ h => h + | lit _ h => h + | mdata _ _ h => h + | proj _ _ _ h => h + +instance : BEq Expr where + beq a b := a.getHash == b.getHash + +instance : Hashable Expr where + hash e := hash e.getHash -- Uses Address's Hashable (first 8 bytes as LE u64) + +def mkBVar (x: Nat) : Expr := Id.run <| do + let mut h := Blake3.Hasher.init () + h := h.update (ByteArray.mk #[TAG_EVAR]) + h := h.update ⟨x.toBytesLE⟩ + .bvar x ⟨(h.finalizeWithLength 32).val⟩ + +def mkFVar (x: Name) : Expr := Id.run <| do + let mut h := Blake3.Hasher.init () + h := h.update (ByteArray.mk #[TAG_EFVAR]) + h := h.update x.getHash.hash + .fvar x ⟨(h.finalizeWithLength 32).val⟩ + +def mkMVar (x: Name) : Expr := Id.run <| do + let mut h := Blake3.Hasher.init () + h := h.update (ByteArray.mk #[TAG_EMVAR]) + h := h.update x.getHash.hash + .mvar x 
⟨(h.finalizeWithLength 32).val⟩ + +def mkSort (x: Level) : Expr := Id.run <| do + let h := Blake3.Hasher.init () + let h := h.update (ByteArray.mk #[TAG_ESORT]) + let h := h.update x.getHash.hash + .sort x ⟨(h.finalizeWithLength 32).val⟩ + +def mkConst (x: Name) (us: Array Level): Expr := Id.run <| do + let mut h := Blake3.Hasher.init () + h := h.update (ByteArray.mk #[TAG_EREF]) + h := h.update x.getHash.hash + for u in us do + h := h.update u.getHash.hash + .const x us ⟨(h.finalizeWithLength 32).val⟩ + +def mkApp (f a : Expr) : Expr := Id.run <| do + let mut h := Blake3.Hasher.init () + h := h.update (ByteArray.mk #[TAG_EAPP]) + h := h.update f.getHash.hash + h := h.update a.getHash.hash + .app f a ⟨(h.finalizeWithLength 32).val⟩ + +def mkLam (n : Name) (t b : Expr) (bi : Lean.BinderInfo) : Expr := Id.run <| do + let mut h := Blake3.Hasher.init () + h := h.update (ByteArray.mk #[TAG_ELAM]) + h := h.update n.getHash.hash + h := h.update t.getHash.hash + h := h.update b.getHash.hash + h := h.update (ByteArray.mk #[binderInfoTag bi]) + .lam n t b bi ⟨(h.finalizeWithLength 32).val⟩ + +def mkForallE (n : Name) (t b : Expr) (bi : Lean.BinderInfo) : Expr := Id.run <| do + let mut h := Blake3.Hasher.init () + h := h.update (ByteArray.mk #[TAG_EALL]) + h := h.update n.getHash.hash + h := h.update t.getHash.hash + h := h.update b.getHash.hash + h := h.update (ByteArray.mk #[binderInfoTag bi]) + .forallE n t b bi ⟨(h.finalizeWithLength 32).val⟩ + +def mkLetE (n : Name) (t v b : Expr) (nd : Bool) : Expr := Id.run <| do + let mut h := Blake3.Hasher.init () + h := h.update (ByteArray.mk #[TAG_ELET]) + h := h.update n.getHash.hash + h := h.update t.getHash.hash + h := h.update v.getHash.hash + h := h.update b.getHash.hash + h := h.update (ByteArray.mk #[if nd then 1 else 0]) + .letE n t v b nd ⟨(h.finalizeWithLength 32).val⟩ + +def mkLit (l : Lean.Literal) : Expr := Id.run <| do + let mut h := Blake3.Hasher.init () + match l with + | .natVal n => + h := h.update (ByteArray.mk 
#[TAG_ENAT]) + h := h.update ⟨n.toBytesLE⟩ + | .strVal s => + h := h.update (ByteArray.mk #[TAG_ESTR]) + h := h.update s.toUTF8 + .lit l ⟨(h.finalizeWithLength 32).val⟩ + +def mkProj (n : Name) (i : Nat) (e : Expr) : Expr := Id.run <| do + let mut h := Blake3.Hasher.init () + h := h.update (ByteArray.mk #[TAG_EPRJ]) + h := h.update n.getHash.hash + h := h.update ⟨i.toBytesLE⟩ + h := h.update e.getHash.hash + .proj n i e ⟨(h.finalizeWithLength 32).val⟩ + +def hashInt (h : Blake3.Hasher) (i : Int) : Blake3.Hasher := Id.run do + let mut h := h.update (ByteArray.mk #[TAG_MINT]) + match i with + | .ofNat n => + h := h.update (ByteArray.mk #[0]) + h := h.update ⟨n.toBytesLE⟩ + | .negSucc n => + h := h.update (ByteArray.mk #[1]) + h := h.update ⟨n.toBytesLE⟩ + h + +def hashSubstring (h : Blake3.Hasher) (ss : Substring) : Blake3.Hasher := + Id.run do + let mut h := h.update (ByteArray.mk #[TAG_MSSTR]) + h := h.update ss.str.toUTF8 + h := h.update ⟨ss.startPos.toBytesLE⟩ + h := h.update ⟨ss.stopPos.toBytesLE⟩ + h + +def hashSourceInfo (h : Blake3.Hasher) (si : SourceInfo) : Blake3.Hasher := + Id.run do + let mut h := h.update (ByteArray.mk #[TAG_MSINFO]) + match si with + | .original leading leadingPos trailing trailingPos => + h := h.update (ByteArray.mk #[0]) + h := hashSubstring h leading + h := h.update ⟨leadingPos.toBytesLE⟩ + h := hashSubstring h trailing + h := h.update ⟨trailingPos.toBytesLE⟩ + | .synthetic start stop canonical => + h := h.update (ByteArray.mk #[1]) + h := h.update ⟨start.toBytesLE⟩ + h := h.update ⟨stop.toBytesLE⟩ + h := h.update (ByteArray.mk #[if canonical then 1 else 0]) + | .none => + h := h.update (ByteArray.mk #[2]) + h + +def hashSyntaxPreresolved (h : Blake3.Hasher) (sp : SyntaxPreresolved) + : Blake3.Hasher := Id.run do + let mut h := h.update (ByteArray.mk #[TAG_MSPRE]) + match sp with + | .namespace name => + h := h.update (ByteArray.mk #[0]) + h := h.update name.getHash.hash + | .decl name aliases => + h := h.update (ByteArray.mk #[1]) 
+ h := h.update name.getHash.hash + for a in aliases do + h := h.update a.toUTF8 + h := h.update (ByteArray.mk #[0]) + h + +private partial def hashSyntax (h : Blake3.Hasher) (syn : Syntax) + : Blake3.Hasher := Id.run do + let mut h := h.update (ByteArray.mk #[TAG_MSYN]) + match syn with + | .missing => + h := h.update (ByteArray.mk #[0]) + | .node info kind args => + h := h.update (ByteArray.mk #[1]) + h := hashSourceInfo h info + h := h.update kind.getHash.hash + h := h.update ⟨args.size.toBytesLE⟩ + for arg in args do + h := hashSyntax h arg + | .atom info val => + h := h.update (ByteArray.mk #[2]) + h := hashSourceInfo h info + h := h.update val.toUTF8 + | .ident info rawVal val preresolved => + h := h.update (ByteArray.mk #[3]) + h := hashSourceInfo h info + h := hashSubstring h rawVal + h := h.update val.getHash.hash + h := h.update ⟨preresolved.size.toBytesLE⟩ + for pr in preresolved do + h := hashSyntaxPreresolved h pr + h + +def hashDataValue (h : Blake3.Hasher) (dv : DataValue) + : Blake3.Hasher := Id.run do + let mut h := h.update (ByteArray.mk #[TAG_MDVAL]) + match dv with + | .ofString s => + h := h.update (ByteArray.mk #[0]) + h := h.update s.toUTF8 + | .ofBool b => + h := h.update (ByteArray.mk #[1]) + h := h.update (ByteArray.mk #[if b then 1 else 0]) + | .ofName name => + h := h.update (ByteArray.mk #[2]) + h := h.update name.getHash.hash + | .ofNat n => + h := h.update (ByteArray.mk #[3]) + h := h.update ⟨n.toBytesLE⟩ + | .ofInt i => + h := h.update (ByteArray.mk #[4]) + h := hashInt h i + | .ofSyntax syn => + h := h.update (ByteArray.mk #[5]) + h := hashSyntax h syn + h + +def mkMData (data : Array (Name × DataValue)) (e : Expr) : Expr := Id.run do + let mut h := Blake3.Hasher.init () + h := h.update (ByteArray.mk #[TAG_EMDATA]) + h := h.update ⟨data.size.toBytesLE⟩ + for (name, dv) in data do + h := h.update name.getHash.hash + h := hashDataValue h dv + h := h.update e.getHash.hash + .mdata data e ⟨(h.finalizeWithLength 32).val⟩ + +instance : 
Inhabited Expr where + default := mkBVar 0 + +end Expr + +/-! ## Constant Types -/ + +/-- Common fields shared by all constant declarations: name, universe parameters, and type. -/ +structure ConstantVal where + name : Name + levelParams : Array Name + type : Expr + deriving Repr, BEq + +structure AxiomVal where + cnst : ConstantVal + isUnsafe : Bool + deriving Repr, BEq + +structure DefinitionVal where + cnst : ConstantVal + value : Expr + hints : Lean.ReducibilityHints + safety : Lean.DefinitionSafety + all : Array Name + deriving Repr, BEq + +structure TheoremVal where + cnst : ConstantVal + value : Expr + all : Array Name + deriving Repr, BEq + +structure OpaqueVal where + cnst : ConstantVal + value : Expr + isUnsafe : Bool + all : Array Name + deriving Repr, BEq + +structure QuotVal where + cnst : ConstantVal + kind : Lean.QuotKind + deriving Repr, BEq + +structure InductiveVal where + cnst : ConstantVal + numParams : Nat + numIndices : Nat + all : Array Name + ctors : Array Name + numNested : Nat + isRec : Bool + isUnsafe : Bool + isReflexive : Bool + deriving Repr, BEq + +structure ConstructorVal where + cnst : ConstantVal + induct : Name + cidx : Nat + numParams : Nat + numFields : Nat + isUnsafe : Bool + deriving Repr, BEq + +structure RecursorRule where + ctor : Name + nfields : Nat + rhs : Expr + deriving Repr, BEq + +structure RecursorVal where + cnst : ConstantVal + all : Array Name + numParams : Nat + numIndices : Nat + numMotives : Nat + numMinors : Nat + rules : Array RecursorRule + k : Bool + isUnsafe : Bool + deriving Repr, BEq + +/-- Sum type of all Lean constant declarations (axioms, definitions, theorems, inductives, etc.). 
-/ +inductive ConstantInfo where + | axiomInfo (v : AxiomVal) + | defnInfo (v : DefinitionVal) + | thmInfo (v : TheoremVal) + | opaqueInfo (v : OpaqueVal) + | quotInfo (v : QuotVal) + | inductInfo (v : InductiveVal) + | ctorInfo (v : ConstructorVal) + | recInfo (v : RecursorVal) + deriving Repr, BEq + +/-- Extract the `ConstantVal` common fields from any `ConstantInfo` variant. -/ +def ConstantInfo.getCnst : ConstantInfo → ConstantVal + | .axiomInfo v => v.cnst + | .defnInfo v => v.cnst + | .thmInfo v => v.cnst + | .opaqueInfo v => v.cnst + | .quotInfo v => v.cnst + | .inductInfo v => v.cnst + | .ctorInfo v => v.cnst + | .recInfo v => v.cnst + +/-! ## Environment -/ + +/-- A content-addressed Lean environment: a map from `Ix.Name` to `ConstantInfo`. -/ +structure Environment where + consts : HashMap Name ConstantInfo + +/-- Raw environment data as arrays (returned from Rust FFI). + Use `toEnvironment` to convert to Environment with HashMaps. -/ +structure RawEnvironment where + consts : Array (Name × ConstantInfo) + deriving Repr, Inhabited + +/-- Convert raw arrays to Environment with HashMaps. + This is done on the Lean side for correct hash function usage. -/ +def RawEnvironment.toEnvironment (raw : RawEnvironment) : Environment := + { consts := raw.consts.foldl (init := {}) fun m (k, v) => m.insert k v } + +/-- Convert Environment to raw arrays for FFI usage. -/ +def Environment.toRaw (env : Environment) : RawEnvironment := + { consts := env.consts.toArray } + +/-! ## Context Types for Compilation -/ + +/-- Mutual context mapping Name to index within block. -/ +abbrev MutCtx := Batteries.RBMap Name Nat nameCompare + +instance : Ord MutCtx where + compare a b := compare a.toList b.toList + +/-- Set of Names (for tracking constants in a block). 
-/ +abbrev NameSet := Batteries.RBSet Name nameCompare + +end Ix diff --git a/Ix/GraphM.lean b/Ix/GraphM.lean index 81f4d85d..74e9fcf1 100644 --- a/Ix/GraphM.lean +++ b/Ix/GraphM.lean @@ -1,83 +1,115 @@ import Lean import Ix.Common +import Ix.Environment namespace Ix +/-! + # GraphM - Build dependency graph from Ix.Environment + + This module builds a reference graph from an already-canonicalized Ix.Environment. + The canonicalization should be done first (ideally via fast Rust FFI). +-/ + structure GraphState where - exprCache: Map Lean.Expr (Set Lean.Name) + exprCache: Map Ix.Expr (Set Ix.Name) def GraphState.init : GraphState := ⟨{}⟩ -abbrev GraphM := ReaderT Lean.Environment <| StateT GraphState Id +abbrev GraphM := ReaderT Ix.Environment <| StateT GraphState Id -def graphExpr (expr: Lean.Expr) : GraphM (Set Lean.Name) := do - match (<- get).exprCache.find? expr with +/-- Extract constant references from an Ix.Expr. + NOTE: Aligned with Rust's get_expr_references for cross-impl testing. -/ +def graphExpr (expr: Ix.Expr) : GraphM (Set Ix.Name) := do + match (<- get).exprCache.get? 
expr with | some x => pure x - | none => + | none => let refs <- go expr modifyGet fun stt => (refs, { stt with exprCache := stt.exprCache.insert expr refs }) where - go : Lean.Expr -> GraphM (Set Lean.Name) - | .mdata _ x => graphExpr x - | .const name _ => pure {name} - | .app f a => .union <$> graphExpr f <*> graphExpr a - | .lam _ t b _ => .union <$> graphExpr t <*> graphExpr b - | .forallE _ t b _ => .union <$> graphExpr t <*> graphExpr b - | .letE _ t v b _ => + go : Ix.Expr -> GraphM (Set Ix.Name) + | .mdata _ x _ => graphExpr x + | .const name _ _ => pure {name} + | .app f a _ => .union <$> graphExpr f <*> graphExpr a + | .lam _ t b _ _ => .union <$> graphExpr t <*> graphExpr b + | .forallE _ t b _ _ => .union <$> graphExpr t <*> graphExpr b + | .letE _ t v b _ _ => .union <$> graphExpr t <*> (.union <$> graphExpr v <*> graphExpr b) - | .proj typeName _ s => (.insert · typeName) <$> graphExpr s + | .proj typeName _ s _ => (.insert · typeName) <$> graphExpr s | _ => pure {} -def graphConst: Lean.ConstantInfo -> GraphM (Set Lean.Name) -| .axiomInfo val => graphExpr val.type -| .defnInfo val => .union <$> graphExpr val.type <*> graphExpr val.value -| .thmInfo val => .union <$> graphExpr val.type <*> graphExpr val.value -| .opaqueInfo val => .union <$> graphExpr val.type <*> graphExpr val.value -| .quotInfo val => graphExpr val.type +/-- Extract constant references from an Ix.ConstantInfo. + NOTE: Aligned with Rust's get_constant_info_references for cross-impl testing. 
-/ +def graphConst: Ix.ConstantInfo -> GraphM (Set Ix.Name) +| .axiomInfo val => graphExpr val.cnst.type +| .defnInfo val => .union <$> graphExpr val.cnst.type <*> graphExpr val.value +| .thmInfo val => .union <$> graphExpr val.cnst.type <*> graphExpr val.value +| .opaqueInfo val => .union <$> graphExpr val.cnst.type <*> graphExpr val.value +| .quotInfo val => graphExpr val.cnst.type | .inductInfo val => do - let env <- read - let mut ctorRefs := {} - for ctor in val.ctors do - let rs <- match env.find? ctor with - | .some (.ctorInfo ctorVal) => graphExpr ctorVal.type - | _ => continue - ctorRefs := ctorRefs.union rs - let type <- graphExpr val.type - return .union (.union (.ofList val.ctors) ctorRefs) type -| .ctorInfo val => graphExpr val.type + -- Rust: type refs + constructor names (NOT constructor type refs) + let ctorNames : Set Ix.Name := val.ctors.foldl (init := {}) fun s n => s.insert n + let type <- graphExpr val.cnst.type + return .union type ctorNames +| .ctorInfo val => do + -- Rust: type refs + induct name + let typeRefs <- graphExpr val.cnst.type + return typeRefs.insert val.induct | .recInfo val => do - let t <- graphExpr val.type - let rs <- val.rules.foldrM (fun r s => .union s <$> graphExpr r.rhs) {} - return .union t rs + -- Rust: type refs + (ctor names + rhs refs for each rule) + let t <- graphExpr val.cnst.type + let mut rs := t + for rule in val.rules do + rs := rs.insert rule.ctor + let rhsRefs <- graphExpr rule.rhs + rs := rs.union rhsRefs + return rs -def GraphM.run (env: Lean.Environment) (stt: GraphState) (g: GraphM α) +def GraphM.run (env: Ix.Environment) (stt: GraphState) (g: GraphM α) : α × GraphState := StateT.run (ReaderT.run (Id.run g env)) stt -def GraphM.env (env: Lean.Environment) : Map Lean.Name (Set Lean.Name) := Id.run do - let mut tasks : Map Lean.Name (Task (Set Lean.Name)) := {} - for (name, const) in env.constants do - let task <- Task.spawn fun () => (GraphM.run env .init (graphConst const)).1 - tasks := tasks.insert 
name task - return tasks.map fun _ t => t.get - -def GraphM.envSerial (env: Lean.Environment) : Map Lean.Name (Set Lean.Name) := Id.run do - let mut refs: Map Lean.Name (Set Lean.Name) := {} - for (name, const) in env.constants do - let (rs, _) := GraphM.run env .init (graphConst const) - refs := refs.insert name rs - return refs - -def GraphM.envSerialShareCache (env: Lean.Environment) : Map Lean.Name (Set Lean.Name) := Id.run do +/-- Build dependency graph from Ix.Environment. + Returns a map from Ix.Name to the set of Ix.Names it references. + Pass `dbg := true` and `total` (constant count) to enable progress tracing. -/ +def GraphM.env (env: Ix.Environment) (dbg : Bool := false) (total : Nat := 0) + : Map Ix.Name (Set Ix.Name) := Id.run do let mut stt : GraphState := .init - let mut refs: Map Lean.Name (Set Lean.Name) := {} - for (name, const) in env.constants do + let mut refs: Map Ix.Name (Set Ix.Name) := {} + let mut i : Nat := 0 + let mut lastPct : Nat := 0 + for (name, const) in env.consts do let (rs, stt') := GraphM.run env stt (graphConst const) refs := refs.insert name rs stt := stt' + i := i + 1 + if dbg && total > 0 then + let pct := (i * 100) / total + if pct >= lastPct + 10 then + dbg_trace s!" 
[Graph] {pct}% ({i}/{total})" + lastPct := pct + return refs + +def GraphM.envParallel (env: Ix.Environment) : Map Ix.Name (Set Ix.Name) := Id.run do + let mut tasks : Array (Ix.Name × Task (Set Ix.Name)) := #[] + for (name, const) in env.consts do + let task := Task.spawn fun () => + let (rs, _) := GraphM.run env .init (graphConst const) + rs + tasks := tasks.push (name, task) + let mut refs : Map Ix.Name (Set Ix.Name) := {} + for (name, task) in tasks do + refs := refs.insert name task.get + return refs + +def GraphM.envSerial (env: Ix.Environment) : Map Ix.Name (Set Ix.Name) := Id.run do + let mut refs: Map Ix.Name (Set Ix.Name) := {} + for (name, const) in env.consts do + let (rs, _) := GraphM.run env .init (graphConst const) + refs := refs.insert name rs return refs end Ix diff --git a/Ix/IxVM/Ixon.lean b/Ix/IxVM/Ixon.lean index 35b7b055..c920e22d 100644 --- a/Ix/IxVM/Ixon.lean +++ b/Ix/IxVM/Ixon.lean @@ -64,14 +64,11 @@ def ixon := ⟦ -- DPrj(DefinitionProj), -- 0xA7, definition projection -- Muts(Vec), -- 0xBX, mutual constants - -- Prof(Proof), -- 0xE0, zero-knowledge proof - Eval(Address, Address, Address, Address), -- 0xE1, evaluation claim - Chck(Address, Address, Address), -- 0xE2, typechecking claim - Comm(Address, Address), -- 0xE3, cryptographic commitment - -- Envn(Env), -- 0xE4, multi-claim environment - Prim(BuiltIn) -- 0xE5, compiler built-ins - -- Meta(Metadata) - -- 0xFX, metadata + -- 0xE0: Env (environment), 0xE1: CheckProof, 0xE2: EvalProof + Chck(Address, Address, Address), -- 0xE3, typechecking claim + Eval(Address, Address, Address, Address), -- 0xE4, evaluation claim + Comm(Address, Address), -- 0xE5, cryptographic commitment + Prim(BuiltIn) -- 0xE6, compiler built-ins } ⟧ diff --git a/Ix/IxVM/IxonDeserialize.lean b/Ix/IxVM/IxonDeserialize.lean index f126bae0..583c3f33 100644 --- a/Ix/IxVM/IxonDeserialize.lean +++ b/Ix/IxVM/IxonDeserialize.lean @@ -80,7 +80,13 @@ def ixonDeserialize := ⟦ let (addr2, tail) = deserialize_addr(tail, [[0; 4]; 
8], 0); let (addr3, _tail) = deserialize_addr(tail, [[0; 4]; 8], 0); Ixon.ELet(0, Address.Bytes(addr1), Address.Bytes(addr2), Address.Bytes(addr3)), - ByteStream.Cons(0xE1, tail_ptr) => + ByteStream.Cons(0xE3, tail_ptr) => + let tail = load(tail_ptr); + let (addr1, tail) = deserialize_addr(tail, [[0; 4]; 8], 0); + let (addr2, tail) = deserialize_addr(tail, [[0; 4]; 8], 0); + let (addr3, _tail) = deserialize_addr(tail, [[0; 4]; 8], 0); + Ixon.Chck(Address.Bytes(addr1), Address.Bytes(addr2), Address.Bytes(addr3)), + ByteStream.Cons(0xE4, tail_ptr) => let tail = load(tail_ptr); let (addr1, tail) = deserialize_addr(tail, [[0; 4]; 8], 0); let (addr2, tail) = deserialize_addr(tail, [[0; 4]; 8], 0); @@ -90,13 +96,7 @@ def ixonDeserialize := ⟦ Address.Bytes(addr1), Address.Bytes(addr2), Address.Bytes(addr3), Address.Bytes(addr4) ), - ByteStream.Cons(0xE2, tail_ptr) => - let tail = load(tail_ptr); - let (addr1, tail) = deserialize_addr(tail, [[0; 4]; 8], 0); - let (addr2, tail) = deserialize_addr(tail, [[0; 4]; 8], 0); - let (addr3, _tail) = deserialize_addr(tail, [[0; 4]; 8], 0); - Ixon.Chck(Address.Bytes(addr1), Address.Bytes(addr2), Address.Bytes(addr3)), - ByteStream.Cons(0xE3, tail_ptr) => + ByteStream.Cons(0xE5, tail_ptr) => let tail = load(tail_ptr); let (addr1, tail) = deserialize_addr(tail, [[0; 4]; 8], 0); let (addr2, _tail) = deserialize_addr(tail, [[0; 4]; 8], 0); diff --git a/Ix/IxVM/IxonSerialize.lean b/Ix/IxVM/IxonSerialize.lean index 0889db6d..a3a1f1f0 100644 --- a/Ix/IxVM/IxonSerialize.lean +++ b/Ix/IxVM/IxonSerialize.lean @@ -87,28 +87,28 @@ def ixonSerialize := ⟦ let stream = fold(8..0, stream, |stream, @i| fold(4..0, stream, |stream, @j| ByteStream.Cons(t[@i][@j], store(stream)))); ByteStream.Cons(tag, store(stream)), - Ixon.Eval(Address.Bytes(a), Address.Bytes(b), Address.Bytes(c), Address.Bytes(d)) => - let tag = 0xE1; + Ixon.Chck(Address.Bytes(a), Address.Bytes(b), Address.Bytes(c)) => + let tag = 0xE3; let stream = fold(8..0, stream, |stream, @i| 
fold(4..0, stream, |stream, @j| ByteStream.Cons(a[@i][@j], store(stream)))); let stream = fold(8..0, stream, |stream, @i| fold(4..0, stream, |stream, @j| ByteStream.Cons(b[@i][@j], store(stream)))); let stream = fold(8..0, stream, |stream, @i| fold(4..0, stream, |stream, @j| ByteStream.Cons(c[@i][@j], store(stream)))); - let stream = fold(8..0, stream, |stream, @i| - fold(4..0, stream, |stream, @j| ByteStream.Cons(d[@i][@j], store(stream)))); ByteStream.Cons(tag, store(stream)), - Ixon.Chck(Address.Bytes(a), Address.Bytes(b), Address.Bytes(c)) => - let tag = 0xE2; + Ixon.Eval(Address.Bytes(a), Address.Bytes(b), Address.Bytes(c), Address.Bytes(d)) => + let tag = 0xE4; let stream = fold(8..0, stream, |stream, @i| fold(4..0, stream, |stream, @j| ByteStream.Cons(a[@i][@j], store(stream)))); let stream = fold(8..0, stream, |stream, @i| fold(4..0, stream, |stream, @j| ByteStream.Cons(b[@i][@j], store(stream)))); let stream = fold(8..0, stream, |stream, @i| fold(4..0, stream, |stream, @j| ByteStream.Cons(c[@i][@j], store(stream)))); + let stream = fold(8..0, stream, |stream, @i| + fold(4..0, stream, |stream, @j| ByteStream.Cons(d[@i][@j], store(stream)))); ByteStream.Cons(tag, store(stream)), Ixon.Comm(Address.Bytes(a), Address.Bytes(b)) => - let tag = 0xE3; + let tag = 0xE5; let stream = fold(8..0, stream, |stream, @i| fold(4..0, stream, |stream, @j| ByteStream.Cons(a[@i][@j], store(stream)))); let stream = fold(8..0, stream, |stream, @i| diff --git a/Ix/Ixon.lean b/Ix/Ixon.lean index f634913f..5432d12c 100644 --- a/Ix/Ixon.lean +++ b/Ix/Ixon.lean @@ -1,39 +1,73 @@ +/- + Ixon: Alpha-invariant serialization format for Lean constants. 
+ + This module defines: + - Serialize typeclass and primitive serialization + - Tag0/Tag2/Tag4 encoding + - Expr, Univ, and Constant types matching Rust exactly + - All numeric fields use UInt64 (matching Rust's u64) +-/ import Ix.Address -import Lean.Declaration -import Lean.Data.KVMap +import Ix.Common +import Ix.Environment namespace Ixon --- putter monad +/-! ## Serialization Monad and Typeclass -/ + abbrev PutM := StateM ByteArray structure GetState where idx : Nat := 0 bytes : ByteArray := .empty --- getter monad abbrev GetM := EStateM String GetState --- serialization typeclass class Serialize (α : Type) where put : α → PutM Unit get : GetM α -def runPut (p: PutM Unit) : ByteArray := (p.run ByteArray.empty).2 +def runPut (p : PutM Unit) : ByteArray := (p.run ByteArray.empty).2 -def runGet (getm: GetM A) (bytes: ByteArray) : Except String A := +def runGet (getm : GetM A) (bytes : ByteArray) : Except String A := match getm.run { idx := 0, bytes } with | .ok a _ => .ok a | .error e _ => .error e -def ser [Serialize α] (a: α): ByteArray := runPut (Serialize.put a) -def de [Serialize α] (bytes: ByteArray): Except String α := +def ser [Serialize α] (a : α) : ByteArray := runPut (Serialize.put a) +def de [Serialize α] (bytes : ByteArray) : Except String α := runGet Serialize.get bytes -def putUInt8 (x: UInt8) : PutM Unit := +/-! ## Serialization Error Type -/ + +/-- Serialization/deserialization error. Variant order matches Rust SerializeError (tags 0–6). 
-/ +inductive SerializeError where + | unexpectedEof (expected : String) + | invalidTag (tag : UInt8) (context : String) + | invalidFlag (flag : UInt8) (context : String) + | invalidVariant (variant : UInt64) (context : String) + | invalidBool (value : UInt8) + | addressError + | invalidShareIndex (idx : UInt64) (max : Nat) + deriving Repr, BEq + +def SerializeError.toString : SerializeError → String + | .unexpectedEof expected => s!"unexpected EOF, expected {expected}" + | .invalidTag tag context => s!"invalid tag 0x{String.ofList <| Nat.toDigits 16 tag.toNat} in {context}" + | .invalidFlag flag context => s!"invalid flag {flag} in {context}" + | .invalidVariant variant context => s!"invalid variant {variant} in {context}" + | .invalidBool value => s!"invalid bool value {value}" + | .addressError => "address parsing error" + | .invalidShareIndex idx max => s!"invalid Share index {idx}, max is {max}" + +instance : ToString SerializeError := ⟨SerializeError.toString⟩ + +/-! ## Primitive Serialization -/ + +def putU8 (x : UInt8) : PutM Unit := StateT.modifyGet (fun s => ((), s.push x)) -def getUInt8 : GetM UInt8 := do +def getU8 : GetM UInt8 := do let st ← get if st.idx < st.bytes.size then let b := st.bytes[st.idx]!
@@ -43,68 +77,28 @@ def getUInt8 : GetM UInt8 := do throw "EOF" instance : Serialize UInt8 where - put := putUInt8 - get := getUInt8 - -def putUInt16LE (x: UInt16) : PutM Unit := do - List.forM (List.range 2) fun i => - let b := UInt16.toUInt8 (x >>> (i.toUInt16 * 8)) - putUInt8 b - pure () - -def getUInt16LE : GetM UInt16 := do - let mut x : UInt16 := 0 - for i in List.range 2 do - let b ← getUInt8 - x := x + (UInt8.toUInt16 b) <<< ((UInt16.ofNat i) * 8) - pure x - -instance : Serialize UInt16 where - put := putUInt16LE - get := getUInt16LE - -def putUInt32LE (x: UInt32) : PutM Unit := do - List.forM (List.range 4) fun i => - let b := UInt32.toUInt8 (x >>> (i.toUInt32 * 8)) - putUInt8 b - pure () - -def getUInt32LE : GetM UInt32 := do - let mut x : UInt32 := 0 - for i in List.range 4 do - let b ← getUInt8 - x := x + (UInt8.toUInt32 b) <<< ((UInt32.ofNat i) * 8) - pure x - -instance : Serialize UInt32 where - put := putUInt32LE - get := getUInt32LE - -def putUInt64LE (x: UInt64) : PutM Unit := do - List.forM (List.range 8) fun i => - let b := UInt64.toUInt8 (x >>> (i.toUInt64 * 8)) - putUInt8 b - pure () - -def getUInt64LE : GetM UInt64 := do + put := putU8 + get := getU8 + +def putU64LE (x : UInt64) : PutM Unit := do + for i in [0:8] do + putU8 ((x >>> (i.toUInt64 * 8)).toUInt8) + +def getU64LE : GetM UInt64 := do let mut x : UInt64 := 0 - for i in List.range 8 do - let b ← getUInt8 - x := x + (UInt8.toUInt64 b) <<< ((UInt64.ofNat i) * 8) - pure x + for i in [0:8] do + let b ← getU8 + x := x ||| (b.toUInt64 <<< (i.toUInt64 * 8)) + return x instance : Serialize UInt64 where - put := putUInt64LE - get := getUInt64LE + put := putU64LE + get := getU64LE -def putBytes (x: ByteArray) : PutM Unit := +def putBytes (x : ByteArray) : PutM Unit := StateT.modifyGet (fun s => ((), s.append x)) -def getBytesToEnd : GetM ByteArray := do - let st ← get - return st.bytes - -def getBytes (len: Nat) : GetM ByteArray := do +def getBytes (len : Nat) : GetM ByteArray := do let st ← get 
if st.idx + len <= st.bytes.size then let chunk := st.bytes.extract st.idx (st.idx + len) @@ -112,840 +106,1727 @@ def getBytes (len: Nat) : GetM ByteArray := do return chunk else throw s!"EOF: need {len} bytes at index {st.idx}, but size is {st.bytes.size}" --- F := flag, L := large-bit, X := small-field, A := large_field --- 0xFFFF_LXXX {AAAA_AAAA, ...} --- "Tag" means the whole thing --- "Head" means the first byte of the tag --- "Flag" means the first nibble of the head +instance : Serialize Bool where + put | .false => putU8 0 | .true => putU8 1 + get := do match ← getU8 with + | 0 => return .false + | 1 => return .true + | e => throw s!"expected Bool (0 or 1), got {e}" + +instance : Serialize Address where + put x := putBytes x.hash + get := Address.mk <$> getBytes 32 + +/-! ## Tag Encoding -/ + +/-- Count bytes needed to represent a u64 in minimal little-endian form. -/ +def u64ByteCount (x : UInt64) : UInt8 := + if x == 0 then 0 + else if x < 0x100 then 1 + else if x < 0x10000 then 2 + else if x < 0x1000000 then 3 + else if x < 0x100000000 then 4 + else if x < 0x10000000000 then 5 + else if x < 0x1000000000000 then 6 + else if x < 0x100000000000000 then 7 + else 8 + +/-- Write a u64 in minimal little-endian bytes. -/ +def putU64TrimmedLE (x : UInt64) : PutM Unit := do + let n := u64ByteCount x + for i in [0:n.toNat] do + putU8 ((x >>> (i.toUInt64 * 8)).toUInt8) + +/-- Read a u64 from minimal little-endian bytes. -/ +def getU64TrimmedLE (len : Nat) : GetM UInt64 := do + let mut x : UInt64 := 0 + for i in [0:len] do + let b ← getU8 + x := x ||| (b.toUInt64 <<< (i.toUInt64 * 8)) + return x + +/-- Tag0: Variable-length encoding for small integers. 
/-- Tag0: variable-length encoding for small integers.
    Header byte: [large:1][size:7]
    - large=0: size lives in the low 7 bits (0–127)
    - large=1: the low 7 bits hold (byte count − 1); that many + 1 bytes of
      little-endian size data follow. -/
structure Tag0 where
  size : UInt64
  deriving BEq, Repr

def putTag0 (t : Tag0) : PutM Unit := do
  if t.size < 128 then
    putU8 t.size.toUInt8
  else
    let nBytes := u64ByteCount t.size
    putU8 (0x80 ||| (nBytes - 1))
    putU64TrimmedLE t.size

def getTag0 : GetM Tag0 := do
  let head ← getU8
  let low := head &&& 0x7F
  if head &&& 0x80 != 0 then
    return ⟨← getU64TrimmedLE (low.toNat + 1)⟩
  else
    return ⟨low.toUInt64⟩

/-- Tag2: 2-bit flag + size.
    Header byte: [flag:2][large:1][size:5]
    - large=0: size lives in the low 5 bits (0–31)
    - large=1: (size+1) bytes follow containing the actual size. -/
structure Tag2 where
  flag : UInt8
  size : UInt64
  deriving BEq, Repr

def putTag2 (t : Tag2) : PutM Unit := do
  if t.size < 32 then
    putU8 ((t.flag <<< 6) ||| t.size.toUInt8)
  else
    let nBytes := u64ByteCount t.size
    putU8 ((t.flag <<< 6) ||| 0x20 ||| (nBytes - 1))
    putU64TrimmedLE t.size

def getTag2 : GetM Tag2 := do
  let head ← getU8
  let flag := head >>> 6
  let low := head &&& 0x1F
  if head &&& 0x20 != 0 then
    return ⟨flag, ← getU64TrimmedLE (low.toNat + 1)⟩
  else
    return ⟨flag, low.toUInt64⟩

/-- Tag4: 4-bit flag + size.
    Header byte: [flag:4][large:1][size:3]
    - large=0: size lives in the low 3 bits (0–7)
    - large=1: (size+1) bytes follow containing the actual size. -/
structure Tag4 where
  flag : UInt8
  size : UInt64
  deriving BEq, Repr, Inhabited, Ord, Hashable

def putTag4 (t : Tag4) : PutM Unit := do
  if t.size < 8 then
    putU8 ((t.flag <<< 4) ||| t.size.toUInt8)
  else
    let nBytes := u64ByteCount t.size
    putU8 ((t.flag <<< 4) ||| 0x08 ||| (nBytes - 1))
    putU64TrimmedLE t.size

def getTag4 : GetM Tag4 := do
  let head ← getU8
  let flag := head >>> 4
  let low := head &&& 0x07
  if head &&& 0x08 != 0 then
    return ⟨flag, ← getU64TrimmedLE (low.toNat + 1)⟩
  else
    return ⟨flag, low.toUInt64⟩

instance : Serialize Tag4 where
  put := putTag4
  get := getTag4
/-! ## Universe Levels -/

/-- Universe levels for Lean's type system. -/
inductive Univ where
  | zero : Univ
  | succ : Univ → Univ
  | max : Univ → Univ → Univ
  | imax : Univ → Univ → Univ
  | var : UInt64 → Univ
  deriving BEq, Repr, Inhabited, Hashable

namespace Univ
  -- Tag2 flag values used by the Univ serializer.
  def FLAG_ZERO_SUCC : UInt8 := 0
  def FLAG_MAX : UInt8 := 1
  def FLAG_IMAX : UInt8 := 2
  def FLAG_VAR : UInt8 := 3
end Univ

/-! ## Expressions -/

/-- Expression in the Ixon format.
    Alpha-invariant representation of Lean expressions.
    Names are stripped, binder info is stored in metadata. -/
inductive Expr where
  | sort : UInt64 → Expr
  | var : UInt64 → Expr
  | ref : UInt64 → Array UInt64 → Expr
  | recur : UInt64 → Array UInt64 → Expr
  | prj : UInt64 → UInt64 → Expr → Expr
  | str : UInt64 → Expr
  | nat : UInt64 → Expr
  | app : Expr → Expr → Expr
  | lam : Expr → Expr → Expr
  | all : Expr → Expr → Expr
  | letE : Bool → Expr → Expr → Expr → Expr
  | share : UInt64 → Expr
  deriving BEq, Repr, Inhabited, Hashable

namespace Expr
  -- Tag4 flag values used by the Expr serializer.
  def FLAG_SORT : UInt8 := 0x0
  def FLAG_VAR : UInt8 := 0x1
  def FLAG_REF : UInt8 := 0x2
  def FLAG_REC : UInt8 := 0x3
  def FLAG_PRJ : UInt8 := 0x4
  def FLAG_STR : UInt8 := 0x5
  def FLAG_NAT : UInt8 := 0x6
  def FLAG_APP : UInt8 := 0x7
  def FLAG_LAM : UInt8 := 0x8
  def FLAG_ALL : UInt8 := 0x9
  def FLAG_LET : UInt8 := 0xA
  def FLAG_SHARE : UInt8 := 0xB
end Expr

/-! ## Constant Types -/

-- DefKind, DefinitionSafety, QuotKind are defined in Ix.Common

open Ix (DefKind DefinitionSafety QuotKind)

/-- A definition/opaque/theorem body with its type. -/
structure Definition where
  kind : DefKind
  safety : DefinitionSafety
  lvls : UInt64
  typ : Expr
  value : Expr
  deriving BEq, Repr, Inhabited

/-- One reduction rule of a recursor: field count plus right-hand side. -/
structure RecursorRule where
  fields : UInt64
  rhs : Expr
  deriving BEq, Repr, Inhabited

/-- Recursor telescope shape plus its rules. -/
structure Recursor where
  k : Bool
  isUnsafe : Bool
  lvls : UInt64
  params : UInt64
  indices : UInt64
  motives : UInt64
  minors : UInt64
  typ : Expr
  rules : Array RecursorRule
  deriving BEq, Repr, Inhabited
/-- An axiom: a typed constant with no value. -/
structure Axiom where
  isUnsafe : Bool
  lvls : UInt64
  typ : Expr
  deriving BEq, Repr, Inhabited

/-- One of the four quotient primitives. -/
structure Quotient where
  kind : QuotKind
  lvls : UInt64
  typ : Expr
  deriving BEq, Repr, Inhabited

/-- An inductive constructor with its index and arity split. -/
structure Constructor where
  isUnsafe : Bool
  lvls : UInt64
  cidx : UInt64
  params : UInt64
  fields : UInt64
  typ : Expr
  deriving BEq, Repr, Inhabited

/-- An inductive type with its constructors. -/
structure Inductive where
  recr : Bool
  refl : Bool
  isUnsafe : Bool
  lvls : UInt64
  params : UInt64
  indices : UInt64
  nested : UInt64
  typ : Expr
  ctors : Array Constructor
  deriving BEq, Repr, Inhabited

/-! ## Projection Types -/

/-- Reference to an inductive inside a mutual block. -/
structure InductiveProj where
  idx : UInt64
  block : Address
  deriving BEq, Repr, Inhabited, Hashable

/-- Reference to a constructor of an inductive inside a mutual block. -/
structure ConstructorProj where
  idx : UInt64
  cidx : UInt64
  block : Address
  deriving BEq, Repr, Inhabited, Hashable
/-- Reference to a recursor of an inductive inside a mutual block. -/
structure RecursorProj where
  idx : UInt64
  block : Address
  deriving BEq, Repr, Inhabited, Hashable

/-- Reference to a definition inside a mutual block. -/
structure DefinitionProj where
  idx : UInt64
  block : Address
  deriving BEq, Repr, Inhabited, Hashable

/-! ## Constant Info -/

/-- A member of a mutual block. -/
inductive MutConst where
  | defn : Definition → MutConst
  | indc : Inductive → MutConst
  | recr : Recursor → MutConst
  deriving BEq, Repr, Inhabited

/-- Payload of a top-level constant. -/
inductive ConstantInfo where
  | defn : Definition → ConstantInfo
  | recr : Recursor → ConstantInfo
  | axio : Axiom → ConstantInfo
  | quot : Quotient → ConstantInfo
  | cPrj : ConstructorProj → ConstantInfo
  | rPrj : RecursorProj → ConstantInfo
  | iPrj : InductiveProj → ConstantInfo
  | dPrj : DefinitionProj → ConstantInfo
  | muts : Array MutConst → ConstantInfo
  deriving BEq, Repr, Inhabited

namespace ConstantInfo
  -- Variant discriminants used by the wire format.
  def CONST_DEFN : UInt64 := 0
  def CONST_RECR : UInt64 := 1
  def CONST_AXIO : UInt64 := 2
  def CONST_QUOT : UInt64 := 3
  def CONST_CPRJ : UInt64 := 4
  def CONST_RPRJ : UInt64 := 5
  def CONST_IPRJ : UInt64 := 6
  def CONST_DPRJ : UInt64 := 7
end ConstantInfo

/-- A top-level constant with sharing, refs, and univs tables. -/
structure Constant where
  info : ConstantInfo
  sharing : Array Expr
  refs : Array Address
  univs : Array Univ
  deriving BEq, Repr, Inhabited

/-! ## Metadata Types -/

/-- Data values for KVMap metadata -/
inductive DataValue where
  | ofString (addr : Address)
  | ofBool (b : Bool)
  | ofName (addr : Address)
  | ofNat (addr : Address)
  | ofInt (addr : Address)
  | ofSyntax (addr : Address)
  deriving BEq, Repr, Inhabited, Hashable

/-- Key-value map for Lean.Expr.mdata -/
abbrev KVMap := Array (Address × DataValue)

/-- Arena node for per-expression metadata.
    Nodes are allocated bottom-up (children before parents) in the arena.
    Arena indices are UInt64 values pointing into `ExprMetaArena.nodes`. -/
inductive ExprMetaData where
  | leaf
  | app (fun_ : UInt64) (arg : UInt64)
  | binder (name : Address) (info : Lean.BinderInfo)
      (tyChild : UInt64) (bodyChild : UInt64)
  | letBinder (name : Address)
      (tyChild : UInt64) (valChild : UInt64) (bodyChild : UInt64)
  | ref (name : Address)
  | prj (structName : Address) (child : UInt64)
  | mdata (mdata : Array KVMap) (child : UInt64)
  deriving BEq, Repr, Inhabited

/-- Arena for expression metadata within a single constant. -/
structure ExprMetaArena where
  nodes : Array ExprMetaData := #[]
  deriving BEq, Repr, Inhabited

/-- Push a node and return its arena index. -/
def ExprMetaArena.alloc (arena : ExprMetaArena) (node : ExprMetaData)
    : ExprMetaArena × UInt64 :=
  let idx := arena.nodes.size.toUInt64
  ({ nodes := arena.nodes.push node }, idx)

/-- Count ExprMetaData nodes by type: (leaf, app, binder, letBinder, ref, prj, mdata) -/
def ExprMetaArena.countByType (arena : ExprMetaArena) : Nat × Nat × Nat × Nat × Nat × Nat × Nat :=
  arena.nodes.foldl (init := (0, 0, 0, 0, 0, 0, 0)) fun (le, ap, bi, lb, rf, pj, md) node =>
    match node with
    | .leaf => (le + 1, ap, bi, lb, rf, pj, md)
    | .app .. => (le, ap + 1, bi, lb, rf, pj, md)
    | .binder .. => (le, ap, bi + 1, lb, rf, pj, md)
    | .letBinder .. => (le, ap, bi, lb + 1, rf, pj, md)
    | .ref .. => (le, ap, bi, lb, rf + 1, pj, md)
    | .prj .. => (le, ap, bi, lb, rf, pj + 1, md)
    | .mdata .. => (le, ap, bi, lb, rf, pj, md + 1)

/-- Count mdata items in an arena. -/
def ExprMetaArena.mdataItemCount (arena : ExprMetaArena) : Nat :=
  arena.nodes.foldl (init := 0) fun acc node =>
    match node with
    | .mdata mdata _ => acc + mdata.foldl (fun a kv => a + kv.size) 0
    | _ => acc

/-- Per-constant metadata with arena-based expression metadata.
    Each variant stores an ExprMetaArena covering all expressions in
    that constant, plus root indices pointing into the arena. -/
inductive ConstantMeta where
  | empty
  | defn (name : Address) (lvls : Array Address) (hints : Lean.ReducibilityHints)
      (all : Array Address) (ctx : Array Address)
      (arena : ExprMetaArena) (typeRoot : UInt64) (valueRoot : UInt64)
  | axio (name : Address) (lvls : Array Address)
      (arena : ExprMetaArena) (typeRoot : UInt64)
  | quot (name : Address) (lvls : Array Address)
      (arena : ExprMetaArena) (typeRoot : UInt64)
  | indc (name : Address) (lvls : Array Address) (ctors : Array Address)
      (all : Array Address) (ctx : Array Address)
      (arena : ExprMetaArena) (typeRoot : UInt64)
  | ctor (name : Address) (lvls : Array Address) (induct : Address)
      (arena : ExprMetaArena) (typeRoot : UInt64)
  | recr (name : Address) (lvls : Array Address) (rules : Array Address)
      (all : Array Address) (ctx : Array Address)
      (arena : ExprMetaArena) (typeRoot : UInt64)
      (ruleRoots : Array UInt64)
  deriving Inhabited, BEq, Repr

/-- Count total arena nodes in this ConstantMeta. -/
def ConstantMeta.exprMetaCount : ConstantMeta → Nat
  | .empty => 0
  | .defn _ _ _ _ _ arena _ _ => arena.nodes.size
  | .axio _ _ arena _ => arena.nodes.size
  | .quot _ _ arena _ => arena.nodes.size
  | .indc _ _ _ _ _ arena _ => arena.nodes.size
  | .ctor _ _ _ arena _ => arena.nodes.size
  | .recr _ _ _ _ _ arena _ _ => arena.nodes.size

/-- Count total arena nodes and mdata items in this ConstantMeta. -/
def ConstantMeta.exprMetaStats : ConstantMeta → Nat × Nat
  | .empty => (0, 0)
  | .defn _ _ _ _ _ arena _ _ => (arena.nodes.size, arena.mdataItemCount)
  | .axio _ _ arena _ => (arena.nodes.size, arena.mdataItemCount)
  | .quot _ _ arena _ => (arena.nodes.size, arena.mdataItemCount)
  | .indc _ _ _ _ _ arena _ => (arena.nodes.size, arena.mdataItemCount)
  | .ctor _ _ _ arena _ => (arena.nodes.size, arena.mdataItemCount)
  | .recr _ _ _ _ _ arena _ _ => (arena.nodes.size, arena.mdataItemCount)

/-- Count ExprMetaData nodes by type: (binder, letBinder, ref, prj, mdata)
    (compatible signature with old ExprMetas.countByType for comparison) -/
def ConstantMeta.exprMetaByType : ConstantMeta → Nat × Nat × Nat × Nat × Nat
  | .empty => (0, 0, 0, 0, 0)
  | cm =>
    let arena := match cm with
      | .defn _ _ _ _ _ a _ _ => a
      | .axio _ _ a _ => a
      | .quot _ _ a _ => a
      | .indc _ _ _ _ _ a _ => a
      | .ctor _ _ _ a _ => a
      | .recr _ _ _ _ _ a _ _ => a
      | .empty => {}  -- unreachable: `.empty` handled by the outer match
    let (_, _, bi, lb, rf, pj, md) := arena.countByType
    (bi, lb, rf, pj, md)

/-- A named constant with metadata -/
structure Named where
  addr : Address
  constMeta : ConstantMeta := .empty
  deriving Inhabited, BEq, Repr

/-- A cryptographic commitment -/
structure Comm where
  secret : Address
  payload : Address
  deriving BEq, Repr, Inhabited

namespace Constant
  -- Tag4 flag values for serialized constants.
  def FLAG_MUTS : UInt8 := 0xC
  def FLAG : UInt8 := 0xD
end Constant
/-! ## Univ Serialization -/

/-- Count successive `.succ` constructors. -/
def Univ.succCount : Univ → UInt64
  | .succ inner => 1 + inner.succCount
  | _ => 0

/-- Strip a `.succ` chain down to its base. -/
def Univ.succBase : Univ → Univ
  | .succ inner => inner.succBase
  | u => u

partial def putUniv : Univ → PutM Unit
  | .zero => putTag2 ⟨Univ.FLAG_ZERO_SUCC, 0⟩
  -- A succ chain is run-length encoded: count in the tag, then the base.
  | u@(.succ _) => do
    putTag2 ⟨Univ.FLAG_ZERO_SUCC, u.succCount⟩
    putUniv u.succBase
  | .max a b => do
    putTag2 ⟨Univ.FLAG_MAX, 0⟩
    putUniv a
    putUniv b
  | .imax a b => do
    putTag2 ⟨Univ.FLAG_IMAX, 0⟩
    putUniv a
    putUniv b
  | .var idx => putTag2 ⟨Univ.FLAG_VAR, idx⟩

partial def getUniv : GetM Univ := do
  let tag ← getTag2
  match tag.flag with
  | 0 => -- ZERO_SUCC: size 0 is zero, size n is n `.succ`s around the base
    if tag.size == 0 then
      return .zero
    else
      let mut u ← getUniv
      for _ in [0:tag.size.toNat] do
        u := .succ u
      return u
  | 1 => return .max (← getUniv) (← getUniv)
  | 2 => return .imax (← getUniv) (← getUniv)
  | 3 => return .var tag.size
  | f => throw s!"getUniv: invalid flag {f}"

instance : Serialize Univ where
  put := putUniv
  get := getUniv

/-! ## Expr Serialization -/

/-- Collect all types in a lambda telescope. -/
def Expr.collectLamTypes : Expr → List Expr × Expr
  | .lam ty body =>
    let (tys, base) := body.collectLamTypes
    (ty :: tys, base)
  | e => ([], e)

/-- Collect all types in a forall telescope. -/
def Expr.collectAllTypes : Expr → List Expr × Expr
  | .all ty body =>
    let (tys, base) := body.collectAllTypes
    (ty :: tys, base)
  | e => ([], e)

/-- Collect all arguments in an application telescope (in application order). -/
def Expr.collectAppArgs : Expr → List Expr × Expr
  | .app f a =>
    let (args, base) := f.collectAppArgs
    (args ++ [a], base)
  | e => ([], e)

partial def putExpr : Expr → PutM Unit
  | .sort idx => putTag4 ⟨Expr.FLAG_SORT, idx⟩
  | .var idx => putTag4 ⟨Expr.FLAG_VAR, idx⟩
  | .ref refIdx univIdxs => do
    -- Rust format: Tag4(flag, array_len), Tag0(ref_idx), then elements
    putTag4 ⟨Expr.FLAG_REF, univIdxs.size.toUInt64⟩
    putTag0 ⟨refIdx⟩
    for idx in univIdxs do putTag0 ⟨idx⟩
  | .recur recIdx univIdxs => do
    -- Rust format: Tag4(flag, array_len), Tag0(rec_idx), then elements
    putTag4 ⟨Expr.FLAG_REC, univIdxs.size.toUInt64⟩
    putTag0 ⟨recIdx⟩
    for idx in univIdxs do putTag0 ⟨idx⟩
  | .prj typeRefIdx fieldIdx val => do
    -- Rust format: Tag4(flag, field_idx), Tag0(type_ref_idx), then val
    putTag4 ⟨Expr.FLAG_PRJ, fieldIdx⟩
    putTag0 ⟨typeRefIdx⟩
    putExpr val
  | .str refIdx => putTag4 ⟨Expr.FLAG_STR, refIdx⟩
  | .nat refIdx => putTag4 ⟨Expr.FLAG_NAT, refIdx⟩
  | e@(.app _ _) => do
    -- Application telescope: base first, then args in application order.
    let (args, base) := e.collectAppArgs
    putTag4 ⟨Expr.FLAG_APP, args.length.toUInt64⟩
    putExpr base
    for arg in args do putExpr arg
  | e@(.lam _ _) => do
    -- Lambda telescope: binder types outermost-first, then the body.
    let (tys, base) := e.collectLamTypes
    putTag4 ⟨Expr.FLAG_LAM, tys.length.toUInt64⟩
    for ty in tys do putExpr ty
    putExpr base
  | e@(.all _ _) => do
    -- Forall telescope: binder types outermost-first, then the body.
    let (tys, base) := e.collectAllTypes
    putTag4 ⟨Expr.FLAG_ALL, tys.length.toUInt64⟩
    for ty in tys do putExpr ty
    putExpr base
  | .letE nonDep ty val body => do
    putTag4 ⟨Expr.FLAG_LET, if nonDep then 1 else 0⟩
    putExpr ty
    putExpr val
    putExpr body
  | .share idx => putTag4 ⟨Expr.FLAG_SHARE, idx⟩

partial def getExpr : GetM Expr := do
  let tag ← getTag4
  match tag.flag with
  | 0x0 => return .sort tag.size
  | 0x1 => return .var tag.size
  | 0x2 => do -- REF: tag.size is array_len, then ref_idx, then elements
    let refIdx := (← getTag0).size
    let mut univIdxs := #[]
    for _ in [0:tag.size.toNat] do
      univIdxs := univIdxs.push ((← getTag0).size)
    return .ref refIdx univIdxs
  | 0x3 => do -- REC: tag.size is array_len, then rec_idx, then elements
    let recIdx := (← getTag0).size
    let mut univIdxs := #[]
    for _ in [0:tag.size.toNat] do
      univIdxs := univIdxs.push ((← getTag0).size)
    return .recur recIdx univIdxs
  | 0x4 => do -- PRJ: tag.size is field_idx, then type_ref_idx, then val
    let typeRefIdx := (← getTag0).size
    let val ← getExpr
    return .prj typeRefIdx tag.size val
  | 0x5 => return .str tag.size
  | 0x6 => return .nat tag.size
  | 0x7 => do -- APP (telescope): fold args left onto the base
    let mut acc ← getExpr
    for _ in [0:tag.size.toNat] do
      acc := .app acc (← getExpr)
    return acc
  | 0x8 => do -- LAM (telescope): rebuild binders innermost-out
    let mut tys := #[]
    for _ in [0:tag.size.toNat] do
      tys := tys.push (← getExpr)
    let mut acc ← getExpr
    for ty in tys.reverse do
      acc := .lam ty acc
    return acc
  | 0x9 => do -- ALL (telescope): rebuild binders innermost-out
    let mut tys := #[]
    for _ in [0:tag.size.toNat] do
      tys := tys.push (← getExpr)
    let mut acc ← getExpr
    for ty in tys.reverse do
      acc := .all ty acc
    return acc
  | 0xA => do -- LET: nonDep flag lives in the tag size
    let nonDep := tag.size != 0
    let ty ← getExpr
    let val ← getExpr
    let body ← getExpr
    return .letE nonDep ty val body
  | 0xB => return .share tag.size
  | f => throw s!"getExpr: invalid flag {f}"

instance : Serialize Expr where
  put := putExpr
  get := getExpr
/-! ## Constant Type Serialization -/

/-- Pack up to 8 booleans into one byte, bit i = list element i. -/
def packBools (bs : List Bool) : UInt8 :=
  bs.zipIdx.foldl (fun acc (b, i) =>
    if b then acc ||| ((1 : UInt8) <<< (UInt8.ofNat i)) else acc) 0

/-- Unpack the low `n` bits of a byte into booleans. -/
def unpackBools (n : Nat) (byte : UInt8) : List Bool :=
  (List.range n).map fun i => (byte &&& ((1 : UInt8) <<< (UInt8.ofNat i))) != 0

/-- Pack DefKind (high bits) and DefinitionSafety (low 2 bits) into one byte. -/
def packDefKindSafety (kind : DefKind) (safety : DefinitionSafety) : UInt8 :=
  let k : UInt8 := match kind with | .defn => 0 | .opaq => 1 | .thm => 2
  let s : UInt8 := match safety with | .unsaf => 0 | .safe => 1 | .part => 2
  (k <<< 2) ||| s

/-- Inverse of `packDefKindSafety`. -/
def unpackDefKindSafety (b : UInt8) : DefKind × DefinitionSafety :=
  let kind : DefKind := match b >>> 2 with | 0 => .defn | 1 => .opaq | _ => .thm
  let safety : DefinitionSafety := match b &&& 0x3 with | 0 => .unsaf | 1 => .safe | _ => .part
  (kind, safety)

def putDefinition (d : Definition) : PutM Unit := do
  putU8 (packDefKindSafety d.kind d.safety)
  putTag0 ⟨d.lvls⟩
  putExpr d.typ
  putExpr d.value

def getDefinition : GetM Definition := do
  let (kind, safety) := unpackDefKindSafety (← getU8)
  let lvls := (← getTag0).size
  let typ ← getExpr
  let value ← getExpr
  return ⟨kind, safety, lvls, typ, value⟩

instance : Serialize Definition where
  put := putDefinition
  get := getDefinition

def putRecursorRule (r : RecursorRule) : PutM Unit := do
  putTag0 ⟨r.fields⟩
  putExpr r.rhs

def getRecursorRule : GetM RecursorRule := do
  let fields := (← getTag0).size
  let rhs ← getExpr
  return ⟨fields, rhs⟩

instance : Serialize RecursorRule where
  put := putRecursorRule
  get := getRecursorRule

def putRecursor (r : Recursor) : PutM Unit := do
  putU8 (packBools [r.k, r.isUnsafe])
  putTag0 ⟨r.lvls⟩
  putTag0 ⟨r.params⟩
  putTag0 ⟨r.indices⟩
  putTag0 ⟨r.motives⟩
  putTag0 ⟨r.minors⟩
  putExpr r.typ
  putTag0 ⟨r.rules.size.toUInt64⟩
  for rule in r.rules do putRecursorRule rule

def getRecursor : GetM Recursor := do
  let flags := unpackBools 2 (← getU8)
  let k := flags[0]!
  let isUnsafe := flags[1]!
  let lvls := (← getTag0).size
  let params := (← getTag0).size
  let indices := (← getTag0).size
  let motives := (← getTag0).size
  let minors := (← getTag0).size
  let typ ← getExpr
  let numRules := (← getTag0).size.toNat
  let mut rules := #[]
  for _ in [0:numRules] do
    rules := rules.push (← getRecursorRule)
  return ⟨k, isUnsafe, lvls, params, indices, motives, minors, typ, rules⟩

instance : Serialize Recursor where
  put := putRecursor
  get := getRecursor

def putAxiom (a : Axiom) : PutM Unit := do
  putU8 (if a.isUnsafe then 1 else 0)
  putTag0 ⟨a.lvls⟩
  putExpr a.typ

def getAxiom : GetM Axiom := do
  let isUnsafe := (← getU8) != 0
  let lvls := (← getTag0).size
  let typ ← getExpr
  return ⟨isUnsafe, lvls, typ⟩

instance : Serialize Axiom where
  put := putAxiom
  get := getAxiom

def putQuotient (q : Quotient) : PutM Unit := do
  let k : UInt8 := match q.kind with | .type => 0 | .ctor => 1 | .lift => 2 | .ind => 3
  putU8 k
  putTag0 ⟨q.lvls⟩
  putExpr q.typ

def getQuotient : GetM Quotient := do
  let v ← getU8
  let k : QuotKind ← match v with
    | 0 => pure .type | 1 => pure .ctor | 2 => pure .lift | 3 => pure .ind
    | _ => throw s!"invalid QuotKind tag {v}"
  let lvls := (← getTag0).size
  let typ ← getExpr
  return ⟨k, lvls, typ⟩
put x := Serialize.put (x.kind, x.safety, x.lvls, x.type, x.value) - get := (fun (a,b,c,d,e) => .mk a b c d e) <$> Serialize.get - -structure Constructor where - isUnsafe: Bool - lvls : Nat - cidx : Nat - params : Nat - fields : Nat - type : Address - deriving BEq, Repr, Inhabited, Ord, Hashable +instance : Serialize Quotient where + put := putQuotient + get := getQuotient + +def putConstructor (c : Constructor) : PutM Unit := do + putU8 (if c.isUnsafe then 1 else 0) + putTag0 ⟨c.lvls⟩ + putTag0 ⟨c.cidx⟩ + putTag0 ⟨c.params⟩ + putTag0 ⟨c.fields⟩ + putExpr c.typ + +def getConstructor : GetM Constructor := do + let isUnsafe := (← getU8) != 0 + let lvls := (← getTag0).size + let cidx := (← getTag0).size + let params := (← getTag0).size + let fields := (← getTag0).size + let typ ← getExpr + return ⟨isUnsafe, lvls, cidx, params, fields, typ⟩ instance : Serialize Constructor where - put x := Serialize.put (x.isUnsafe, x.lvls, x.cidx, x.params, x.fields, x.type) - get := (fun (a,b,c,d,e,f) => .mk a b c d e f) <$> Serialize.get - -structure RecursorRule where - fields : Nat - rhs : Address - deriving BEq, Repr, Inhabited, Ord, Hashable - -instance : Serialize RecursorRule where - put x := Serialize.put (x.fields, x.rhs) - get := (fun (a,b) => .mk a b) <$> Serialize.get - -structure Recursor where - k : Bool - isUnsafe: Bool - lvls : Nat - params : Nat - indices : Nat - motives : Nat - minors : Nat - type : Address - rules : List RecursorRule - deriving BEq, Repr, Inhabited, Ord, Hashable - -instance : Serialize Recursor where - put x := Serialize.put ((x.k, x.isUnsafe), x.lvls, x.params, x.indices, x.motives, x.minors, x.type, x.rules) - get := (fun ((a,b),c,d,e,f,g,h,i) => .mk a b c d e f g h i) <$> Serialize.get - -structure Inductive where - recr : Bool - refl : Bool - isUnsafe: Bool - lvls : Nat - params : Nat - indices : Nat - nested : Nat - type : Address - ctors : List Constructor - deriving BEq, Repr, Inhabited, Ord, Hashable + put := putConstructor + get := 
getConstructor + +def putInductive (i : Inductive) : PutM Unit := do + putU8 (packBools [i.recr, i.refl, i.isUnsafe]) + putTag0 ⟨i.lvls⟩ + putTag0 ⟨i.params⟩ + putTag0 ⟨i.indices⟩ + putTag0 ⟨i.nested⟩ + putExpr i.typ + putTag0 ⟨i.ctors.size.toUInt64⟩ + for c in i.ctors do putConstructor c + +def getInductive : GetM Inductive := do + let bools := unpackBools 3 (← getU8) + let recr := bools[0]! + let refl := bools[1]! + let isUnsafe := bools[2]! + let lvls := (← getTag0).size + let params := (← getTag0).size + let indices := (← getTag0).size + let nested := (← getTag0).size + let typ ← getExpr + let numCtors := (← getTag0).size.toNat + let mut ctors := #[] + for _ in [0:numCtors] do + ctors := ctors.push (← getConstructor) + return ⟨recr, refl, isUnsafe, lvls, params, indices, nested, typ, ctors⟩ instance : Serialize Inductive where - put x := Serialize.put ((x.recr,x.refl,x.isUnsafe), x.lvls, x.params, - x.indices, x.nested, x.type, x.ctors) --, x.recrs) - get := (fun ((a,b,c),d,e,f,g,h,i) => .mk a b c d e f g h i) <$> Serialize.get - -structure InductiveProj where - idx : Nat - block : Address - deriving BEq, Repr, Inhabited, Ord, Hashable + put := putInductive + get := getInductive -instance : Serialize InductiveProj where - put := fun x => Serialize.put (x.idx, x.block) - get := (fun (x,y) => .mk x y) <$> Serialize.get +def putInductiveProj (p : InductiveProj) : PutM Unit := do + putTag0 ⟨p.idx⟩ + Serialize.put p.block -structure ConstructorProj where - idx : Nat - cidx : Nat - block : Address - deriving BEq, Repr, Inhabited, Ord, Hashable +def getInductiveProj : GetM InductiveProj := do + let idx := (← getTag0).size + let block ← Serialize.get + return ⟨idx, block⟩ -instance : Serialize ConstructorProj where - put := fun x => Serialize.put (x.idx, x.cidx, x.block) - get := (fun (x,y,z) => .mk x y z) <$> Serialize.get +instance : Serialize InductiveProj where + put := putInductiveProj + get := getInductiveProj -structure RecursorProj where - idx : Nat - block : 
Address - deriving BEq, Repr, Inhabited, Ord, Hashable +def putConstructorProj (p : ConstructorProj) : PutM Unit := do + putTag0 ⟨p.idx⟩ + putTag0 ⟨p.cidx⟩ + Serialize.put p.block -instance : Serialize RecursorProj where - put := fun x => Serialize.put (x.idx, x.block) - get := (fun (x,y) => .mk x y) <$> Serialize.get +def getConstructorProj : GetM ConstructorProj := do + let idx := (← getTag0).size + let cidx := (← getTag0).size + let block ← Serialize.get + return ⟨idx, cidx, block⟩ -structure DefinitionProj where - idx : Nat - block : Address - deriving BEq, Repr, Inhabited, Ord, Hashable +instance : Serialize ConstructorProj where + put := putConstructorProj + get := getConstructorProj -instance : Serialize DefinitionProj where - put := fun x => Serialize.put (x.idx, x.block) - get := (fun (x,y) => .mk x y) <$> Serialize.get +def putRecursorProj (p : RecursorProj) : PutM Unit := do + putTag0 ⟨p.idx⟩ + Serialize.put p.block -structure Comm where - secret : Address - payload : Address - deriving BEq, Repr, Inhabited, Ord, Hashable +def getRecursorProj : GetM RecursorProj := do + let idx := (← getTag0).size + let block ← Serialize.get + return ⟨idx, block⟩ -instance : Serialize Comm where - put := fun x => Serialize.put (x.secret, x.payload) - get := (fun (x,y) => .mk x y) <$> Serialize.get +instance : Serialize RecursorProj where + put := putRecursorProj + get := getRecursorProj -structure Env where - env : List MetaAddress - deriving BEq, Repr, Inhabited, Ord, Hashable +def putDefinitionProj (p : DefinitionProj) : PutM Unit := do + putTag0 ⟨p.idx⟩ + Serialize.put p.block -instance : Serialize Env where - put x := Serialize.put x.env - get := .mk <$> Serialize.get - -structure EvalClaim where - lvls : Address - type : Address - input: Address - output: Address -deriving BEq, Repr, Inhabited, Ord, Hashable - -structure CheckClaim where - lvls : Address - type : Address - value : Address -deriving BEq, Repr, Inhabited, Ord, Hashable - -inductive Claim where -| 
evals : EvalClaim -> Claim -| checks : CheckClaim -> Claim -deriving BEq, Repr, Inhabited, Ord, Hashable - -instance : ToString CheckClaim where - toString x := s!"#{x.value} : #{x.type} @ #{x.lvls}" - -instance : ToString EvalClaim where - toString x := s!"#{x.input} ~> #{x.output} : #{x.type} @ #{x.lvls}" - -instance : ToString Claim where - toString - | .evals x => toString x - | .checks x => toString x - -instance : Serialize CheckClaim where - put x := Serialize.put (x.lvls, x.type, x.value) - get := (fun (x,y,z) => .mk x y z) <$> Serialize.get - -instance : Serialize EvalClaim where - put x := Serialize.put (x.lvls, x.type, x.input, x.output) - get := (fun (w,x,y,z) => .mk w x y z) <$> Serialize.get - -instance : Serialize Claim where - put - | .evals x => putTag4 ⟨0xE, 1⟩ *> Serialize.put x - | .checks x => putTag4 ⟨0xE, 2⟩ *> Serialize.put x - get := do match <- getTag4 with - | ⟨0xE,1⟩ => .evals <$> Serialize.get - | ⟨0xE,2⟩ => .checks <$> Serialize.get - | e => throw s!"expected Claim with tag 0xE1 or 0xE2, got {repr e}" - -structure Proof where - claim : Claim - proof : ByteArray - deriving Inhabited, BEq, Ord, Hashable - -instance : ToString Proof where - toString p := s!"<{toString p.claim} := {hexOfBytes p.proof}>" - -instance : Repr Proof where - reprPrec p _ := toString p - -instance : Serialize Proof where - put := fun x => Serialize.put (x.claim, x.proof) - get := (fun (x,y) => .mk x y) <$> Serialize.get - -structure Substring where - str: Address - startPos: Nat - stopPos: Nat - -instance : Serialize Substring where - put := fun x => Serialize.put (x.str, x.startPos, x.stopPos) - get := (fun (x,y,z) => .mk x y z) <$> Serialize.get - -inductive SourceInfo where -| original (leading: Substring) (pos: Nat) (trailing: Substring) (endPos: Nat) -| synthetic (pos endPos: Nat) (canonical: Bool) -| none - -open Serialize -def putSourceInfo : SourceInfo → PutM Unit -| .original l p t e => putUInt8 0 *> put l *> put p *> put t *> put e -| .synthetic p e c 
=> putUInt8 1 *> put p *> put e *> put c -| .none => putUInt8 2 - -def getSourceInfo : GetM SourceInfo := do - match (← getUInt8) with - | 0 => .original <$> get <*> get <*> get <*> get - | 1 => .synthetic <$> get <*> get <*> get - | 2 => pure .none - | e => throw s!"expected SourceInfo encoding between 0 and 2, got {e}" - -instance : Serialize SourceInfo where - put := putSourceInfo - get := getSourceInfo - -inductive Preresolved where -| «namespace» (ns: Address) -| decl (n: Address) (fields: List Address) - -def putPreresolved : Preresolved → PutM Unit -| .namespace ns => putUInt8 0 *> put ns -| .decl n fs => putUInt8 1 *> put n *> put fs - -def getPreresolved : GetM Preresolved := do - match (← getUInt8) with - | 0 => .namespace <$> get - | 1 => .decl <$> get <*> get - | e => throw s!"expected Preresolved encoding between 0 and 2, got {e}" - -instance : Serialize Preresolved where - put := putPreresolved - get := getPreresolved - -inductive Syntax where -| missing -| node (info: SourceInfo) (kind: Address) (args: List Address) -| atom (info: SourceInfo) (val: Address) -| ident (info: SourceInfo) (rawVal: Substring) (val: Address) (preresolved: List Preresolved) -deriving Inhabited, Nonempty - -def putSyntax : Syntax → PutM Unit -| .missing => putUInt8 0 -| .node i k as => putUInt8 1 *> put i *> put k *> put as -| .atom i v => putUInt8 2 *> put i *> put v -| .ident i r v ps => putUInt8 3 *> put i *> put r *> put v *> put ps - -def getSyntax : GetM Syntax := do - match (← getUInt8) with - | 0 => pure .missing - | 1 => .node <$> get <*> get <*> get - | 2 => .atom <$> get <*> get - | 3 => .ident <$> get <*> get <*> get <*> get - | e => throw s!"expected Syntax encoding between 0 and 2, got {e}" - -instance : Serialize Syntax where - put := putSyntax - get := getSyntax - -def putInt : Int -> PutM Unit -| .ofNat n => putUInt8 0 *> put n -| .negSucc n => putUInt8 1 *> put n - -def getInt : GetM Int := do - match (<- getUInt8) with - | 0 => .ofNat <$> get - | 1 => 
.negSucc <$> get - | e => throw s!"expected Int encoding between 0 and 1, got {e}" - -instance : Serialize Int where - put := putInt - get := getInt +def getDefinitionProj : GetM DefinitionProj := do + let idx := (← getTag0).size + let block ← Serialize.get + return ⟨idx, block⟩ -inductive MutConst where -| defn : Definition -> MutConst -| indc : Inductive -> MutConst -| recr : Recursor -> MutConst -deriving BEq, Repr, Ord, Inhabited, Ord, Hashable +instance : Serialize DefinitionProj where + put := putDefinitionProj + get := getDefinitionProj def putMutConst : MutConst → PutM Unit -| .defn v => putUInt8 0 *> put v -| .indc v => putUInt8 1 *> put v -| .recr v => putUInt8 2 *> put v + | .defn d => putU8 0 *> putDefinition d + | .indc i => putU8 1 *> putInductive i + | .recr r => putU8 2 *> putRecursor r def getMutConst : GetM MutConst := do - match (← getUInt8) with - | 0 => .defn <$> get - | 1 => .indc <$> get - | 2 => .recr <$> get - | e => throw s!"expected MutConst encoding between 0 and 2, got {e}" + match ← getU8 with + | 0 => .defn <$> getDefinition + | 1 => .indc <$> getInductive + | 2 => .recr <$> getRecursor + | t => throw s!"getMutConst: invalid tag {t}" instance : Serialize MutConst where put := putMutConst get := getMutConst -instance : Serialize Int where - put := putInt - get := getInt +def putConstantInfo : ConstantInfo → PutM Unit + | .defn d => putTag4 ⟨Constant.FLAG, ConstantInfo.CONST_DEFN⟩ *> putDefinition d + | .recr r => putTag4 ⟨Constant.FLAG, ConstantInfo.CONST_RECR⟩ *> putRecursor r + | .axio a => putTag4 ⟨Constant.FLAG, ConstantInfo.CONST_AXIO⟩ *> putAxiom a + | .quot q => putTag4 ⟨Constant.FLAG, ConstantInfo.CONST_QUOT⟩ *> putQuotient q + | .cPrj p => putTag4 ⟨Constant.FLAG, ConstantInfo.CONST_CPRJ⟩ *> putConstructorProj p + | .rPrj p => putTag4 ⟨Constant.FLAG, ConstantInfo.CONST_RPRJ⟩ *> putRecursorProj p + | .iPrj p => putTag4 ⟨Constant.FLAG, ConstantInfo.CONST_IPRJ⟩ *> putInductiveProj p + | .dPrj p => putTag4 ⟨Constant.FLAG, 
ConstantInfo.CONST_DPRJ⟩ *> putDefinitionProj p + | .muts ms => do + putTag4 ⟨Constant.FLAG_MUTS, ms.size.toUInt64⟩ + for m in ms do putMutConst m + +def getConstantInfo : GetM ConstantInfo := do + let tag ← getTag4 + if tag.flag == Constant.FLAG_MUTS then + let mut ms := #[] + for _ in [0:tag.size.toNat] do + ms := ms.push (← getMutConst) + return .muts ms + else if tag.flag == Constant.FLAG then + match tag.size with + | 0 => .defn <$> getDefinition + | 1 => .recr <$> getRecursor + | 2 => .axio <$> getAxiom + | 3 => .quot <$> getQuotient + | 4 => .cPrj <$> getConstructorProj + | 5 => .rPrj <$> getRecursorProj + | 6 => .iPrj <$> getInductiveProj + | 7 => .dPrj <$> getDefinitionProj + | v => throw s!"getConstantInfo: invalid variant {v}" + else + throw s!"getConstantInfo: invalid flag {tag.flag}" + +instance : Serialize ConstantInfo where + put := putConstantInfo + get := getConstantInfo + +def putConstant (c : Constant) : PutM Unit := do + putConstantInfo c.info + putTag0 ⟨c.sharing.size.toUInt64⟩ + for e in c.sharing do putExpr e + putTag0 ⟨c.refs.size.toUInt64⟩ + for a in c.refs do Serialize.put a + putTag0 ⟨c.univs.size.toUInt64⟩ + for u in c.univs do putUniv u + +def getConstant : GetM Constant := do + let info ← getConstantInfo + let numSharing := (← getTag0).size.toNat + let mut sharing := #[] + for _ in [0:numSharing] do + sharing := sharing.push (← getExpr) + let numRefs := (← getTag0).size.toNat + let mut refs := #[] + for _ in [0:numRefs] do + refs := refs.push (← Serialize.get) + let numUnivs := (← getTag0).size.toNat + let mut univs := #[] + for _ in [0:numUnivs] do + univs := univs.push (← getUniv) + return ⟨info, sharing, refs, univs⟩ + +instance : Serialize Constant where + put := putConstant + get := getConstant + +/-! 
## Convenience functions for serialization -/ + +def serUniv (u : Univ) : ByteArray := runPut (putUniv u) +def desUniv (bytes : ByteArray) : Except String Univ := runGet getUniv bytes + +def serExpr (e : Expr) : ByteArray := runPut (putExpr e) +def desExpr (bytes : ByteArray) : Except String Expr := runGet getExpr bytes + +def serConstant (c : Constant) : ByteArray := runPut (putConstant c) +def desConstant (bytes : ByteArray) : Except String Constant := runGet getConstant bytes + +/-! ## Metadata Serialization -/ + +/-- Type alias for name index (Address → u64). -/ +abbrev NameIndex := Std.HashMap Address UInt64 + +/-- Type alias for reverse name index (position → Address). -/ +abbrev NameReverseIndex := Array Address + +/-- Put an address as an index. -/ +def putIdx (addr : Address) (idx : NameIndex) : PutM Unit := do + let i := idx.get? addr |>.getD 0 + putTag0 ⟨i⟩ + +/-- Get an address from an index. -/ +def getIdx (rev : NameReverseIndex) : GetM Address := do + let i := (← getTag0).size.toNat + match rev[i]? with + | some addr => pure addr + | none => throw s!"invalid name index {i}, max {rev.size}" + +/-- Put a vector of addresses as indices. -/ +def putIdxVec (addrs : Array Address) (idx : NameIndex) : PutM Unit := do + putTag0 ⟨addrs.size.toUInt64⟩ + for a in addrs do putIdx a idx + +/-- Get a vector of addresses from indices. -/ +def getIdxVec (rev : NameReverseIndex) : GetM (Array Address) := do + let len := (← getTag0).size.toNat + let mut v := #[] + for _ in [0:len] do + v := v.push (← getIdx rev) + pure v + +/-- Serialize BinderInfo. -/ +def putBinderInfo : Lean.BinderInfo → PutM Unit + | .default => putU8 0 + | .implicit => putU8 1 + | .strictImplicit => putU8 2 + | .instImplicit => putU8 3 + +def getBinderInfo : GetM Lean.BinderInfo := do + match ← getU8 with + | 0 => pure .default + | 1 => pure .implicit + | 2 => pure .strictImplicit + | 3 => pure .instImplicit + | x => throw s!"invalid BinderInfo {x}" + +/-- Serialize ReducibilityHints. 
-/ +def putReducibilityHints : Lean.ReducibilityHints → PutM Unit + | .opaque => putU8 0 + | .abbrev => putU8 1 + | .regular n => do putU8 2; putTag0 ⟨n.toUInt64⟩ + +def getReducibilityHints : GetM Lean.ReducibilityHints := do + match ← getU8 with + | 0 => pure .opaque + | 1 => pure .abbrev + | 2 => pure (.regular (← getTag0).size.toUInt32) + | x => throw s!"invalid ReducibilityHints {x}" + +/-- Serialize DataValue with indexed addresses. + OfString/OfNat/OfInt/OfSyntax use raw 32-byte addresses (blob addresses, not in name index). -/ +def putDataValueIndexed (dv : DataValue) (idx : NameIndex) : PutM Unit := do + match dv with + | .ofString a => putU8 0 *> Serialize.put a + | .ofBool b => putU8 1 *> Serialize.put b + | .ofName a => putU8 2 *> putIdx a idx + | .ofNat a => putU8 3 *> Serialize.put a + | .ofInt a => putU8 4 *> Serialize.put a + | .ofSyntax a => putU8 5 *> Serialize.put a + +def getDataValueIndexed (rev : NameReverseIndex) : GetM DataValue := do + match ← getU8 with + | 0 => .ofString <$> Serialize.get + | 1 => .ofBool <$> Serialize.get + | 2 => .ofName <$> getIdx rev + | 3 => .ofNat <$> Serialize.get + | 4 => .ofInt <$> Serialize.get + | 5 => .ofSyntax <$> Serialize.get + | x => throw s!"invalid DataValue tag {x}" + +/-- Serialize KVMap with indexed addresses. -/ +def putKVMapIndexed (kvmap : KVMap) (idx : NameIndex) : PutM Unit := do + putTag0 ⟨kvmap.size.toUInt64⟩ + for (k, v) in kvmap do + putIdx k idx + putDataValueIndexed v idx + +def getKVMapIndexed (rev : NameReverseIndex) : GetM KVMap := do + let len := (← getTag0).size.toNat + let mut kvmap := #[] + for _ in [0:len] do + let k ← getIdx rev + let v ← getDataValueIndexed rev + kvmap := kvmap.push (k, v) + pure kvmap + +/-- Serialize mdata stack (Array KVMap) with indexed addresses. 
-/ +def putMdataStackIndexed (mdata : Array KVMap) (idx : NameIndex) : PutM Unit := do + putTag0 ⟨mdata.size.toUInt64⟩ + for kv in mdata do putKVMapIndexed kv idx + +def getMdataStackIndexed (rev : NameReverseIndex) : GetM (Array KVMap) := do + let len := (← getTag0).size.toNat + let mut mdata := #[] + for _ in [0:len] do + mdata := mdata.push (← getKVMapIndexed rev) + pure mdata + +/-- Serialize ExprMetaData with indexed addresses. Arena indices use Tag0 encoding. -/ +def putExprMetaDataIndexed (em : ExprMetaData) (idx : NameIndex) : PutM Unit := do + match em with + | .leaf => putU8 0 + | .app f a => + putU8 1 + putTag0 ⟨f⟩ + putTag0 ⟨a⟩ + | .binder name info tyChild bodyChild => + let tag : UInt8 := 2 + match info with + | .default => 0 | .implicit => 1 | .strictImplicit => 2 | .instImplicit => 3 + putU8 tag + putIdx name idx + putTag0 ⟨tyChild⟩ + putTag0 ⟨bodyChild⟩ + | .letBinder name tyChild valChild bodyChild => + putU8 6 + putIdx name idx + putTag0 ⟨tyChild⟩ + putTag0 ⟨valChild⟩ + putTag0 ⟨bodyChild⟩ + | .ref name => + putU8 7 + putIdx name idx + | .prj structName child => + putU8 8 + putIdx structName idx + putTag0 ⟨child⟩ + | .mdata mdata child => + putU8 9 + putMdataStackIndexed mdata idx + putTag0 ⟨child⟩ + +def getExprMetaDataIndexed (rev : NameReverseIndex) : GetM ExprMetaData := do + let tag ← getU8 + match tag with + | 0 => pure .leaf + | 1 => + let f := (← getTag0).size + let a := (← getTag0).size + pure (.app f a) + | 2 | 3 | 4 | 5 => + let info := match tag with + | 2 => Lean.BinderInfo.default | 3 => .implicit + | 4 => .strictImplicit | _ => .instImplicit + let name ← getIdx rev + let tyChild := (← getTag0).size + let bodyChild := (← getTag0).size + pure (.binder name info tyChild bodyChild) + | 6 => + let name ← getIdx rev + let tyChild := (← getTag0).size + let valChild := (← getTag0).size + let bodyChild := (← getTag0).size + pure (.letBinder name tyChild valChild bodyChild) + | 7 => + let name ← getIdx rev + pure (.ref name) + | 8 => + let 
structName ← getIdx rev + let child := (← getTag0).size + pure (.prj structName child) + | 9 => + let mdata ← getMdataStackIndexed rev + let child := (← getTag0).size + pure (.mdata mdata child) + | x => throw s!"invalid ExprMetaData tag {x}" + +/-- Serialize ExprMetaArena (length-prefixed array of ExprMetaData nodes). -/ +def putExprMetaArenaIndexed (arena : ExprMetaArena) (idx : NameIndex) : PutM Unit := do + putTag0 ⟨arena.nodes.size.toUInt64⟩ + for node in arena.nodes do + putExprMetaDataIndexed node idx + +def getExprMetaArenaIndexed (rev : NameReverseIndex) : GetM ExprMetaArena := do + let len := (← getTag0).size.toNat + let mut nodes : Array ExprMetaData := #[] + for _ in [0:len] do + nodes := nodes.push (← getExprMetaDataIndexed rev) + pure ⟨nodes⟩ + +/-- Serialize ConstantMeta with indexed addresses. -/ +def putConstantMetaIndexed (cm : ConstantMeta) (idx : NameIndex) : PutM Unit := do + match cm with + | .empty => putU8 255 + | .defn name lvls hints all ctx arena typeRoot valueRoot => + putU8 0 + putIdx name idx + putIdxVec lvls idx + putReducibilityHints hints + putIdxVec all idx + putIdxVec ctx idx + putExprMetaArenaIndexed arena idx + putTag0 ⟨typeRoot⟩ + putTag0 ⟨valueRoot⟩ + | .axio name lvls arena typeRoot => + putU8 1 + putIdx name idx + putIdxVec lvls idx + putExprMetaArenaIndexed arena idx + putTag0 ⟨typeRoot⟩ + | .quot name lvls arena typeRoot => + putU8 2 + putIdx name idx + putIdxVec lvls idx + putExprMetaArenaIndexed arena idx + putTag0 ⟨typeRoot⟩ + | .indc name lvls ctors all ctx arena typeRoot => + putU8 3 + putIdx name idx + putIdxVec lvls idx + putIdxVec ctors idx + putIdxVec all idx + putIdxVec ctx idx + putExprMetaArenaIndexed arena idx + putTag0 ⟨typeRoot⟩ + | .ctor name lvls induct arena typeRoot => + putU8 4 + putIdx name idx + putIdxVec lvls idx + putIdx induct idx + putExprMetaArenaIndexed arena idx + putTag0 ⟨typeRoot⟩ + | .recr name lvls rules all ctx arena typeRoot ruleRoots => + putU8 5 + putIdx name idx + putIdxVec lvls idx + 
putIdxVec rules idx + putIdxVec all idx + putIdxVec ctx idx + putExprMetaArenaIndexed arena idx + putTag0 ⟨typeRoot⟩ + putTag0 ⟨ruleRoots.size.toUInt64⟩ + for r in ruleRoots do putTag0 ⟨r⟩ + +def getConstantMetaIndexed (rev : NameReverseIndex) : GetM ConstantMeta := do + match ← getU8 with + | 255 => pure .empty + | 0 => + let name ← getIdx rev + let lvls ← getIdxVec rev + let hints ← getReducibilityHints + let all ← getIdxVec rev + let ctx ← getIdxVec rev + let arena ← getExprMetaArenaIndexed rev + let typeRoot := (← getTag0).size + let valueRoot := (← getTag0).size + pure (.defn name lvls hints all ctx arena typeRoot valueRoot) + | 1 => + let name ← getIdx rev + let lvls ← getIdxVec rev + let arena ← getExprMetaArenaIndexed rev + let typeRoot := (← getTag0).size + pure (.axio name lvls arena typeRoot) + | 2 => + let name ← getIdx rev + let lvls ← getIdxVec rev + let arena ← getExprMetaArenaIndexed rev + let typeRoot := (← getTag0).size + pure (.quot name lvls arena typeRoot) + | 3 => + let name ← getIdx rev + let lvls ← getIdxVec rev + let ctors ← getIdxVec rev + let all ← getIdxVec rev + let ctx ← getIdxVec rev + let arena ← getExprMetaArenaIndexed rev + let typeRoot := (← getTag0).size + pure (.indc name lvls ctors all ctx arena typeRoot) + | 4 => + let name ← getIdx rev + let lvls ← getIdxVec rev + let induct ← getIdx rev + let arena ← getExprMetaArenaIndexed rev + let typeRoot := (← getTag0).size + pure (.ctor name lvls induct arena typeRoot) + | 5 => + let name ← getIdx rev + let lvls ← getIdxVec rev + let rules ← getIdxVec rev + let all ← getIdxVec rev + let ctx ← getIdxVec rev + let arena ← getExprMetaArenaIndexed rev + let typeRoot := (← getTag0).size + let numRuleRoots := (← getTag0).size.toNat + let mut ruleRoots : Array UInt64 := #[] + for _ in [0:numRuleRoots] do + ruleRoots := ruleRoots.push (← getTag0).size + pure (.recr name lvls rules all ctx arena typeRoot ruleRoots) + | x => throw s!"invalid ConstantMeta tag {x}" + +/-- Serialize Comm (simple - 
just two addresses). -/ +def putComm (c : Comm) : PutM Unit := do + Serialize.put c.secret + Serialize.put c.payload + +def getComm : GetM Comm := do + let secret ← Serialize.get + let payload ← Serialize.get + pure ⟨secret, payload⟩ + +instance : Serialize Comm where + put := putComm + get := getComm -inductive BuiltIn where -| obj : BuiltIn -| neutral : BuiltIn -| unreachable : BuiltIn -deriving BEq, Repr, Ord, Inhabited, Ord, Hashable +/-- Convenience serialization for Comm (untagged). -/ +def serComm (c : Comm) : ByteArray := runPut (putComm c) +def desComm (bytes : ByteArray) : Except String Comm := runGet getComm bytes -def putBuiltIn : BuiltIn → PutM Unit -| .obj => putUInt8 0 -| .neutral => putUInt8 1 -| .unreachable => putUInt8 2 +/-- Serialize Comm with Tag4{0xE, 5} header. -/ +def putCommTagged (c : Comm) : PutM Unit := do + putTag4 ⟨0xE, 5⟩ + putComm c -def getBuiltIn : GetM BuiltIn := do - match (← getUInt8) with - | 0 => pure .obj - | 1 => pure .neutral - | 2 => pure .unreachable - | e => throw s!"expected BuiltIn encoding between 0 and 5, got {e}" +/-- Serialize Comm with Tag4{0xE, 5} header to bytes. -/ +def serCommTagged (c : Comm) : ByteArray := runPut (putCommTagged c) -instance : Serialize BuiltIn where - put := putBuiltIn - get := getBuiltIn +/-- Compute commitment address: blake3(Tag4{0xE,5} + secret + payload). 
-/ +def Comm.commit (c : Comm) : Address := Address.blake3 (serCommTagged c) -inductive DataValue where -| ofString (v: Address) -| ofBool (v: Bool) -| ofName (v: Address) -| ofNat (v: Address) -| ofInt (v: Address) -| ofSyntax (v: Address) -deriving BEq, Repr, Ord, Inhabited, Ord, Hashable - -def putDataValue : DataValue → PutM Unit -| .ofString v => putUInt8 0 *> put v -| .ofBool v => putUInt8 1 *> put v -| .ofName v => putUInt8 2 *> put v -| .ofNat v => putUInt8 3 *> put v -| .ofInt v => putUInt8 4 *> put v -| .ofSyntax v => putUInt8 5 *> put v - -def getDataValue : GetM DataValue := do - match (← getUInt8) with - | 0 => .ofString <$> get - | 1 => .ofBool <$> get - | 2 => .ofName <$> get - | 3 => .ofNat <$> get - | 4 => .ofInt <$> get - | 5 => .ofSyntax <$> get - | e => throw s!"expected DataValue encoding between 0 and 5, got {e}" - -instance : Serialize DataValue where - put := putDataValue - get := getDataValue - -inductive Metadatum where -| link : Address -> Metadatum -| info : Lean.BinderInfo -> Metadatum -| hints : Lean.ReducibilityHints -> Metadatum -| links : List Address -> Metadatum -| map : List (Address × Address) -> Metadatum -| kvmap : List (Address × DataValue) -> Metadatum -| muts : List (List Address) -> Metadatum -deriving BEq, Repr, Ord, Inhabited, Ord, Hashable - -structure Metadata where - nodes: List Metadatum - deriving BEq, Repr, Inhabited, Ord, Hashable +/-! 
## Ixon Environment -/ -def putMetadatum : Metadatum → PutM Unit -| .link n => putUInt8 0 *> put n -| .info i => putUInt8 1 *> putBinderInfo i -| .hints h => putUInt8 2 *> putReducibilityHints h -| .links ns => putUInt8 3 *> put ns -| .map ns => putUInt8 4 *> put ns -| .kvmap map => putUInt8 5 *> put map -| .muts map => putUInt8 6 *> put map - -def getMetadatum : GetM Metadatum := do - match (<- getUInt8) with - | 0 => .link <$> get - | 1 => .info <$> get - | 2 => .hints <$> get - | 3 => .links <$> get - | 4 => .map <$> get - | 5 => .kvmap <$> get - | 6 => .muts <$> get - | e => throw s!"expected Metadatum encoding between 0 and 5, got {e}" - -instance : Serialize Metadatum where - put := putMetadatum - get := getMetadatum - -instance : Serialize Metadata where - put m := put (Tag4.mk 0xF m.nodes.length.toUInt64) *> putMany put m.nodes - get := do - let tag <- getTag4 - match tag with - | ⟨0xF, x⟩ => do - let nodes <- getMany x.toNat Serialize.get - return ⟨nodes⟩ - | x => throw s!"Expected metadata tag, got {repr x}" - --- TODO: docs -inductive Ixon where -| nanon : Ixon -- 0x00 anon name -| nstr : Address -> Address -> Ixon -- 0x01 str name -| nnum : Address -> Address -> Ixon -- 0x02 num name -| uzero : Ixon -- 0x03 univ zero -| usucc : Address -> Ixon -- 0x04 univ succ -| umax : Address -> Address -> Ixon -- 0x05 univ max -| uimax : Address -> Address -> Ixon -- 0x06 univ imax -| uvar : Nat -> Ixon -- 0x1X univ var -| evar : Nat -> Ixon -- 0x2X, variables -| eref : Address -> List Address -> Ixon -- 0x3X, global reference -| erec : Nat -> List Address -> Ixon -- 0x4X, local recursion -| eprj : Address -> Nat -> Address -> Ixon -- 0x5X, structure projection -| esort : Address -> Ixon -- 0x80, universes -| estr : Address -> Ixon -- 0x81, utf8 string -| enat : Address -> Ixon -- 0x82, natural number -| eapp : Address -> Address -> Ixon -- 0x83, application -| elam : Address -> Address -> Ixon -- 0x84, lambda -| eall : Address -> Address -> Ixon -- 0x85, forall -| 
elet : Bool -> Address -> Address -> Address -> Ixon -- 0x86, 0x87, let -| blob : ByteArray -> Ixon -- 0x9X, bytes -| defn : Definition -> Ixon -- 0xA0, definition -| recr : Recursor -> Ixon -- 0xA1, recursor -| axio : Axiom -> Ixon -- 0xA2, axiom -| quot : Quotient -> Ixon -- 0xA3, quotient -| cprj : ConstructorProj -> Ixon -- 0xA4, ctor projection -| rprj : RecursorProj -> Ixon -- 0xA5, recr projection -| iprj : InductiveProj -> Ixon -- 0xA6, indc projection -| dprj : DefinitionProj -> Ixon -- 0xA7, defn projection -| muts : List MutConst -> Ixon -- 0xBX, mutual constants -| prof : Proof -> Ixon -- 0xE0, zero-knowledge proof -| eval : EvalClaim -> Ixon -- 0xE1, evaluation claim -| chck : CheckClaim -> Ixon -- 0xE2, typechecking claim -| comm : Comm -> Ixon -- 0xE3, cryptographic commitment -| envn : Env -> Ixon -- 0xE4, Lean4 environment -| prim : BuiltIn -> Ixon -- 0xE5, compiler builtins -| «meta» : Metadata -> Ixon -- 0xFX, Lean4 metadata -deriving BEq, Repr, Inhabited, Ord, Hashable - -def putIxon : Ixon -> PutM Unit -| .nanon => put (Tag4.mk 0x0 0) -| .nstr n s => put (Tag4.mk 0x0 1) *> put n *> put s -| .nnum n i => put (Tag4.mk 0x0 2) *> put n *> put i -| .uzero => put (Tag4.mk 0x0 3) -| .usucc u => put (Tag4.mk 0x0 4) *> put u -| .umax x y => put (Tag4.mk 0x0 5) *> put x *> put y -| .uimax x y => put (Tag4.mk 0x0 6) *> put x *> put y -| .uvar x => - let bytes := x.toBytesLE - put (Tag4.mk 0x1 bytes.size.toUInt64) *> putBytes ⟨bytes⟩ -| .evar x => - let bytes := x.toBytesLE - put (Tag4.mk 0x2 bytes.size.toUInt64) *> putBytes ⟨bytes⟩ -| .eref a ls => put (Tag4.mk 0x3 ls.length.toUInt64) *> put a *> puts ls -| .erec i ls => - put (Tag4.mk 0x4 ls.length.toUInt64) *> put i *> puts ls -| .eprj t n x => - let bytes := n.toBytesLE - put (Tag4.mk 0x5 bytes.size.toUInt64) *> put t *> putBytes ⟨bytes⟩ *> put x -| .esort u => put (Tag4.mk 0x8 0x0) *> put u -| .estr s => put (Tag4.mk 0x8 0x1) *> put s -| .enat n => put (Tag4.mk 0x8 0x2) *> put n -| .eapp f a => put 
(Tag4.mk 0x8 0x3) *> put f *> put a -| .elam t b => put (Tag4.mk 0x8 0x4) *> put t *> put b -| .eall t b => put (Tag4.mk 0x8 0x5) *> put t *> put b -| .elet nD t d b => if nD - then put (Tag4.mk 0x8 0x6) *> put t *> put d *> put b - else put (Tag4.mk 0x8 0x7) *> put t *> put d *> put b -| .blob xs => put (Tag4.mk 0x9 xs.size.toUInt64) *> xs.data.forM put -| .defn x => put (Tag4.mk 0xA 0x0) *> put x -| .recr x => put (Tag4.mk 0xA 0x1) *> put x -| .axio x => put (Tag4.mk 0xA 0x2) *> put x -| .quot x => put (Tag4.mk 0xA 0x3) *> put x -| .cprj x => put (Tag4.mk 0xA 0x4) *> put x -| .rprj x => put (Tag4.mk 0xA 0x5) *> put x -| .iprj x => put (Tag4.mk 0xA 0x6) *> put x -| .dprj x => put (Tag4.mk 0xA 0x7) *> put x -| .muts xs => put (Tag4.mk 0xB xs.length.toUInt64) *> puts xs -| .prof x => put (Tag4.mk 0xE 0x0) *> put x -| .eval x => put (Tag4.mk 0xE 0x1) *> put x -| .chck x => put (Tag4.mk 0xE 0x2) *> put x -| .comm x => put (Tag4.mk 0xE 0x3) *> put x -| .envn x => put (Tag4.mk 0xE 0x4) *> put x -| .prim x => put (Tag4.mk 0xE 0x5) *> put x -| .meta m => put m - -def getIxon : GetM Ixon := do - let tag <- getTag4 +/-- The Ixon environment, containing all compiled constants. + Mirrors Rust's `ix::ixon::env::Env` structure. -/ +structure Env where + /-- Alpha-invariant constants: Address → Constant -/ + consts : Std.HashMap Address Constant := {} + /-- Named references: Ix.Name → Named (includes address + metadata) -/ + named : Std.HashMap Ix.Name Named := {} + /-- Raw data blobs: Address → bytes -/ + blobs : Std.HashMap Address ByteArray := {} + /-- Hash-consed name components: Address → Ix.Name -/ + names : Std.HashMap Address Ix.Name := {} + /-- Cryptographic commitments: Address → Comm -/ + comms : Std.HashMap Address Comm := {} + /-- Reverse index: constant Address → Ix.Name -/ + addrToName : Std.HashMap Address Ix.Name := {} + deriving Inhabited + +namespace Env + +/-- Store a constant at the given address. 
-/ +def storeConst (env : Env) (addr : Address) (const : Constant) : Env := + { env with consts := env.consts.insert addr const } + +/-- Get a constant by address. -/ +def getConst? (env : Env) (addr : Address) : Option Constant := + env.consts.get? addr + +/-- Register a name with full Named metadata. -/ +def registerName (env : Env) (name : Ix.Name) (named : Named) : Env := + { env with + named := env.named.insert name named + addrToName := env.addrToName.insert named.addr name } + +/-- Register a name with just an address (empty metadata). -/ +def registerNameAddr (env : Env) (name : Ix.Name) (addr : Address) : Env := + env.registerName name { addr, constMeta := .empty } + +/-- Look up a name's address. -/ +def getAddr? (env : Env) (name : Ix.Name) : Option Address := + env.named.get? name |>.map (·.addr) + +/-- Look up a name's Named entry. -/ +def getNamed? (env : Env) (name : Ix.Name) : Option Named := + env.named.get? name + +/-- Look up an address's name. -/ +def getName? (env : Env) (addr : Address) : Option Ix.Name := + env.addrToName.get? addr + +/-- Store a blob and return its content address. -/ +def storeBlob (env : Env) (bytes : ByteArray) : Env × Address := + let addr := Address.blake3 bytes + ({ env with blobs := env.blobs.insert addr bytes }, addr) + +/-- Get a blob by address. -/ +def getBlob? (env : Env) (addr : Address) : Option ByteArray := + env.blobs.get? addr + +/-- Store a commitment. -/ +def storeComm (env : Env) (addr : Address) (comm : Comm) : Env := + { env with comms := env.comms.insert addr comm } + +/-- Get a commitment by address. -/ +def getComm? (env : Env) (addr : Address) : Option Comm := + env.comms.get? addr + +/-- Number of constants. -/ +def constCount (env : Env) : Nat := env.consts.size + +/-- Number of blobs. -/ +def blobCount (env : Env) : Nat := env.blobs.size + +/-- Number of named constants. -/ +def namedCount (env : Env) : Nat := env.named.size + +/-- Number of commitments. 
-/ +def commCount (env : Env) : Nat := env.comms.size + +instance : Repr Env where + reprPrec env _ := s!"Env({env.constCount} consts, {env.blobCount} blobs, {env.namedCount} named)" + +end Env + +/-! ## Raw FFI Types for Env -/ + +/-- Raw FFI structure for a constant: Address → Constant. + Array-based version for FFI compatibility (no HashMap). -/ +structure RawConst where + addr : Address + const : Constant + deriving Repr, Inhabited, BEq + +/-- Raw FFI structure for a named entry: Ix.Name → (Address, ConstantMeta). + Array-based version for FFI compatibility (no HashMap). -/ +structure RawNamed where + name : Ix.Name + addr : Address + constMeta : ConstantMeta + deriving Repr, Inhabited, BEq + +/-- Raw FFI structure for a blob: Address → ByteArray. + Array-based version for FFI compatibility (no HashMap). -/ +structure RawBlob where + addr : Address + bytes : ByteArray + deriving Repr, Inhabited, BEq + +/-- Raw FFI structure for a commitment: Address → Comm. + Array-based version for FFI compatibility (no HashMap). -/ +structure RawComm where + addr : Address + comm : Comm + deriving Repr, Inhabited, BEq + +/-- Raw FFI name entry: address → Ix.Name mapping. + Used to transfer the full names table across FFI. -/ +structure RawNameEntry where + addr : Address + name : Ix.Name + deriving Repr, Inhabited, BEq + +/-- Raw FFI environment structure using arrays instead of HashMaps. + This is the array-based equivalent of `Env` for FFI compatibility. -/ +structure RawEnv where + consts : Array RawConst + named : Array RawNamed + blobs : Array RawBlob + comms : Array RawComm + names : Array RawNameEntry := #[] + deriving Repr, Inhabited, BEq + +namespace RawEnv + +/-- Recursively add all name components to the names map. + Uses Ix.Name.getHash for address computation. 
-/ +partial def addNameComponents (names : Std.HashMap Address Ix.Name) (name : Ix.Name) : Std.HashMap Address Ix.Name := + let addr := name.getHash + if names.contains addr then names + else + let names := names.insert addr name + match name with + | .anonymous _ => names + | .str parent _ _ => addNameComponents names parent + | .num parent _ _ => addNameComponents names parent + +/-- Recursively add all name components to the names map AND store string components as blobs. + This matches Rust's behavior for deduplication of string data. -/ +partial def addNameComponentsWithBlobs + (names : Std.HashMap Address Ix.Name) + (blobs : Std.HashMap Address ByteArray) + (name : Ix.Name) + : Std.HashMap Address Ix.Name × Std.HashMap Address ByteArray := + let addr := name.getHash + if names.contains addr then (names, blobs) + else + let names := names.insert addr name + match name with + | .anonymous _ => (names, blobs) + | .str parent s _ => + -- Store string component as blob for deduplication + let strBytes := s.toUTF8 + let strAddr := Address.blake3 strBytes + let blobs := blobs.insert strAddr strBytes + addNameComponentsWithBlobs names blobs parent + | .num parent _ _ => + addNameComponentsWithBlobs names blobs parent + +/-- Convert RawEnv to Env with HashMaps. + This is done on the Lean side for correct hash function usage. -/ +def toEnv (raw : RawEnv) : Env := Id.run do + let mut env : Env := {} + for ⟨addr, const⟩ in raw.consts do + env := env.storeConst addr const + -- Load the full names table (includes binder names, level params, etc.) + -- Use addNameComponents to store at canonical addresses (name.getHash) + -- and ensure all parent components are present for topological consistency. 
+ for ⟨_, name⟩ in raw.names do + env := { env with names := addNameComponents env.names name } + for ⟨name, addr, constMeta⟩ in raw.named do + -- Also add name components for indexed serialization + env := { env with names := addNameComponents env.names name } + env := env.registerName name ⟨addr, constMeta⟩ + for ⟨addr, bytes⟩ in raw.blobs do + env := { env with blobs := env.blobs.insert addr bytes } + for ⟨addr, comm⟩ in raw.comms do + env := env.storeComm addr comm + return env + +end RawEnv + +/-! ## Env Serialization -/ + +namespace Env + +/-- Convert Env with HashMaps to RawEnv with Arrays for FFI. + Includes the full names table for round-trip fidelity. -/ +def toRawEnv (env : Env) : RawEnv := { + consts := env.consts.toArray.map fun (addr, const) => { addr, const } + named := env.named.toArray.map fun (name, n) => { name, addr := n.addr, constMeta := n.constMeta } + blobs := env.blobs.toArray.map fun (addr, bytes) => { addr, bytes } + comms := env.comms.toArray.map fun (addr, comm) => { addr, comm } + names := env.names.toArray.map fun (addr, name) => { addr, name } +} + +/-- Tag4 flag for Env (0xE), variant 0. -/ +def FLAG : UInt8 := 0xE + +/-- Serialize a name component (references parent by address). + Format: tag (1 byte) + parent_addr (32 bytes) + data -/ +def putNameComponent (name : Ix.Name) : PutM Unit := do + match name with + | .anonymous _ => putU8 0 + | .str parent s _ => + putU8 1 + Serialize.put parent.getHash + putTag0 ⟨s.utf8ByteSize.toUInt64⟩ + putBytes s.toUTF8 + | .num parent n _ => + putU8 2 + Serialize.put parent.getHash + let bytes := ByteArray.mk (Nat.toBytesLE n) + putTag0 ⟨bytes.size.toUInt64⟩ + putBytes bytes + +/-- Deserialize a name component using a lookup table for parents. 
-/ +def getNameComponent (namesLookup : Std.HashMap Address Ix.Name) : GetM Ix.Name := do + let tag ← getU8 match tag with - | ⟨0x0, 0⟩ => pure <| .nanon - | ⟨0x0, 1⟩ => .nstr <$> get <*> get - | ⟨0x0, 2⟩ => .nnum <$> get <*> get - | ⟨0x0, 3⟩ => pure <| .uzero - | ⟨0x0, 4⟩ => .usucc <$> get - | ⟨0x0, 5⟩ => .umax <$> get <*> get - | ⟨0x0, 6⟩ => .uimax <$> get <*> get - | ⟨0x1, x⟩ => .uvar <$> getNat (getBytes x.toNat) - | ⟨0x2, x⟩ => .evar <$> getNat (getBytes x.toNat) - | ⟨0x3, x⟩ => .eref <$> get <*> gets x.toNat - | ⟨0x4, x⟩ => .erec <$> get <*> gets x.toNat - | ⟨0x5, x⟩ => .eprj <$> get <*> getNat (getBytes x.toNat) <*> get - | ⟨0x8, 0⟩ => .esort <$> get - | ⟨0x8, 1⟩ => .estr <$> get - | ⟨0x8, 2⟩ => .enat <$> get - | ⟨0x8, 3⟩ => .eapp <$> get <*> get - | ⟨0x8, 4⟩ => .elam <$> get <*> get - | ⟨0x8, 5⟩ => .eall <$> get <*> get - | ⟨0x8, 6⟩ => .elet true <$> get <*> get <*> get - | ⟨0x8, 7⟩ => .elet false <$> get <*> get <*> get - | ⟨0x9, x⟩ => (.blob ∘ .mk ∘ .mk) <$> getMany x.toNat getUInt8 - | ⟨0xA, 0x0⟩ => .defn <$> get - | ⟨0xA, 0x1⟩ => .recr <$> get - | ⟨0xA, 0x2⟩ => .axio <$> get - | ⟨0xA, 0x3⟩ => .quot <$> get - | ⟨0xA, 0x4⟩ => .cprj <$> get - | ⟨0xA, 0x5⟩ => .rprj <$> get - | ⟨0xA, 0x6⟩ => .iprj <$> get - | ⟨0xA, 0x7⟩ => .dprj <$> get - | ⟨0xB, x⟩ => .muts <$> getMany x.toNat get - | ⟨0xE, 0x0⟩ => .prof <$> get - | ⟨0xE, 0x1⟩ => .eval <$> get - | ⟨0xE, 0x2⟩ => .chck <$> get - | ⟨0xE, 0x3⟩ => .comm <$> get - | ⟨0xE, 0x4⟩ => .envn <$> get - | ⟨0xE, 0x5⟩ => .prim <$> get - | ⟨0xF, x⟩ => do - let nodes <- getMany x.toNat Serialize.get - return .meta ⟨nodes⟩ - | x => throw s!"Unknown Ixon tag {repr x}" - -instance : Serialize Ixon where - put := putIxon - get := getIxon - -def Ixon.address (ixon: Ixon): Address := Address.blake3 (ser ixon) + | 0 => pure Ix.Name.mkAnon + | 1 => + let parentAddr ← Serialize.get + let parent ← match namesLookup.get? 
parentAddr with + | some p => pure p + | none => throw s!"getNameComponent: missing parent address {reprStr (toString parentAddr)}" + let len := (← getTag0).size.toNat + let sBytes ← getBytes len + match String.fromUTF8? sBytes with + | some s => pure (Ix.Name.mkStr parent s) + | none => throw "getNameComponent: invalid UTF-8" + | 2 => + let parentAddr ← Serialize.get + let parent ← match namesLookup.get? parentAddr with + | some p => pure p + | none => throw s!"getNameComponent: missing parent address {reprStr (toString parentAddr)}" + let len := (← getTag0).size.toNat + let nBytes ← getBytes len + pure (Ix.Name.mkNat parent (Nat.fromBytesLE nBytes.data)) + | t => throw s!"getNameComponent: invalid tag {t}" + +/-- Topologically sort names so parents come before children. -/ +partial def topologicalSortNames (names : Std.HashMap Address Ix.Name) : Array (Address × Ix.Name) := + -- DFS topological sort: visit parent before child + -- This matches the Rust implementation + let anonAddr := Ix.Name.mkAnon.getHash + let rec visit (name : Ix.Name) (visited : Std.HashSet Address) (result : Array (Address × Ix.Name)) + : Std.HashSet Address × Array (Address × Ix.Name) := + let addr := name.getHash + if visited.contains addr then (visited, result) + else + -- Visit parent first + let (visited, result) := match name with + | .anonymous _ => (visited, result) + | .str parent _ _ => visit parent visited result + | .num parent _ _ => visit parent visited result + let visited := visited.insert addr + let result := result.push (addr, name) + (visited, result) + -- Start with anonymous already visited (it's implicit) + let initVisited : Std.HashSet Address := ({} : Std.HashSet Address).insert anonAddr + -- Sort names by address before iterating to ensure deterministic DFS order + let sortedEntries := names.toList.toArray.qsort fun a b => (compare a.1 b.1).isLT + let (_, result) := sortedEntries.foldl (init := (initVisited, #[])) fun (visited, result) (_, name) => + visit name 
visited result + result + +/-- Serialize an Env to bytes. -/ +def putEnv (env : Env) : PutM Unit := do + -- Header: Tag4 with flag=0xE, size=0 (Env variant) + putTag4 ⟨FLAG, 0⟩ + + -- Section 1: Blobs (Address -> bytes) + let blobs := env.blobs.toList.toArray.qsort fun a b => (compare a.1 b.1).isLT + putTag0 ⟨blobs.size.toUInt64⟩ + for (addr, bytes) in blobs do + Serialize.put addr + putTag0 ⟨bytes.size.toUInt64⟩ + putBytes bytes + + -- Section 2: Consts (Address -> Constant) + let consts := env.consts.toList.toArray.qsort fun a b => (compare a.1 b.1).isLT + putTag0 ⟨consts.size.toUInt64⟩ + for (addr, constant) in consts do + Serialize.put addr + putConstant constant + + -- Section 3: Names (Address -> Name component) + -- Topologically sorted so parents come before children, with ties broken by address + let sortedNames := topologicalSortNames env.names + -- Build name index from sorted positions (matching Rust) + let nameIdx := sortedNames.zipIdx.foldl + (fun acc ((addr, _), i) => acc.insert addr i.toUInt64) {} + putTag0 ⟨sortedNames.size.toUInt64⟩ + for (addr, name) in sortedNames do + Serialize.put addr + putNameComponent name + + -- Section 4: Named (name Address -> Named with metadata) + let named := env.named.toList.toArray.qsort fun a b => (compare a.1 b.1).isLT + putTag0 ⟨named.size.toUInt64⟩ + for (name, namedEntry) in named do + -- Use the name's stored hash, which matches how it was stored in env.names + Serialize.put name.getHash + Serialize.put namedEntry.addr + putConstantMetaIndexed namedEntry.constMeta nameIdx + + -- Section 5: Comms (Address -> Comm) + let comms := env.comms.toList.toArray.qsort fun a b => (compare a.1 b.1).isLT + putTag0 ⟨comms.size.toUInt64⟩ + for (addr, comm) in comms do + Serialize.put addr + putComm comm + +/-- Deserialize an Env from bytes. 
-/ +def getEnv : GetM Env := do + -- Header + let tag ← getTag4 + if tag.flag != FLAG then + throw s!"Env.get: expected flag 0x{FLAG.toNat.toDigits 16}, got 0x{tag.flag.toNat.toDigits 16}" + if tag.size != 0 then + throw s!"Env.get: expected Env variant 0, got {tag.size}" + + let mut env : Env := {} + + -- Section 1: Blobs + let numBlobs := (← getTag0).size + for _ in [:numBlobs.toNat] do + let addr ← Serialize.get + let len := (← getTag0).size + let bytes ← getBytes len.toNat + env := { env with blobs := env.blobs.insert addr bytes } + + -- Section 2: Consts + let numConsts := (← getTag0).size + for _ in [:numConsts.toNat] do + let addr ← Serialize.get + let constant ← getConstant + env := { env with consts := env.consts.insert addr constant } + + -- Section 3: Names (build lookup table AND reverse index) + let numNames := (← getTag0).size + let mut namesLookup : Std.HashMap Address Ix.Name := {} + let mut nameRev : NameReverseIndex := #[] + -- Always include anonymous name + namesLookup := namesLookup.insert Ix.Name.mkAnon.getHash Ix.Name.mkAnon + for _ in [:numNames.toNat] do + let addr ← Serialize.get + let name ← getNameComponent namesLookup + nameRev := nameRev.push addr + namesLookup := namesLookup.insert addr name + env := { env with names := env.names.insert addr name } + + -- Section 4: Named (name Address -> Named with metadata) + let numNamed := (← getTag0).size + for _ in [:numNamed.toNat] do + let nameAddr ← Serialize.get + let constAddr : Address ← Serialize.get + let constMeta ← getConstantMetaIndexed nameRev + match namesLookup.get? 
nameAddr with + | some name => + let namedEntry : Named := ⟨constAddr, constMeta⟩ + env := { env with + named := env.named.insert name namedEntry + addrToName := env.addrToName.insert constAddr name } + | none => + throw s!"getEnv: named entry references unknown name address {reprStr (toString nameAddr)}" + + -- Section 5: Comms + let numComms := (← getTag0).size + for _ in [:numComms.toNat] do + let addr ← Serialize.get (α := Address) + let comm ← getComm + env := { env with comms := env.comms.insert addr comm } + + pure env + +end Env + +/-- Serialize an Env to bytes. -/ +def serEnv (env : Env) : ByteArray := runPut (Env.putEnv env) + +/-- Deserialize an Env from bytes. -/ +def desEnv (bytes : ByteArray) : Except String Env := runGet Env.getEnv bytes + +/-- Compute section sizes for debugging. Returns (blobs, consts, names, named, comms). -/ +def envSectionSizes (env : Env) : Nat × Nat × Nat × Nat × Nat := Id.run do + -- Blobs section + let blobsBytes := runPut do + let blobs := env.blobs.toList.toArray.qsort fun a b => (compare a.1 b.1).isLT + putTag0 ⟨blobs.size.toUInt64⟩ + for (addr, bytes) in blobs do + Serialize.put addr + putTag0 ⟨bytes.size.toUInt64⟩ + putBytes bytes + + -- Consts section + let constsBytes := runPut do + let consts := env.consts.toList.toArray.qsort fun a b => (compare a.1 b.1).isLT + putTag0 ⟨consts.size.toUInt64⟩ + for (addr, constant) in consts do + Serialize.put addr + putConstant constant + + -- Names section + let namesBytes := runPut do + let sortedNames := Env.topologicalSortNames env.names + putTag0 ⟨sortedNames.size.toUInt64⟩ + for (addr, name) in sortedNames do + Serialize.put addr + Env.putNameComponent name + + -- Named section + let namedBytes := runPut do + let sortedNames := Env.topologicalSortNames env.names + let nameIdx : NameIndex := sortedNames.zipIdx.foldl + (fun acc ((addr, _), i) => acc.insert addr i.toUInt64) {} + let named := env.named.toList.toArray.qsort fun a b => (compare a.1 b.1).isLT + putTag0 
⟨named.size.toUInt64⟩ + for (name, namedEntry) in named do + Serialize.put name.getHash + Serialize.put namedEntry.addr + putConstantMetaIndexed namedEntry.constMeta nameIdx + + -- Comms section + let commsBytes := runPut do + let comms := env.comms.toList.toArray.qsort fun a b => (compare a.1 b.1).isLT + putTag0 ⟨comms.size.toUInt64⟩ + for (addr, comm) in comms do + Serialize.put addr + putComm comm + + (blobsBytes.size, constsBytes.size, namesBytes.size, namedBytes.size, commsBytes.size) + +/-! ## Rust FFI Serialization -/ + +@[extern "rs_ser_env"] +opaque rsSerEnvFFI : @& RawEnv → ByteArray + +/-- Serialize an Ixon.Env to bytes using Rust. -/ +def rsSerEnv (env : Env) : ByteArray := + rsSerEnvFFI env.toRawEnv + +@[extern "rs_des_env"] +opaque rsDesEnvFFI : @& ByteArray → Except String RawEnv + +/-- Deserialize bytes to an Ixon.Env using Rust. -/ +def rsDesEnv (bytes : ByteArray) : Except String Env := + return (← rsDesEnvFFI bytes).toEnv end Ixon diff --git a/Ix/Meta.lean b/Ix/Meta.lean index 32543d67..ad1afc09 100644 --- a/Ix/Meta.lean +++ b/Ix/Meta.lean @@ -38,10 +38,6 @@ elab "this_file!" : term => do macro "get_env!" : term => `(getFileEnv this_file!) 
---def computeIxAddress (env: Lean.Environment) (const: Lean.Name): Option MetaAddress --- := Id.run do --- let (addr, _) <- (Ix.CompileM.const const) --- return addr def runCore (f : CoreM α) (env : Environment) : IO α := Prod.fst <$> f.toIO { fileName := default, fileMap := default } { env } diff --git a/Ix/Mutual.lean b/Ix/Mutual.lean index ba6088a2..92019815 100644 --- a/Ix/Mutual.lean +++ b/Ix/Mutual.lean @@ -1,66 +1,82 @@ import Ix.Common import Ix.Address +import Ix.Environment import Lean namespace Ix structure Def where - name: Lean.Name - levelParams : List Lean.Name - type : Lean.Expr + name: Name + levelParams : Array Name + type : Expr kind : DefKind - value : Lean.Expr + value : Expr hints : Lean.ReducibilityHints - safety : Lean.DefinitionSafety - all : List Lean.Name - deriving BEq, Repr, Nonempty, Inhabited, Ord, Hashable + safety : DefinitionSafety + all : Array Name + deriving Repr, Nonempty, Inhabited, BEq structure Ind where - name: Lean.Name - levelParams : List Lean.Name - type : Lean.Expr + name: Name + levelParams : Array Name + type : Expr numParams : Nat numIndices : Nat - all : List Lean.Name - ctors : List Lean.ConstructorVal + all : Array Name + ctors : Array ConstructorVal numNested: Nat isRec : Bool isReflexive : Bool isUnsafe: Bool - deriving BEq, Repr, Nonempty, Inhabited + deriving Repr, Nonempty, Inhabited, BEq -abbrev Rec := Lean.RecursorVal +abbrev Rec := RecursorVal inductive MutConst where | defn : Def -> MutConst | indc : Ind -> MutConst | recr : Rec -> MutConst -deriving BEq, Repr, Nonempty, Inhabited +deriving Repr, Nonempty, Inhabited, BEq -def MutConst.mkDefn : Lean.DefinitionVal -> MutConst -| x => .defn ⟨x.name, x.levelParams, x.type, .definition, x.value, x.hints, x.safety, x.all⟩ +private def convertSafety : Lean.DefinitionSafety → DefinitionSafety + | .unsafe => .unsaf | .safe => .safe | .partial => .part -def MutConst.mkOpaq : Lean.OpaqueVal -> MutConst -| x => .defn ⟨x.name, x.levelParams, x.type, .opaque, 
x.value, .opaque, - if x.isUnsafe then .unsafe else .safe, x.all⟩ +def MutConst.fromDefinitionVal (x : DefinitionVal) : MutConst := + .defn ⟨x.cnst.name, x.cnst.levelParams, x.cnst.type, .defn, x.value, + x.hints, convertSafety x.safety, x.all⟩ -def MutConst.mkTheo : Lean.TheoremVal -> MutConst -| x => .defn ⟨x.name, x.levelParams, x.type, .theorem, x.value, .opaque, .safe, x.all⟩ +def MutConst.fromTheoremVal (x : TheoremVal) : MutConst := + .defn ⟨x.cnst.name, x.cnst.levelParams, x.cnst.type, .thm, x.value, + .opaque, .safe, x.all⟩ -def MutConst.name : MutConst -> Lean.Name +def MutConst.fromOpaqueVal (x : OpaqueVal) : MutConst := + .defn ⟨x.cnst.name, x.cnst.levelParams, x.cnst.type, .opaq, x.value, + .opaque, if x.isUnsafe then .unsaf else .safe, x.all⟩ + +/-- Create a MutConst.indc from an InductiveVal and its constructor values -/ +def MutConst.fromInductiveVal (i : InductiveVal) (ctorVals : Array ConstructorVal) : MutConst := + .indc ⟨i.cnst.name, i.cnst.levelParams, i.cnst.type, i.numParams, i.numIndices, + i.all, ctorVals, i.numNested, i.isRec, i.isReflexive, i.isUnsafe⟩ + +def MutConst.name : MutConst -> Name | .defn x => x.name | .indc x => x.name -| .recr x => x.name +| .recr x => x.cnst.name -def MutConst.ctors : MutConst -> List (Lean.ConstructorVal) +def MutConst.levelParams : MutConst -> Array Name +| .defn x => x.levelParams +| .indc x => x.levelParams +| .recr x => x.cnst.levelParams + +def MutConst.ctors : MutConst -> Array ConstructorVal | .indc x => x.ctors -| .defn _ => [] -| .recr _ => [] +| .defn _ => #[] +| .recr _ => #[] -def MutConst.contains (name: Lean.Name) : MutConst -> Bool +def MutConst.contains (name: Name) : MutConst -> Bool | .defn val => val.name == name -| .recr val => val.name == name -| .indc val => val.name == name || val.ctors.any (fun c => c.name == name) +| .recr val => val.cnst.name == name +| .indc val => val.name == name || val.ctors.any (fun c => c.cnst.name == name) -- We have a list of classes of mutual constants, 
each class representing a -- possible equivalence class. We would like to construct a numerical @@ -89,36 +105,18 @@ def MutConst.contains (name: Lean.Name) : MutConst -> Bool -- definitions and recursors, but we combine them for robustness and code -- deduplication. -- layout: [i0, i1, ..., iN, i0c0, ... i0cM, ... inc0, iNcM] -def MutConst.ctx (classes: List (List MutConst)) : MutCtx +def MutConst.ctx (classes: List (List MutConst)) : Ix.MutCtx := Id.run do - let mut mutCtx := default + let mut mutCtx : Ix.MutCtx := default let mut i := classes.length for (consts, j) in classes.zipIdx do let mut maxCtors := 0 for const in consts do mutCtx := mutCtx.insert const.name j - maxCtors := max maxCtors const.ctors.length - for (c, cidx) in List.zipIdx const.ctors do - mutCtx := mutCtx.insert c.name (i + cidx) + maxCtors := max maxCtors const.ctors.size + for (c, cidx) in Array.zipIdx const.ctors do + mutCtx := mutCtx.insert c.cnst.name (i + cidx) i := i + maxCtors return mutCtx - ---def a0 : Lean.ConstructorVal := ⟨⟨`a0, [], .bvar 0⟩, `a, 0, 0, 0, false⟩ ---def a1 : Lean.ConstructorVal := ⟨⟨`a1, [], .bvar 0⟩, `a, 1, 0, 0, false⟩ ---def a2 : Lean.ConstructorVal := ⟨⟨`a2, [], .bvar 0⟩, `a, 2, 0, 0, false⟩ ---def a : Ind := ⟨`a, [], .bvar 0, 0, 0, [], [a0, a1, a2], 0, false, false, false⟩ --- ---def b0 : Lean.ConstructorVal := ⟨⟨`b0, [], .bvar 0⟩, `b, 0, 0, 0, false⟩ ---def b1 : Lean.ConstructorVal := ⟨⟨`b1, [], .bvar 0⟩, `b, 1, 0, 0, false⟩ ---def b2 : Lean.ConstructorVal := ⟨⟨`b2, [], .bvar 0⟩, `b, 2, 0, 0, false⟩ ---def b : Ind := ⟨`b, [], .bvar 0, 0, 0, [], [b0, b1], 0, false, false, false⟩ --- ---def c0 : Lean.ConstructorVal := ⟨⟨`c0, [], .bvar 0⟩, `c, 0, 0, 0, false⟩ ---def c1 : Lean.ConstructorVal := ⟨⟨`c1, [], .bvar 0⟩, `c, 1, 0, 0, false⟩ ---def c2 : Lean.ConstructorVal := ⟨⟨`c2, [], .bvar 0⟩, `c, 2, 0, 0, false⟩ ---def c : Ind := ⟨`c, [], .bvar 0, 0, 0, [], [c0, c1, c2], 0, false, false, false⟩ --- ---#eval MutConst.ctx [[.indc a, .indc b], [.indc c]] - end Ix diff 
--git a/Ix/ShardMap.lean b/Ix/ShardMap.lean new file mode 100644 index 00000000..c918031e --- /dev/null +++ b/Ix/ShardMap.lean @@ -0,0 +1,446 @@ +/- + ShardMap: A concurrent hashmap with sharded locks for parallel access. + + This is a Lean equivalent of Rust's DashMap, providing O(1) concurrent + read/write access with reduced lock contention through sharding. + + ## Thread Safety + + - All single-key operations (`insert`, `get?`, `remove`, `modify`, etc.) are atomic + and thread-safe. + - Aggregate operations (`size`, `toArray`, `fold`, `toList`, `toHashMap`) iterate + over shards non-atomically - they may observe an inconsistent view during + concurrent modifications. + - `clear` is not atomic - shards are cleared sequentially. + + ## Performance Characteristics + + - Uses power-of-2 sharding with bit masking for O(1) shard selection. + - Each shard uses SharedMutex for reader-writer semantics (concurrent reads, + exclusive writes). + - Cache line padding prevents false sharing between adjacent shards. + - Hash mixing distributes keys more evenly across shards. + - Default 256 shards provides good parallelism for most workloads. + - `insertMany` parallelizes updates across shards for 10-20x speedup on bulk inserts. + + ## Usage + + let map ← ShardMap.new (shardBits := 6) -- 64 shards + map.insert key value + let val? ← map.get? key + + -- Bulk insert for better performance + map.insertMany #[(k1, v1), (k2, v2), ...] + + -- Atomic read-modify-write + let result? ← map.modifyGet key fun v => (computeResult v, updateValue v) +-/ + +import Std.Data.HashMap +import Std.Sync.SharedMutex + +namespace Ix + +/-- Wrapper to prevent false sharing between shards. + Adds padding to push each shard to separate cache lines (typically 64 bytes). + The SharedMutex pointer is ~8 bytes, padding adds 56 bytes for 64-byte alignment. 
-/ +structure PaddedShard (α : Type) (β : Type) [BEq α] [Hashable α] where + shard : Std.SharedMutex (Std.HashMap α β) + -- Padding fields to reach ~64 bytes total (cache line size) + -- Each UInt64 is 8 bytes + private _pad0 : UInt64 := 0 + private _pad1 : UInt64 := 0 + private _pad2 : UInt64 := 0 + private _pad3 : UInt64 := 0 + private _pad4 : UInt64 := 0 + private _pad5 : UInt64 := 0 + private _pad6 : UInt64 := 0 + +/-- A concurrent hashmap with sharded locks using reader-writer semantics. + + Each shard is protected by a SharedMutex, allowing concurrent reads + while writes are exclusive. Uses 2^shardBits shards with bit-masking for + fast shard selection. + + Shards are padded to separate cache lines to prevent false sharing. -/ +structure ShardMap (α : Type) (β : Type) [BEq α] [Hashable α] where + shards : Array (PaddedShard α β) + shardMask : USize -- 2^k - 1 for fast bitwise AND + h_pos : shards.size > 0 + +namespace ShardMap + +variable {α β : Type} [BEq α] [Hashable α] + +/-- Build an array of n padded shards with given capacity per shard. -/ +private def mkShardArrayWithCapacity (n : Nat) (capacity : Nat) + : BaseIO { arr : Array (PaddedShard α β) // arr.size = n } := do + let rec go (remaining : Nat) (acc : Array (PaddedShard α β)) (hacc : acc.size + remaining = n) : + BaseIO { arr : Array (PaddedShard α β) // arr.size = n } := do + match remaining with + | 0 => pure ⟨acc, by omega⟩ + | r + 1 => + let mutex ← Std.SharedMutex.new (Std.HashMap.emptyWithCapacity capacity) + let paddedShard : PaddedShard α β := { shard := mutex } + go r (acc.push paddedShard) (by simp [Array.size_push]; omega) + go n #[] (by simp) + +/-- Build an array of n empty padded shards. -/ +private def mkShardArray (n : Nat) : BaseIO { arr : Array (PaddedShard α β) // arr.size = n } := + mkShardArrayWithCapacity n 0 + +/-- Create a new ShardMap with 2^shardBits shards. + Default is 8 bits = 256 shards, which provides good parallelism for most workloads. 
-/ +def new (shardBits : Nat := 8) : BaseIO (ShardMap α β) := do + let numShards := 2 ^ shardBits + let shardMask : USize := (numShards - 1).toUSize + let ⟨shards, hsize⟩ ← mkShardArray numShards + have h : shards.size > 0 := by simp [hsize]; exact Nat.one_le_two_pow + pure ⟨shards, shardMask, h⟩ + +/-- Compute the shard index for a given key using fast bit masking. + Mixes high and low bits of the hash for better distribution across shards, + which helps when hash functions produce correlated low bits. -/ +@[inline] +def shardIdx (m : ShardMap α β) (key : α) : Nat := + let h : UInt64 := hash key + -- Mix high and low bits for better distribution + let mixed := h ^^^ (h >>> 32) + (mixed.toUSize &&& m.shardMask).toNat + +/-- Get the shard for a key. -/ +@[inline] +def getShard (m : ShardMap α β) (key : α) : Std.SharedMutex (Std.HashMap α β) := + let idx := m.shardIdx key + (m.shards[idx % m.shards.size]'(Nat.mod_lt _ m.h_pos)).shard + +/-- Contention threshold in nanoseconds (100ms) -/ +def contentionThresholdNs : Nat := 100_000_000 + +/-- Insert a key-value pair into the map. + If the key already exists, its value is replaced. -/ +@[inline] +def insert (m : ShardMap α β) (key : α) (val : β) : BaseIO Unit := do + m.getShard key |>.atomically fun ref => do + let map ← ST.Ref.get ref + ST.Ref.set ref (map.insert key val) + +/-- Insert with contention detection. -/ +def insertTimed (m : ShardMap α β) (key : α) (val : β) + (label : String := "ShardMap") : IO Unit := do + let start ← IO.monoNanosNow + let shardIdx := m.shardIdx key + m.getShard key |>.atomically fun ref => do + let map ← ST.Ref.get ref + ST.Ref.set ref (map.insert key val) + let elapsed := (← IO.monoNanosNow) - start + if elapsed > contentionThresholdNs then + IO.eprintln s!"[CONTENTION] {label} shard {shardIdx}: insert took {elapsed / 1_000_000}ms" + +/-- Look up a key in the map, returning `none` if not found. + Uses shared read lock for concurrent access. -/ +@[inline] +def get? 
(m : ShardMap α β) (key : α) : BaseIO (Option β) := do + m.getShard key |>.atomicallyRead fun map => pure (map.get? key) + +/-- Look up a key in the map, returning a default value if not found. -/ +@[inline] +def getD (m : ShardMap α β) (key : α) (default : β) : BaseIO β := do + match ← m.get? key with + | some v => pure v + | none => pure default + +/-- Get a value or insert a new one if the key doesn't exist. + The `mkVal` function is only called if the key is not present. + Returns the value (either existing or newly inserted). + Uses double-checked locking: tries read lock first, only takes write lock on miss. -/ +def getOrInsert (m : ShardMap α β) (key : α) (mkVal : Unit → BaseIO β) : BaseIO β := do + let shard := m.getShard key + -- Fast path: try read lock first + let cached? ← shard.atomicallyRead fun map => pure (map.get? key) + match cached? with + | some v => pure v + | none => + -- Slow path: take write lock and double-check + shard.atomically fun ref => do + let map ← ST.Ref.get ref + match map.get? key with + | some v => pure v -- Another thread inserted while we waited + | none => do + let v ← mkVal () + ST.Ref.set ref (map.insert key v) + pure v + +/-- Get a value or insert a new one if the key doesn't exist (IO version). + The `mkVal` function is only called if the key is not present. + Returns the value (either existing or newly inserted). + Uses double-checked locking: tries read lock first, only takes write lock on miss. -/ +def getOrInsertIO (m : ShardMap α β) (key : α) (mkVal : Unit → IO β) : IO β := do + let shard := m.getShard key + -- Fast path: try read lock first + let cached? ← shard.atomicallyRead fun map => pure (map.get? key) + match cached? with + | some v => pure v + | none => + -- Slow path: take write lock and double-check + shard.atomically fun ref => do + let map ← ST.Ref.get ref + match map.get? 
key with + | some v => pure v -- Another thread inserted while we waited + | none => do + let v ← mkVal () + ST.Ref.set ref (map.insert key v) + pure v + +/-- Get a value or insert a new one (pure version). + The default value is evaluated only if needed. + Uses double-checked locking: tries read lock first, only takes write lock on miss. -/ +def getOrInsertLazy (m : ShardMap α β) (key : α) (mkVal : Unit → β) : BaseIO β := do + let shard := m.getShard key + -- Fast path: try read lock first + let cached? ← shard.atomicallyRead fun map => pure (map.get? key) + match cached? with + | some v => pure v + | none => + -- Slow path: take write lock and double-check + shard.atomically fun ref => do + let map ← ST.Ref.get ref + match map.get? key with + | some v => pure v + | none => + let v := mkVal () + ST.Ref.set ref (map.insert key v) + pure v + +/-- Get a value or insert a new one, with contention detection. + Prints a warning if the operation takes longer than the threshold. + Uses double-checked locking: tries read lock first, only takes write lock on miss. -/ +def getOrInsertLazyTimed (m : ShardMap α β) (key : α) (mkVal : Unit → β) + (label : String := "ShardMap") : IO β := do + let start ← IO.monoNanosNow + let shardIdx := m.shardIdx key + let shard := m.getShard key + -- Fast path: try read lock first + let cached? ← shard.atomicallyRead fun map => pure (map.get? key) + let result ← match cached? with + | some v => pure v + | none => + -- Slow path: take write lock and double-check + shard.atomically fun ref => do + let map ← ST.Ref.get ref + match map.get? key with + | some v => pure v + | none => + let v := mkVal () + ST.Ref.set ref (map.insert key v) + pure v + let elapsed := (← IO.monoNanosNow) - start + if elapsed > contentionThresholdNs then + IO.eprintln s!"[CONTENTION] {label} shard {shardIdx}: getOrInsertLazy took {elapsed / 1_000_000}ms" + pure result + +/-- Check if a key exists in the map. + Uses shared read lock for concurrent access. 
-/ +@[inline] +def contains (m : ShardMap α β) (key : α) : BaseIO Bool := do + m.getShard key |>.atomicallyRead fun map => pure (map.contains key) + +/-- Remove a key from the map, returning the removed value if it existed. -/ +def remove (m : ShardMap α β) (key : α) : BaseIO (Option β) := do + m.getShard key |>.atomically fun ref => do + let map ← ST.Ref.get ref + match map.get? key with + | some v => + ST.Ref.set ref (map.erase key) + pure (some v) + | none => pure none + +/-- Modify the value associated with a key, if it exists. + Returns `true` if the key was found and modified. -/ +def modify (m : ShardMap α β) (key : α) (f : β → β) : BaseIO Bool := do + m.getShard key |>.atomically fun ref => do + let map ← ST.Ref.get ref + match map.get? key with + | some v => + ST.Ref.set ref (map.insert key (f v)) + pure true + | none => pure false + +/-- Modify a value and return a result, atomically. + The function `f` receives the current value and returns both a result and + a new value. Returns `none` if the key doesn't exist. + + This is useful for patterns that need to read and modify in a single + lock acquisition, e.g., incrementing a counter and returning the old value. -/ +def modifyGet (m : ShardMap α β) (key : α) (f : β → (γ × β)) : BaseIO (Option γ) := do + m.getShard key |>.atomically fun ref => do + let map ← ST.Ref.get ref + match map.get? key with + | some v => + let (result, newV) := f v + ST.Ref.set ref (map.insert key newV) + pure (some result) + | none => pure none + +/-! ## Bulk Operations -/ + +/-- Insert multiple key-value pairs, grouping by shard for efficiency. + This reduces lock acquisition overhead compared to inserting items one by one. + + Items are grouped by their target shard, then each shard is updated in parallel + using IO.asTask for better multi-core utilization. 
-/ +def insertMany (m : ShardMap α β) (items : Array (α × β)) : IO Unit := do + -- Group items by shard index + let numShards := m.shards.size + let mut shardGroups : Array (Array (α × β)) := Array.replicate numShards #[] + for (k, v) in items do + let idx := m.shardIdx k + shardGroups := shardGroups.modify idx (·.push (k, v)) + -- Insert each group in parallel across shards + let finalGroups := shardGroups + let mut tasks : Array (Task (Except IO.Error Unit)) := #[] + for i in [:numShards] do + if let some group := finalGroups[i]? then + if group.size > 0 then + let paddedShard := m.shards[i % numShards]'(Nat.mod_lt _ m.h_pos) + let task ← IO.asTask do + paddedShard.shard.atomically fun ref => do + let map ← ST.Ref.get ref + ST.Ref.set ref (group.foldl (fun m (k, v) => m.insert k v) map) + tasks := tasks.push task + -- Wait for all tasks to complete + for task in tasks do + let _ ← IO.ofExcept task.get + +/-- Get the approximate size of the map. + Note: This is not atomic and may not be exact during concurrent modifications. -/ +def size (m : ShardMap α β) : BaseIO Nat := do + let mut total := 0 + for paddedShard in m.shards do + let sz ← paddedShard.shard.atomicallyRead fun map => pure map.size + total := total + sz + pure total + +/-- Convert the map to an array of key-value pairs. O(n) time complexity. + Single-pass collection to avoid stale size between passes. + Not atomic; may be inconsistent during concurrent modifications. -/ +def toArray (m : ShardMap α β) : BaseIO (Array (α × β)) := do + let mut result : Array (α × β) := #[] + for paddedShard in m.shards do + let shardMap ← paddedShard.shard.atomicallyRead fun map => pure map + for pair in shardMap do + result := result.push pair + pure result + +/-- Fold over all key-value pairs in the map. + Not atomic; may be inconsistent during concurrent modifications. 
-/ +def fold (m : ShardMap α β) (init : γ) (f : γ -> α -> β -> γ) : BaseIO γ := do + let mut acc := init + for paddedShard in m.shards do + let shardMap ← paddedShard.shard.atomicallyRead fun map => pure map + for (k, v) in shardMap do + acc := f acc k v + pure acc + +/-- Convert the map to a list of key-value pairs. O(n) time complexity. + Note: This is not atomic and may not be consistent during concurrent modifications. -/ +def toList (m : ShardMap α β) : BaseIO (List (α × β)) := do + let arr ← m.toArray + pure arr.toList + +/-- Convert to a regular HashMap. O(n) time complexity using bulk construction. + Note: This is not atomic and may not be consistent during concurrent modifications. -/ +def toHashMap (m : ShardMap α β) : BaseIO (Std.HashMap α β) := do + let pairs ← m.toArray + pure (Std.HashMap.emptyWithCapacity pairs.size |>.insertMany pairs) + +/-- Clear all entries from the map. + Note: This is not atomic - shards are cleared sequentially. -/ +def clear (m : ShardMap α β) : BaseIO Unit := do + for paddedShard in m.shards do + paddedShard.shard.atomically fun ref => ST.Ref.set ref {} + +/-! ## Non-blocking operations -/ + +/-- Get a value with try-lock fast path. + Attempts non-blocking read first, falls back to blocking if shard is contended. + Useful for read-heavy workloads where you want to avoid blocking on contended shards. -/ +@[inline] +def get?Fast (m : ShardMap α β) (key : α) : BaseIO (Option β) := do + -- Try non-blocking first + match ← m.getShard key |>.tryAtomicallyRead fun map => pure (map.get? key) with + | some result => pure result + | none => m.get? key -- Fall back to blocking + +/-- Check if a key exists with try-lock fast path. + Attempts non-blocking read first, falls back to blocking if shard is contended. + Useful for read-heavy workloads where you want to avoid blocking on contended shards. 
-/ +@[inline] +def containsFast (m : ShardMap α β) (key : α) : BaseIO Bool := do + -- Try non-blocking first + match ← m.getShard key |>.tryAtomicallyRead fun map => pure (map.contains key) with + | some result => pure result + | none => m.contains key -- Fall back to blocking + +/-- Try to get a value without blocking. + Returns `some (some v)` if key exists, `some none` if key doesn't exist, + or `none` if the shard is currently locked. -/ +def tryGet? (m : ShardMap α β) (key : α) : BaseIO (Option (Option β)) := do + m.getShard key |>.tryAtomicallyRead fun map => pure (map.get? key) + +/-- Try to insert without blocking. + Returns `true` if insert succeeded, `false` if shard was locked. -/ +def tryInsert (m : ShardMap α β) (key : α) (val : β) : BaseIO Bool := do + let shard := m.getShard key + match ← shard.tryAtomically fun ref => do + let map ← ST.Ref.get ref + ST.Ref.set ref (map.insert key val) + with + | some () => pure true + | none => pure false + +/-- Try to get or insert without blocking. + Returns `some v` with the value (existing or new), or `none` if shard was locked. -/ +def tryGetOrInsertLazy (m : ShardMap α β) (key : α) (mkVal : Unit → β) : BaseIO (Option β) := do + let shard := m.getShard key + -- First try read lock + match ← shard.tryAtomicallyRead fun map => pure (map.get? key) with + | some (some v) => pure (some v) -- Found it + | some none => + -- Not found, try write lock + match ← shard.tryAtomically fun ref => do + let map ← ST.Ref.get ref + match map.get? key with + | some v => pure v + | none => + let v := mkVal () + ST.Ref.set ref (map.insert key v) + pure v + with + | some v => pure (some v) + | none => pure none -- Write lock failed + | none => pure none -- Read lock failed + +/-! ## Capacity hints -/ + +/-- Create a new ShardMap with pre-sized shards. + + **Note:** Benchmarks show that pre-allocation generally hurts performance due to + HashMap allocation overhead. 
In most cases, `new` with natural growth performs + better than `newWithCapacity`. This function is retained for cases where you've + profiled and determined pre-allocation helps your specific workload. + + Capacity is capped at 64 entries per shard to limit allocation overhead. -/ +def newWithCapacity (shardBits : Nat := 8) (capacityPerShard : Nat := 64) + : BaseIO (ShardMap α β) := do + let numShards := 2 ^ shardBits + let shardMask : USize := (numShards - 1).toUSize + -- Cap capacity to avoid allocation overhead; let HashMap grow naturally beyond this + let effectiveCapacity := min capacityPerShard 64 + let ⟨shards, hsize⟩ ← mkShardArrayWithCapacity numShards effectiveCapacity + have h : shards.size > 0 := by simp [hsize]; exact Nat.one_le_two_pow + pure ⟨shards, shardMask, h⟩ + +end ShardMap + +end Ix diff --git a/Ix/Sharing.lean b/Ix/Sharing.lean new file mode 100644 index 00000000..ddffb72f --- /dev/null +++ b/Ix/Sharing.lean @@ -0,0 +1,509 @@ +/- + Sharing Analysis for expression deduplication within mutual blocks. + + This module provides alpha-invariant sharing analysis using Merkle-tree hashing. + Expressions that are structurally identical get the same hash, and we decide + which subterms to share based on a profitability heuristic. + + Algorithm: + 1. Post-order traversal with Merkle hashing (blake3) + 2. Count usage of each unique subterm + 3. Profitability: share if (count - 1) * size > count * ref_size + 4. Build sharing vector in topological order (leaves first) + 5. Rewrite expressions with Share(idx) references +-/ + +import Ix.Ixon +import Ix.Address +import Ix.Common +import Std.Data.HashMap +import Blake3 + +namespace Ix.Sharing + +/-- Convert UInt64 to ByteArray (little-endian, fixed 8 bytes). + This MUST match Rust's `u64.to_le_bytes()` for hash compatibility. 
-/ +def uint64ToBytes (x : UInt64) : ByteArray := + let arr : Array UInt8 := #[ + (x &&& 0xFF).toUInt8, + ((x >>> 8) &&& 0xFF).toUInt8, + ((x >>> 16) &&& 0xFF).toUInt8, + ((x >>> 24) &&& 0xFF).toUInt8, + ((x >>> 32) &&& 0xFF).toUInt8, + ((x >>> 40) &&& 0xFF).toUInt8, + ((x >>> 48) &&& 0xFF).toUInt8, + ((x >>> 56) &&& 0xFF).toUInt8 + ] + ByteArray.mk arr + +/-- Compute encoded size of Tag0 (variable-length u64). + If value < 128: 1 byte, else 1 + byteCount bytes. + Must match Rust's Tag0::encoded_size(). -/ +def tag0EncodedSize (value : UInt64) : Nat := + if value < 128 then 1 else 1 + value.byteCount.toNat + +/-- Compute encoded size of Tag4 (4-bit flag + variable-length size). + If size < 8: 1 byte, else 1 + byteCount bytes. + Must match Rust's Tag4::encoded_size(). -/ +def tag4EncodedSize (size : UInt64) : Nat := + if size < 8 then 1 else 1 + size.byteCount.toNat + +/-- Compute the sharing hash for an expression node given its child hashes. + This is the single source of truth for sharing hash computation. + MUST match Rust's `hash_node` exactly for hash compatibility. 
-/ +def computeNodeHash (e : Ixon.Expr) (childHashes : Array Address) : Address := + let buf := ByteArray.emptyWithCapacity 100 + let buf := match e with + | .sort univIdx => + buf.push Ixon.Expr.FLAG_SORT |>.append (uint64ToBytes univIdx) + | .var idx => + buf.push Ixon.Expr.FLAG_VAR |>.append (uint64ToBytes idx) + | .ref refIdx univIndices => + let base := buf.push Ixon.Expr.FLAG_REF + |>.append (uint64ToBytes refIdx) + |>.append (uint64ToBytes univIndices.size.toUInt64) + univIndices.foldl (fun buf idx => buf.append (uint64ToBytes idx)) base + | .recur recIdx univIndices => + let base := buf.push Ixon.Expr.FLAG_REC + |>.append (uint64ToBytes recIdx) + |>.append (uint64ToBytes univIndices.size.toUInt64) + univIndices.foldl (fun buf idx => buf.append (uint64ToBytes idx)) base + | .prj typeRefIdx fieldIdx _ => + buf.push Ixon.Expr.FLAG_PRJ + |>.append (uint64ToBytes typeRefIdx) + |>.append (uint64ToBytes fieldIdx) + |>.append childHashes[0]!.hash + | .str refIdx => + buf.push Ixon.Expr.FLAG_STR |>.append (uint64ToBytes refIdx) + | .nat refIdx => + buf.push Ixon.Expr.FLAG_NAT |>.append (uint64ToBytes refIdx) + | .app _ _ => + buf.push Ixon.Expr.FLAG_APP + |>.append childHashes[0]!.hash + |>.append childHashes[1]!.hash + | .lam _ _ => + buf.push Ixon.Expr.FLAG_LAM + |>.append childHashes[0]!.hash + |>.append childHashes[1]!.hash + | .all _ _ => + buf.push Ixon.Expr.FLAG_ALL + |>.append childHashes[0]!.hash + |>.append childHashes[1]!.hash + | .letE nonDep _ _ _ => + buf.push Ixon.Expr.FLAG_LET + |>.push (if nonDep then 1 else 0) + |>.append childHashes[0]!.hash + |>.append childHashes[1]!.hash + |>.append childHashes[2]!.hash + | .share idx => + buf.push Ixon.Expr.FLAG_SHARE |>.append (uint64ToBytes idx) + Address.blake3 buf + +/-- Compute the sharing hash of an expression recursively (for testing). + Uses computeNodeHash as the single source of truth. 
-/ +partial def computeExprHash (e : Ixon.Expr) : Address := + let childHashes := match e with + | .sort _ | .var _ | .ref _ _ | .recur _ _ | .str _ | .nat _ | .share _ => #[] + | .prj _ _ val => #[computeExprHash val] + | .app fun_ arg => #[computeExprHash fun_, computeExprHash arg] + | .lam ty body | .all ty body => #[computeExprHash ty, computeExprHash body] + | .letE _ ty val body => #[computeExprHash ty, computeExprHash val, computeExprHash body] + computeNodeHash e childHashes + +/-- Information about a subterm for sharing analysis. -/ +structure SubtermInfo where + /-- Base size of this node alone (Tag4 header, not including children) for Ixon format -/ + baseSize : Nat + /-- Number of occurrences within this block -/ + usageCount : Nat + /-- Canonical representative expression -/ + expr : Ixon.Expr + /-- Hashes of child subterms (for topological ordering) -/ + children : Array Address + deriving Inhabited + +/-- Compute the base size of a node (Tag4 header size) for Ixon serialization. 
-/ +def computeBaseSize (e : Ixon.Expr) : Nat := + match e with + | .sort univIdx => + if univIdx < 8 then 1 else 1 + univIdx.byteCount.toNat + | .var idx => + if idx < 8 then 1 else 1 + idx.byteCount.toNat + | .ref refIdx univIndices => + -- Tag4 for size + Tag0 for refIdx + Tag0 for each universe index + tag4EncodedSize univIndices.size.toUInt64 + + tag0EncodedSize refIdx + + univIndices.foldl (fun acc idx => acc + tag0EncodedSize idx) 0 + | .recur recIdx univIndices => + -- Tag4 for size + Tag0 for recIdx + Tag0 for each universe index + tag4EncodedSize univIndices.size.toUInt64 + + tag0EncodedSize recIdx + + univIndices.foldl (fun acc idx => acc + tag0EncodedSize idx) 0 + | .prj typeRefIdx fieldIdx _ => + let tagSize := if fieldIdx < 8 then 1 else 1 + fieldIdx.byteCount.toNat + let refIdxSize := if typeRefIdx < 128 then 1 else 1 + typeRefIdx.byteCount.toNat + tagSize + refIdxSize + | .str refIdx => + if refIdx < 8 then 1 else 1 + refIdx.byteCount.toNat + | .nat refIdx => + if refIdx < 8 then 1 else 1 + refIdx.byteCount.toNat + | .app _ _ => 1 -- telescope count >= 1 + | .lam _ _ => 1 + | .all _ _ => 1 + | .letE _ _ _ _ => 1 -- size encodes non_dep flag + | .share idx => + if idx < 8 then 1 else 1 + idx.byteCount.toNat + +/-- Get the memory address of an expression for identity-based caching. + This is safe because we only use it within a single analysis pass + where expressions are not modified. -/ +@[inline] +def exprPtr (e : Ixon.Expr) : USize := unsafe ptrAddrUnsafe e + +/-- State for the analysis monad. -/ +structure AnalyzeState where + /-- Map from content hash to subterm info -/ + infoMap : Std.HashMap Address SubtermInfo := {} + /-- Map from expression pointer to its content hash (for O(1) lookup during rewrite) -/ + ptrToHash : Std.HashMap USize Address := {} + /-- Topological order built during traversal (leaves first) -/ + topoOrder : Array Address := #[] + deriving Inhabited + +/-- Analysis monad. 
-/ +abbrev AnalyzeM := StateM AnalyzeState + +/-- Hash an expression and its children recursively, returning the content hash. + Phase 1 of the efficient algorithm: builds DAG structure without computing usage counts. + Uses computeNodeHash as the single source of truth for hash computation. -/ +partial def hashAndAnalyze (e : Ixon.Expr) : AnalyzeM Address := do + -- Check if we've already processed this exact pointer + let st ← get + let ptr := exprPtr e + if let some hash := st.ptrToHash.get? ptr then + -- Already processed - just return the hash (no subtree walk needed) + return hash + + -- Recursively process children first and collect their hashes + let childHashes ← match e with + | .sort _ | .var _ | .ref _ _ | .recur _ _ | .str _ | .nat _ | .share _ => + pure #[] + | .prj _ _ val => + let valHash ← hashAndAnalyze val + pure #[valHash] + | .app fun_ arg => + let funHash ← hashAndAnalyze fun_ + let argHash ← hashAndAnalyze arg + pure #[funHash, argHash] + | .lam ty body | .all ty body => + let tyHash ← hashAndAnalyze ty + let bodyHash ← hashAndAnalyze body + pure #[tyHash, bodyHash] + | .letE _ ty val body => + let tyHash ← hashAndAnalyze ty + let valHash ← hashAndAnalyze val + let bodyHash ← hashAndAnalyze body + pure #[tyHash, valHash, bodyHash] + + -- Compute the content hash using the single source of truth + let hash := computeNodeHash e childHashes + + -- Update state: add to pointer cache + let st ← get + set { st with ptrToHash := st.ptrToHash.insert ptr hash } + + -- Add to info map if not already present (same content hash from different pointer) + let st ← get + if !st.infoMap.contains hash then + let baseSize := computeBaseSize e + set { st with + infoMap := st.infoMap.insert hash { + baseSize + usageCount := 0 -- Will be computed in phase 2 + expr := e + children := childHashes + } + topoOrder := st.topoOrder.push hash + } + + return hash + +/-- Result of sharing analysis. 
-/ +structure AnalyzeResult where + /-- Map from content hash to subterm info -/ + infoMap : Std.HashMap Address SubtermInfo + /-- Map from expression pointer to content hash -/ + ptrToHash : Std.HashMap USize Address + /-- Topological order (leaves first) - built during traversal -/ + topoOrder : Array Address + +/-- Analyze expressions for sharing opportunities within a block. + Uses a two-phase O(n) algorithm: + 1. Build DAG structure via post-order traversal with Merkle-tree hashing + 2. Propagate usage counts structurally from roots to leaves + Returns AnalyzeResult with infoMap, ptrToHash, and topoOrder. -/ +def analyzeBlock (exprs : Array Ixon.Expr) : AnalyzeResult := Id.run do + -- Phase 1: Build DAG structure + let mut st : AnalyzeState := {} + for e in exprs do + (_, st) := hashAndAnalyze e |>.run st + + -- Phase 2: Propagate usage counts structurally from roots to leaves + -- This is O(n) total - no subtree walks needed + + -- Count root contributions + let mut infoMap := st.infoMap + for e in exprs do + let ptr := exprPtr e + if let some hash := st.ptrToHash.get? ptr then + if let some info := infoMap.get? hash then + infoMap := infoMap.insert hash { info with usageCount := info.usageCount + 1 } + + -- Propagate counts from roots to leaves (reverse topological order) + for hash in st.topoOrder.reverse do + if let some info := infoMap.get? hash then + let count := info.usageCount + for childHash in info.children do + if let some childInfo := infoMap.get? childHash then + infoMap := infoMap.insert childHash { childInfo with usageCount := childInfo.usageCount + count } + + { infoMap, ptrToHash := st.ptrToHash, topoOrder := st.topoOrder } + +/-- Visit state for topological sort. -/ +inductive VisitState where + | inProgress + | done + +/-- Topological sort of subterms (leaves first, parents last). + CRITICAL: Keys are sorted by hash bytes for deterministic output. + This ensures Lean and Rust produce the same topological order. 
-/ +partial def topologicalSort (infoMap : Std.HashMap Address SubtermInfo) : Array Address := Id.run do + let mut state : Std.HashMap Address VisitState := {} + let mut result : Array Address := #[] + + -- Sort keys deterministically by hash bytes (lexicographic comparison) + let sortedKeys := infoMap.toArray.map (·.1) |>.qsort fun a b => + a.hash.data < b.hash.data + + for hash in sortedKeys do + (state, result) := visit hash infoMap state result + + result +where + visit (hash : Address) (infoMap : Std.HashMap Address SubtermInfo) + (state : Std.HashMap Address VisitState) (result : Array Address) + : Std.HashMap Address VisitState × Array Address := Id.run do + let mut state := state + let mut result := result + + match state.get? hash with + | some .done => return (state, result) + | some .inProgress => return (state, result) -- Cycle (shouldn't happen) + | none => pure () + + state := state.insert hash .inProgress + + if let some info := infoMap.get? hash then + for child in info.children do + (state, result) := visit child infoMap state result + + state := state.insert hash .done + result := result.push hash + (state, result) + +/-- Compute effective sizes for all subterms in topological order. + Returns a map from hash to effective size (total serialized bytes). -/ +def computeEffectiveSizes (infoMap : Std.HashMap Address SubtermInfo) + (topoOrder : Array Address) : Std.HashMap Address Nat := Id.run do + let mut sizes : Std.HashMap Address Nat := {} + + for hash in topoOrder do + if let some info := infoMap.get? hash then + let mut size := info.baseSize + for childHash in info.children do + size := size + sizes.getD childHash 0 + sizes := sizes.insert hash size + + sizes + +/-- Compute the encoded size of a Share(idx) tag. -/ +def shareRefSize (idx : Nat) : Nat := + let idx64 := idx.toUInt64 + if idx64 < 8 then 1 else 1 + idx64.byteCount.toNat + +/-- Candidate for sharing: (hash, term_size, usage_count, potential_savings). 
-/ +structure SharingCandidate where + hash : Address + termSize : Nat + usageCount : Nat + potential : _root_.Int -- (N-1) * size - N (assuming ref_size=1) + deriving Inhabited + +/-- Decide which subterms to share based on profitability. + + Sharing is profitable when: (N - 1) * term_size > N * share_ref_size + where N is usage count, term_size is effective size, and share_ref_size + is the size of a Share(idx) reference at the current index. + + Returns a set of hashes to share. -/ +def decideSharing (infoMap : Std.HashMap Address SubtermInfo) + (topoOrder : Array Address) : Array Address := Id.run do + let effectiveSizes := computeEffectiveSizes infoMap topoOrder + + -- Pre-filter and collect candidates + let mut candidates : Array SharingCandidate := #[] + for (hash, info) in infoMap do + if info.usageCount < 2 then continue + let termSize := effectiveSizes.getD hash 0 + let n := info.usageCount + -- Potential savings assuming ref_size = 1 (minimum) + let potential : _root_.Int := (n - 1 : _root_.Int) * termSize - n + if potential > 0 then + candidates := candidates.push { hash, termSize, usageCount := n, potential } + + -- Sort by decreasing gross benefit ((n-1) * size), with hash bytes as tie-breaker + -- NOTE: Rust sorts by gross benefit, not net potential. Hash tie-breaker ensures determinism. + candidates := candidates.qsort fun a b => + let grossA := (a.usageCount - 1) * a.termSize + let grossB := (b.usageCount - 1) * b.termSize + if grossA != grossB then grossA > grossB + else a.hash.hash.data < b.hash.hash.data -- lexicographic tie-breaker + + let mut shared : Array Address := #[] + + -- Process ALL candidates - don't break early! 
+ for cand in candidates do + let nextIdx := shared.size + let nextRefSize := shareRefSize nextIdx + let n := cand.usageCount + let savings : _root_.Int := (n - 1 : _root_.Int) * cand.termSize - n * nextRefSize + if savings > 0 then + shared := shared.push cand.hash + + shared + +/-- Rewrite an expression tree to use Share(idx) references. + Uses pointer-based caching like Rust: cache rewritten expressions by pointer, + and only check hashToIdx if pointer is in ptrToHash (no hash recomputation). -/ +partial def rewriteWithSharing (e : Ixon.Expr) (hashToIdx : Std.HashMap Address Nat) + (ptrToHash : Std.HashMap USize Address) + (cache : Std.HashMap USize Ixon.Expr) : Ixon.Expr × Std.HashMap USize Ixon.Expr := + let ptr := exprPtr e + -- Check cache first + match cache.get? ptr with + | some cached => (cached, cache) + | none => + -- Check if this expression should become a Share reference + -- Only if pointer is in ptrToHash (was seen during analysis) + match ptrToHash.get? ptr with + | some hash => + match hashToIdx.get? 
hash with + | some idx => + let result := Ixon.Expr.share idx.toUInt64 + (result, cache.insert ptr result) + | none => rewriteChildren e hashToIdx ptrToHash cache ptr + | none => rewriteChildren e hashToIdx ptrToHash cache ptr +where + rewriteChildren (e : Ixon.Expr) (hashToIdx : Std.HashMap Address Nat) + (ptrToHash : Std.HashMap USize Address) + (cache : Std.HashMap USize Ixon.Expr) (ptr : USize) + : Ixon.Expr × Std.HashMap USize Ixon.Expr := + -- Rewrite children, but reuse original if nothing changed (like Rust's Arc::ptr_eq optimization) + let (result, cache') := match e with + | .sort _ | .var _ | .ref _ _ | .recur _ _ | .str _ | .nat _ | .share _ => + (e, cache) + | .prj typeRefIdx fieldIdx val => + let (val', cache') := rewriteWithSharing val hashToIdx ptrToHash cache + -- Reuse original if child unchanged + let result := if exprPtr val == exprPtr val' then e else .prj typeRefIdx fieldIdx val' + (result, cache') + | .app fun_ arg => + let (fun', cache') := rewriteWithSharing fun_ hashToIdx ptrToHash cache + let (arg', cache'') := rewriteWithSharing arg hashToIdx ptrToHash cache' + -- Reuse original if both children unchanged + let result := if exprPtr fun_ == exprPtr fun' && exprPtr arg == exprPtr arg' + then e else .app fun' arg' + (result, cache'') + | .lam ty body => + let (ty', cache') := rewriteWithSharing ty hashToIdx ptrToHash cache + let (body', cache'') := rewriteWithSharing body hashToIdx ptrToHash cache' + let result := if exprPtr ty == exprPtr ty' && exprPtr body == exprPtr body' + then e else .lam ty' body' + (result, cache'') + | .all ty body => + let (ty', cache') := rewriteWithSharing ty hashToIdx ptrToHash cache + let (body', cache'') := rewriteWithSharing body hashToIdx ptrToHash cache' + let result := if exprPtr ty == exprPtr ty' && exprPtr body == exprPtr body' + then e else .all ty' body' + (result, cache'') + | .letE nonDep ty val body => + let (ty', cache') := rewriteWithSharing ty hashToIdx ptrToHash cache + let (val', cache'') := 
rewriteWithSharing val hashToIdx ptrToHash cache' + let (body', cache''') := rewriteWithSharing body hashToIdx ptrToHash cache'' + let result := if exprPtr ty == exprPtr ty' && exprPtr val == exprPtr val' && exprPtr body == exprPtr body' + then e else .letE nonDep ty' val' body' + (result, cache''') + (result, cache'.insert ptr result) + +/-- Rewrite expressions to use Share(idx) references for shared subterms. + + Returns the rewritten expressions and the sharing vector. + The sharing vector is sorted in topological order (leaves first). -/ +def buildSharingVec (exprs : Array Ixon.Expr) (sharedHashes : Array Address) + (infoMap : Std.HashMap Address SubtermInfo) + (ptrToHash : Std.HashMap USize Address) : Array Ixon.Expr × Array Ixon.Expr := Id.run do + + -- CRITICAL: Re-sort shared_hashes in topological order (leaves first). + -- Use topologicalSort instead of filtering topoOrder from traversal. + -- This ensures deterministic ordering by sorting keys by hash bytes. + let topoOrder := topologicalSort infoMap + let sharedSet : Std.HashMap Address Unit := sharedHashes.foldl (init := {}) fun s h => s.insert h () + let sharedInTopoOrder : Array Address := topoOrder.filter fun h => sharedSet.contains h + + -- Build sharing vector incrementally to avoid forward references + let mut sharingVec : Array Ixon.Expr := #[] + let mut hashToIdx : Std.HashMap Address Nat := {} + + for h in sharedInTopoOrder do + if let some info := infoMap.get? 
h then + -- Clear cache each iteration - hashToIdx changed, so cached rewrites are invalid + let rewriteCache : Std.HashMap USize Ixon.Expr := {} + let (rewritten, _) := rewriteWithSharing info.expr hashToIdx ptrToHash rewriteCache + + let idx := sharingVec.size + sharingVec := sharingVec.push rewritten + hashToIdx := hashToIdx.insert h idx + + -- Rewrite the root expressions (can use all Share indices) + -- Fresh cache for root expressions since hashToIdx is now complete + let mut rewriteCache : Std.HashMap USize Ixon.Expr := {} + let mut rewrittenExprs : Array Ixon.Expr := #[] + for e in exprs do + let (rewritten, cache') := rewriteWithSharing e hashToIdx ptrToHash rewriteCache + rewriteCache := cache' + rewrittenExprs := rewrittenExprs.push rewritten + + (rewrittenExprs, sharingVec) + +/-- Apply sharing analysis to a set of expressions. + Returns (rewritten_exprs, sharing_vector). -/ +def applySharing (exprs : Array Ixon.Expr) (dbg : Bool := false) + : Array Ixon.Expr × Array Ixon.Expr := Id.run do + let result := analyzeBlock exprs + let sharedHashes := decideSharing result.infoMap result.topoOrder + if dbg then + dbg_trace s!"[Sharing] analyzed {exprs.size} exprs, found {result.infoMap.size} unique subterms, {sharedHashes.size} to share" + dbg_trace s!"[Sharing] ptrToHash has {result.ptrToHash.size} entries" + -- Debug: show usage counts for all subterms with usage >= 2 + let effectiveSizes := computeEffectiveSizes result.infoMap result.topoOrder + for (hash, info) in result.infoMap do + if info.usageCount >= 2 then + let size := effectiveSizes.getD hash 0 + let potential : _root_.Int := (info.usageCount - 1 : _root_.Int) * size - info.usageCount + dbg_trace s!" 
usage={info.usageCount} size={size} potential={potential} expr={repr info.expr}" + if sharedHashes.isEmpty then + return (exprs, #[]) + else + return buildSharingVec exprs sharedHashes result.infoMap result.ptrToHash + +end Ix.Sharing diff --git a/Ix/Store.lean b/Ix/Store.lean index a476a5e3..d8c6d5ad 100644 --- a/Ix/Store.lean +++ b/Ix/Store.lean @@ -1,12 +1,10 @@ import Ix.Address -import Ix.Ixon import Init.System.FilePath import Init.System.IO import Init.System.IOError open System -open Ixon inductive StoreError | unknownAddress (a: Address) diff --git a/Ix/Toy.lean b/Ix/Toy.lean deleted file mode 100644 index 5a343438..00000000 --- a/Ix/Toy.lean +++ /dev/null @@ -1,325 +0,0 @@ ---import Lean ---import Std.Data.HashMap ---import Blake3 --- ---namespace List --- ---partial def mergeM [Monad μ] (cmp : α → α → μ Ordering) : List α → List α → μ (List α) --- | as@(a::as'), bs@(b::bs') => do --- if (← cmp a b) == Ordering.gt --- then List.cons b <$> mergeM cmp as bs' --- else List.cons a <$> mergeM cmp as' bs --- | [], bs => return bs --- | as, [] => return as --- ---def mergePairsM [Monad μ] (cmp: α → α → μ Ordering) : List (List α) → μ (List (List α)) --- | a::b::xs => List.cons <$> (mergeM cmp a b) <*> mergePairsM cmp xs --- | xs => return xs --- ---partial def mergeAllM [Monad μ] (cmp: α → α → μ Ordering) : List (List α) → μ (List α) --- | [x] => return x --- | xs => mergePairsM cmp xs >>= mergeAllM cmp --- ---mutual --- partial def sequencesM [Monad μ] (cmp : α → α → μ Ordering) : List α → μ (List (List α)) --- | a::b::xs => do --- if (← cmp a b) == .gt --- then descendingM cmp b [a] xs --- else ascendingM cmp b (fun ys => a :: ys) xs --- | xs => return [xs] --- --- partial def descendingM [Monad μ] (cmp : α → α → μ Ordering) (a : α) (as : List α) : List α → μ (List (List α)) --- | b::bs => do --- if (← cmp a b) == .gt --- then descendingM cmp b (a::as) bs --- else List.cons (a::as) <$> sequencesM cmp (b::bs) --- | [] => List.cons (a::as) <$> sequencesM cmp [] 
--- --- partial def ascendingM [Monad μ] (cmp : α → α → μ Ordering) (a : α) (as : List α → List α) : List α → μ (List (List α)) --- | b::bs => do --- if (← cmp a b) != .gt --- then ascendingM cmp b (fun ys => as (a :: ys)) bs --- else List.cons (as [a]) <$> sequencesM cmp (b::bs) --- | [] => List.cons (as [a]) <$> sequencesM cmp [] ---end --- ---def sortByM [Monad μ] (xs: List α) (cmp: α -> α -> μ Ordering) : μ (List α) := --- sequencesM cmp xs >>= mergeAllM cmp --- ---end List --- --- ---namespace Ix.Toy --- ---deriving instance BEq for ByteArray --- ---structure Address where --- hash : ByteArray --- deriving Inhabited, BEq, Hashable --- ---instance : Ord Address where --- compare a b := compare a.hash.data.toList b.hash.data.toList --- ---def Address.blake3 (x: ByteArray) : Address := ⟨(Blake3.hash x).val⟩ --- ---inductive AST where ---| var : Nat -> AST ---| lam : AST -> AST ---| app : AST -> AST -> AST ---| ref : String -> AST ---deriving Inhabited, BEq, Hashable --- ---structure Def where --- name: String --- val: AST --- all: List String ---deriving BEq, Inhabited --- ---structure Env where --- decls: Std.HashMap String Def --- ---inductive HAST where ---| var : Nat -> HAST ---| ref : Address -> HAST ---| rcr : Nat -> HAST ---| lam : HAST -> HAST ---| app : HAST -> HAST -> HAST ---deriving BEq, Hashable --- ---def natToBytesLE (x: Nat) : Array UInt8 := --- if x == 0 then Array.mkArray1 0 else List.toArray (go x x) --- where --- go : Nat -> Nat -> List UInt8 --- | _, 0 => [] --- | 0, _ => [] --- | Nat.succ f, x => Nat.toUInt8 x:: go f (x / 256) --- ---def HAST.serialize : HAST -> ByteArray ---| .var x => ⟨#[0x0]⟩ ++ ⟨natToBytesLE x⟩ ---| .ref x => ⟨#[0x1]⟩ ++ x.hash ---| .rcr x => ⟨#[0x2]⟩ ++ ⟨natToBytesLE x⟩ ---| .lam x => ⟨#[0x3]⟩ ++ x.serialize ---| .app x y => ⟨#[0x4]⟩ ++ x.serialize ++ y.serialize --- -----def HAST.hash (x: HAST) : Address := Address.blake3 x.serialize --- ---inductive HConst where ---| defn : HAST -> HConst ---| defs : List HAST -> 
HConst ---| dprj : Address -> Nat -> HConst ---deriving BEq, Hashable --- ---def HConst.serialize : HConst -> ByteArray ---| .defn x => ⟨#[0x5]⟩ ++ x.serialize ---| .defs xs => ⟨#[0x6]⟩ ++ (xs.foldr (fun a acc => a.serialize ++ acc) ⟨#[]⟩) ---| .dprj a n => ⟨#[0x7]⟩ ++ a.hash ++ ⟨natToBytesLE n⟩ --- ---structure HEnv where --- names: Std.HashMap String (Address × Nat) --- consts: Std.HashMap Address HConst --- ---structure CompileState where --- env: Std.HashMap String Def --- names: Std.HashMap String Address --- cache: Std.HashMap HConst Address --- consts: Std.HashMap Address HConst --- ---def CompileState.init (x: Env) : CompileState := --- ⟨x.decls, default, default, default⟩ --- ---abbrev CompileM := ExceptT String <| StateT CompileState Id --- ---def CompileM.run (stt: CompileState) (c : CompileM α) --- : Except String α × CompileState --- := StateT.run (ExceptT.run c) stt --- ---def hashHConst (const: HConst) : CompileM Address := do --- match (<- get).cache.get? const with --- | some a => pure a --- | none => do --- let addr := Address.blake3 const.serialize --- modifyGet fun stt => (addr, { stt with --- cache := stt.cache.insert const addr --- consts := stt.consts.insert addr const --- }) --- ---abbrev MutCtx := Std.HashMap String Nat --- ---structure SOrder where --- strong: Bool --- ord: Ordering ---deriving Inhabited --- ---def SOrder.cmp : SOrder -> SOrder -> SOrder ---| ⟨true, .eq⟩, y => y ---| ⟨false, .eq⟩, y => ⟨false, y.ord⟩ ---| x, _ => x --- ---def SOrder.cmpM [Monad μ] (x y: μ SOrder) : μ SOrder := do --- match <- x with --- | ⟨true, .eq⟩ => y --- | ⟨false, .eq⟩ => y >>= fun ⟨_, b⟩ => pure ⟨false, b⟩ --- | x => pure x --- ---def lookupDef (s: String) : CompileM Def := do match (<- get).env.get? s with ---| some d => pure d ---| none => throw "unknown def" --- ---mutual --- ---partial def compileDef (defn: Def): CompileM Address := do --- match (<- get).names.get? 
defn.name with --- | some a => pure a --- | none => match defn.all with --- | [] => do --- let addr <- (.defn <$> compileAST {} defn.val) >>= hashHConst --- modifyGet fun stt => (addr, { stt with --- names := stt.names.insert defn.name addr --- }) --- | ds => do --- let defs <- ds.mapM lookupDef --- let (mutDefs, mutCtx) <- partitionDefs defs.toArray --- let hasts <- mutDefs.mapM (fun ds => compileAST mutCtx ds[0]!) --- let block <- hashHConst (.defs hasts) --- for d in ds do --- let idx := mutCtx.get! d --- let addr <- hashHConst (.dprj block idx) --- modify fun stt => { stt with names := stt.names.insert d addr } --- hashHConst (.dprj block (mutCtx.get! defn.name)) --- ---partial def compileAST (mutCtx: MutCtx) : AST -> CompileM HAST ---| .var x => pure <| .var x ---| .lam x => .lam <$> compileAST mutCtx x ---| .app x y => .app <$> compileAST mutCtx x <*> compileAST mutCtx y ---| .ref s => match mutCtx.get? s with --- | .some n => pure <| .rcr n --- | .none => do match (<- get).env.get? s with --- | some x => do --- let addr <- compileDef x --- pure $ .ref addr --- | .none => throw "unknown reference" --- ---partial def compareAST (mutCtx: MutCtx) : AST -> AST -> CompileM SOrder ---| .var x, .var y => pure ⟨true, compare x y⟩ ---| .var _, _ => pure ⟨true, .lt⟩ ---| _, .var _=> pure ⟨true, .gt⟩ ---| .lam x, .lam y => compareAST mutCtx x y ---| .lam _, _ => pure ⟨true, .lt⟩ ---| _, .lam _=> pure ⟨true, .gt⟩ ---| .app xf xa, .app yf ya => --- SOrder.cmpM (compareAST mutCtx xf yf) (compareAST mutCtx xa ya) ---| .app _ _, _ => pure ⟨true, .lt⟩ ---| _ , .app _ _ => pure ⟨true, .gt⟩ ---| .ref x, .ref y => match mutCtx.get? x, mutCtx.get? 
y with --- | some nx, some ny => pure ⟨false, compare nx ny⟩ --- | none, some _ => pure ⟨true, .gt⟩ --- | some _, none => pure ⟨true, .lt⟩ --- | none, none => do --- let x' <- lookupDef x >>= compileDef --- let y' <- lookupDef y >>= compileDef --- pure ⟨true, compare x' y'⟩ --- --- ---partial def partitionDefs (defs: Array Def) : CompileM ((List (List Def)) × MutCtx) := do --- -- initial state --- let mut partOf: Array Nat := Array.replicate defs.size 0 --- let mut parts : Array (Array Nat) := #[Array.range defs.size] --- let mut cache : Std.HashMap (Nat × Nat) SOrder := {} --- --- -- partiton refinement loop --- while true do --- let mutCtx := buildMutCtx partOf --- let mut newParts : Array (Array Nat) := #[] --- let mut nextPartId := 0 --- let mut changed := false --- let mut newPartOf := partOf --- --- for part in parts do --- -- singleton partition --- if part.size <= 1 then --- newParts := newParts.push part --- for i in part do --- newPartOf := newPartOf.set! i nextPartId --- nextPartId := nextPartId + 1 --- else --- -- build comparison signatures --- let mut sigs : Array (Array SOrder) := Array.replicate part.size #[] --- --- for idx in [0:part.size] do --- let i := part[idx]! --- let mut sig : Array SOrder := #[] --- --- for jdx in part[0:part.size] do --- let j := part[jdx]! --- if i != j then --- let key := (min i j, max i j) --- let cmp <- match cache.get? key with --- | some cmp => pure cmp --- | none => do --- let cmp <- compareAST mutCtx defs[i]!.val defs[j]!.val --- if cmp.strong then cache := cache.insert key cmp --- pure cmp --- sig := sig.push cmp --- --- sigs := sigs.set! idx sig --- --- let mut splits : Array (Array Nat) := #[] --- let mut splitSigs: Array (Array SOrder) := #[] --- --- for idx in [0:part.size] do --- let idx := part[idx]! --- let sig := sigs[idx]! --- let mut found := false --- --- for g in [0:splits.size] do --- if sigsEqual sig splitSigs[g]! then --- splits := splits.set! 
g (splits[g]!.push idx) --- found := true --- break --- if !found then --- splits := splits.push #[idx] --- splitSigs := splitSigs.push sig --- --- if splits.size > 1 then --- changed := true --- for split in splits do --- for idx in split do --- newPartOf := newPartOf.set! idx nextPartId --- newParts := newParts.push split --- nextPartId := nextPartId + 1 --- --- parts := newParts --- partOf := newPartOf --- if !changed then break --- --- let ctx := buildMutCtx partOf --- let sortedParts <- sortParts parts ctx --- --- sorry --- where --- sigsEqual (x y: Array SOrder) : Bool := --- x.size == y.size && ((List.range x.size).all fun i => --- x[i]!.ord == y[i]!.ord) --- --- buildMutCtx (partOf: Array Nat) : MutCtx := Id.run do --- let mut ctx : MutCtx := {} --- for i in [0:defs.size] do --- ctx := ctx.insert defs[i]!.name partOf[i]! --- ctx --- sortParts (parts: Array (Array Nat)) (ctx: MutCtx) : CompileM (List (List Def)) := do --- let mut reps : Array (Nat × Nat) := #[] --- for i in [0:parts.size] do --- reps := reps.push (i, parts[i]![0]!) --- --- let sortedParts : List (Nat × Nat) <- do --- reps.toList.sortByM <| fun (_, i) (_, j) => (·.ord) <$> --- compareAST ctx defs[i]!.val defs[j]!.val --- let sorted := sortedParts.map <| fun (i, _) => --- parts[pidx]!.qsort (fun i j => defs[i]!.name < defs[j]!.name) --- pure sorted --- ---end --- ---end Ix.Toy --- --- diff --git a/README.md b/README.md index 89b67526..c03d8b0f 100644 --- a/README.md +++ b/README.md @@ -176,6 +176,23 @@ Ix consists of the following core components: - Build and test the Ix library with `lake build` and `lake test` +### Testing + +**Lean tests:** `lake test` runs all primary test suites. + +- `lake test -- ` runs a specific suite. 
Primary suites: `ffi`, `byte-array`, `ixon`, `claim`, `commit`, `canon`, `keccak`, `sharing`, `graph-unit`, `condense-unit` +- `lake test -- --ignored` runs expensive test suites: `shard-map`, `rust-canon-roundtrip`, `serial-canon-roundtrip`, `parallel-canon-roundtrip`, `graph-cross`, `condense-cross`, `compile`, `decompile`, `rust-serialize`, `rust-decompile`, `commit-io`, `sharing-io` +- `lake test -- ` runs a specific expensive suite by name +- `lake test -- cli` runs CLI integration tests +- `lake test -- rust-compile` runs the Rust cross-compilation diagnostic + +To run the Aiur and IxVM tests, which are slower, run: + +- `lake exe test-aiur` +- `lake exe test-ixvm` + +**Rust tests:** `cargo test` + - Run the Ix CLI with `lake exe ix`. Install the binary with `lake run install` - `ix store ` will compile a lean program into the ix store as ixon data diff --git a/Tests/AiurTest.lean b/Tests/AiurTest.lean new file mode 100644 index 00000000..aa4db641 --- /dev/null +++ b/Tests/AiurTest.lean @@ -0,0 +1,8 @@ +import Tests.Aiur + +def testSuite : Std.HashMap String (List LSpec.TestSeq) := .ofList [ + ("aiur", Tests.Aiur.suite), +] + +def main (args : List String) : IO UInt32 := do + LSpec.lspecIO testSuite args diff --git a/Tests/Cli.lean b/Tests/Cli.lean index 021eb21c..89e2afce 100644 --- a/Tests/Cli.lean +++ b/Tests/Cli.lean @@ -1,4 +1,4 @@ -/-! Integration tests for the Ix CLI -/ +/- Integration tests for the Ix CLI -/ def Tests.Cli.run (buildCmd: String) (buildArgs : Array String) (buildDir : Option System.FilePath) : IO Unit := do let proc : IO.Process.SpawnArgs := diff --git a/Tests/Common.lean b/Tests/Common.lean index 9e2ef71b..85e2dd07 100644 --- a/Tests/Common.lean +++ b/Tests/Common.lean @@ -1,4 +1,10 @@ +/- + Common test utilities. + Basic type generators have been moved to Tests/Gen/Basic.lean. 
+-/ + import LSpec +import Tests.Gen.Basic import Ix.Unsigned import Ix.Aiur.Goldilocks import Ix.Aiur.Protocol @@ -7,33 +13,6 @@ import Ix.Aiur.Compile open LSpec SlimCheck Gen -def genUInt8 : Gen UInt8 := - UInt8.ofNat <$> choose Nat 0 0xFF - -def genUInt32 : Gen UInt32 := - UInt32.ofNat <$> choose Nat 0 0xFFFFFFFF - -def genUInt64 : Gen UInt64 := - UInt64.ofNat <$> choose Nat 0 0xFFFFFFFFFFFFFFFF - -def genUSize : Gen USize := - .ofNat <$> choose Nat 0 (2^System.Platform.numBits - 1) - -def frequency' (default: Gen α) (xs: List (Nat × Gen α)) : Gen α := do - let n ← choose Nat 0 total - pick n xs - where - total := List.sum (Prod.fst <$> xs) - pick n xs := match xs with - | [] => default - | (k, x) :: xs => if n <= k then x else pick (n - k) xs - -def frequency [Inhabited α] (xs: List (Nat × Gen α)) : Gen α := - frequency' xs.head!.snd xs - -def oneOf' [Inhabited α] (xs: List (Gen α)) : Gen α := - frequency (xs.map (fun x => (100, x))) - structure AiurTestCase where functionName : Lean.Name input : Array Aiur.G diff --git a/Tests/FFI.lean b/Tests/FFI.lean new file mode 100644 index 00000000..9950e931 --- /dev/null +++ b/Tests/FFI.lean @@ -0,0 +1,15 @@ +/- + FFI test suite aggregator. + Imports all Tests.FFI.* modules and exports a combined suite. +-/ + +import Tests.FFI.Basic +import Tests.FFI.Ix +import Tests.FFI.Ixon + +namespace Tests.FFI + +def suite : List LSpec.TestSeq := + Tests.FFI.Basic.suite ++ Tests.FFI.Ix.suite ++ Tests.FFI.Ixon.suite + +end Tests.FFI diff --git a/Tests/FFI/Basic.lean b/Tests/FFI/Basic.lean new file mode 100644 index 00000000..6a020eb5 --- /dev/null +++ b/Tests/FFI/Basic.lean @@ -0,0 +1,135 @@ +/- + Basic type FFI roundtrip tests. + Pattern: Lean value → Rust (decode) → Rust (re-encode via C API) → Lean value → compare +-/ + +import LSpec +import Tests.Gen.Basic +import Std.Data.HashMap + +open LSpec SlimCheck Gen + +namespace Tests.FFI.Basic + +/-! 
## FFI declarations for round-trip tests -/ + +/-- Round-trip a Nat through Rust: decode then re-encode -/ +@[extern "rs_roundtrip_nat"] +opaque roundtripNat : @& Nat → Nat + +/-- Round-trip a String through Rust -/ +@[extern "rs_roundtrip_string"] +opaque roundtripString : @& String → String + +/-- Round-trip a List Nat through Rust -/ +@[extern "rs_roundtrip_list_nat"] +opaque roundtripListNat : @& List Nat → List Nat + +/-- Round-trip an Array Nat through Rust -/ +@[extern "rs_roundtrip_array_nat"] +opaque roundtripArrayNat : @& Array Nat → Array Nat + +/-- Round-trip a ByteArray through Rust -/ +@[extern "rs_roundtrip_bytearray"] +opaque roundtripByteArray : @& ByteArray → ByteArray + +@[extern "rs_roundtrip_bool"] +opaque roundtripBool : @& Bool → Bool + +@[extern "rs_roundtrip_point"] +opaque roundtripPoint : @& Point → Point + +@[extern "rs_roundtrip_nat_tree"] +opaque roundtripNatTree : @& NatTree → NatTree + +/-! ## AssocList and HashMap roundtrips -/ + +-- Re-export the internal AssocList type for testing +abbrev AssocList := Std.DHashMap.Internal.AssocList + +@[extern "rs_roundtrip_assoclist_nat_nat"] +opaque roundtripAssocListNatNat : @& AssocList Nat (fun _ => Nat) → AssocList Nat (fun _ => Nat) + +-- DHashMap.Raw for testing the inner structure +abbrev DHashMapRaw := Std.DHashMap.Raw + +@[extern "rs_roundtrip_dhashmap_raw_nat_nat"] +opaque roundtripDHashMapRawNatNat : @& DHashMapRaw Nat (fun _ => Nat) → DHashMapRaw Nat (fun _ => Nat) + +@[extern "rs_roundtrip_hashmap_nat_nat"] +opaque roundtripHashMapNatNat : @& Std.HashMap Nat Nat → Std.HashMap Nat Nat + +/-! 
## Simple unit tests -/ + +def simpleTests : TestSeq := + test "Nat 0" (roundtripNat 0 == 0) ++ + test "Nat 42" (roundtripNat 42 == 42) ++ + test "Nat 1000" (roundtripNat 1000 == 1000) ++ + test "String empty" (roundtripString "" == "") ++ + test "String hello" (roundtripString "hello" == "hello") ++ + test "List []" (roundtripListNat [] == []) ++ + test "List [1,2,3]" (roundtripListNat [1, 2, 3] == [1, 2, 3]) ++ + test "Array #[]" (roundtripArrayNat #[] == #[]) ++ + test "Array #[1,2,3]" (roundtripArrayNat #[1, 2, 3] == #[1, 2, 3]) ++ + test "ByteArray empty" (roundtripByteArray ⟨#[]⟩ == ⟨#[]⟩) ++ + test "ByteArray [1,2,3]" (roundtripByteArray ⟨#[1, 2, 3]⟩ == ⟨#[1, 2, 3]⟩) ++ + test "Point (0, 0)" (roundtripPoint ⟨0, 0⟩ == ⟨0, 0⟩) ++ + test "Point (42, 99)" (roundtripPoint ⟨42, 99⟩ == ⟨42, 99⟩) ++ + test "NatTree leaf" (roundtripNatTree (.leaf 42) == .leaf 42) ++ + test "NatTree node" (roundtripNatTree (.node (.leaf 1) (.leaf 2)) == .node (.leaf 1) (.leaf 2)) + +/-! ## Specific edge case tests -/ + +def largeNatTests : TestSeq := + let testCases : List Nat := [0, 1, 255, 256, 65535, 65536, (2^32 - 1), 2^32, + (2^63 - 1), 2^63, (2^64 - 1), 2^64, 2^64 + 1, 2^128, 2^256] + testCases.foldl (init := .done) fun acc n => + acc ++ .individualIO s!"Nat {n}" (do + let rt := roundtripNat n + pure (rt == n, if rt == n then none else some s!"got {rt}")) .done + +/-! ## Helper to compare HashMaps -/ + +def hashMapEq (m1 m2 : Std.HashMap Nat Nat) : Bool := + m1.size == m2.size && m1.toList.all fun (k, v) => m2.get? 
k == some v + +def assocListEq (l1 l2 : AssocList Nat (fun _ => Nat)) : Bool := + let toSimpleList (l : AssocList Nat (fun _ => Nat)) : List (Nat × Nat) := + l.toList.map fun ⟨k, v⟩ => (k, v) + toSimpleList l1 == toSimpleList l2 + +def assocListTests : TestSeq := + let emptyList : AssocList Nat (fun _ => Nat) := .nil + let single : AssocList Nat (fun _ => Nat) := .cons 1 42 .nil + let double : AssocList Nat (fun _ => Nat) := .cons 2 99 (.cons 1 42 .nil) + test "AssocList nil" (assocListEq (roundtripAssocListNatNat emptyList) emptyList) ++ + test "AssocList single" (assocListEq (roundtripAssocListNatNat single) single) ++ + test "AssocList double" (assocListEq (roundtripAssocListNatNat double) double) + +def hashMapTests : TestSeq := + test "HashMap empty" (hashMapEq (roundtripHashMapNatNat {}) {}) ++ + test "HashMap single" (hashMapEq (roundtripHashMapNatNat (({} : Std.HashMap Nat Nat).insert 1 42)) (({} : Std.HashMap Nat Nat).insert 1 42)) + +def boolTests : TestSeq := + test "Bool true" (roundtripBool true == true) ++ + test "Bool false" (roundtripBool false == false) + +/-! 
## Test Suite -/ + +def suite : List TestSeq := [ + simpleTests, + largeNatTests, + assocListTests, + hashMapTests, + boolTests, + checkIO "Nat roundtrip" (∀ n : Nat, roundtripNat n == n), + checkIO "String roundtrip" (∀ s : String, roundtripString s == s), + checkIO "List Nat roundtrip" (∀ xs : List Nat, roundtripListNat xs == xs), + checkIO "Array Nat roundtrip" (∀ arr : Array Nat, roundtripArrayNat arr == arr), + checkIO "ByteArray roundtrip" (∀ ba : ByteArray, roundtripByteArray ba == ba), + checkIO "Point roundtrip" (∀ p : Point, roundtripPoint p == p), + checkIO "NatTree roundtrip" (∀ t : NatTree, roundtripNatTree t == t), + checkIO "HashMap Nat Nat roundtrip" (∀ m : Std.HashMap Nat Nat, hashMapEq (roundtripHashMapNatNat m) m), +] + +end Tests.FFI.Basic diff --git a/Tests/FFI/Ix.lean b/Tests/FFI/Ix.lean new file mode 100644 index 00000000..a103fc61 --- /dev/null +++ b/Tests/FFI/Ix.lean @@ -0,0 +1,290 @@ +/- + Ix.* type FFI roundtrip tests. + Pattern: Lean value → Rust (decode) → Rust (re-encode via C API) → Lean value → compare +-/ + +import LSpec +import Tests.Gen.Basic +import Tests.Gen.Ix +import Tests.Gen.Ixon +import Ix.Environment +import Ix.Address +import Ix.CompileM +import Ix.DecompileM +import Ix.Ixon +import Tests.FFI.Ixon + +open LSpec SlimCheck Gen +open Tests.Gen.Ix +open Tests.Gen.Ixon +open Tests.FFI.Ixon (rawEnvEq) + +namespace Tests.FFI.Ix + +/-! 
## Ix type roundtrip FFI declarations -/ + +@[extern "rs_roundtrip_ix_address"] +opaque roundtripIxAddress : @& Address → Address + +@[extern "rs_roundtrip_ix_name"] +opaque roundtripIxName : @& Ix.Name → Ix.Name + +@[extern "rs_roundtrip_ix_level"] +opaque roundtripIxLevel : @& Ix.Level → Ix.Level + +@[extern "rs_roundtrip_ix_expr"] +opaque roundtripIxExpr : @& Ix.Expr → Ix.Expr + +@[extern "rs_roundtrip_ix_int"] +opaque roundtripIxInt : @& Ix.Int → Ix.Int + +@[extern "rs_roundtrip_ix_substring"] +opaque roundtripIxSubstring : @& Ix.Substring → Ix.Substring + +@[extern "rs_roundtrip_ix_source_info"] +opaque roundtripIxSourceInfo : @& Ix.SourceInfo → Ix.SourceInfo + +@[extern "rs_roundtrip_ix_syntax_preresolved"] +opaque roundtripIxSyntaxPreresolved : @& Ix.SyntaxPreresolved → Ix.SyntaxPreresolved + +@[extern "rs_roundtrip_ix_syntax"] +opaque roundtripIxSyntax : @& Ix.Syntax → Ix.Syntax + +@[extern "rs_roundtrip_ix_data_value"] +opaque roundtripIxDataValue : @& Ix.DataValue → Ix.DataValue + +-- Need Inhabited instance for opaque declaration +instance : Inhabited Ix.ConstantInfo where + default := .axiomInfo { cnst := { name := default, levelParams := #[], type := default }, isUnsafe := false } + +@[extern "rs_roundtrip_ix_constant_info"] +opaque roundtripIxConstantInfo : @& Ix.ConstantInfo → Ix.ConstantInfo + +-- Need Inhabited instance for Environment opaque declaration +instance : Inhabited Ix.Environment where + default := { consts := {} } + +-- Rust roundtrip returns RawEnvironment (array-based), not Environment (HashMap-based) +@[extern "rs_roundtrip_ix_raw_environment"] +opaque roundtripIxRawEnvironment : @& Ix.RawEnvironment → Ix.RawEnvironment + +-- Roundtrip Environment by going through RawEnvironment +@[extern "rs_roundtrip_ix_environment"] +opaque roundtripIxEnvironmentRaw : @& Ix.Environment → Ix.RawEnvironment + +def roundtripIxEnvironment (env : Ix.Environment) : Ix.Environment := + (roundtripIxEnvironmentRaw env).toEnvironment + +-- Round-trip 
Ix.RustCondensedBlocks +instance : Inhabited Ix.RustCondensedBlocks where + default := { lowLinks := #[], blocks := #[], blockRefs := #[] } + +instance : Repr Ix.RustCondensedBlocks where + reprPrec cb _ := s!"RustCondensedBlocks(lowLinks={cb.lowLinks.size}, blocks={cb.blocks.size}, blockRefs={cb.blockRefs.size})" + +@[extern "rs_roundtrip_rust_condensed_blocks"] +opaque roundtripRustCondensedBlocks : @& Ix.RustCondensedBlocks → Ix.RustCondensedBlocks + +-- Round-trip Ix.CompileM.RustCompilePhases +instance : Inhabited Ix.CompileM.RustCompilePhases where + default := { rawEnv := default, condensed := default, compileEnv := default } + +instance : Repr Ix.CompileM.RustCompilePhases where + reprPrec p _ := s!"RustCompilePhases(rawEnv.consts={p.rawEnv.consts.size}, condensed.blocks={p.condensed.blocks.size}, compileEnv.consts={p.compileEnv.consts.size})" + +@[extern "rs_roundtrip_rust_compile_phases"] +opaque roundtripRustCompilePhases : @& Ix.CompileM.RustCompilePhases → Ix.CompileM.RustCompilePhases + +/-! 
## BlockCompareResult and BlockCompareDetail FFI tests -/ + +/-- Result of comparing a single block between Lean and Rust -/ +inductive BlockCompareResult where + | «match» + | mismatch (leanSize rustSize firstDiffOffset : UInt64) + | notFound + deriving Repr, BEq, DecidableEq, Inhabited + +/-- Detailed comparison with sharing statistics -/ +structure BlockCompareDetail where + result : BlockCompareResult + leanSharingLen : UInt64 + rustSharingLen : UInt64 + deriving Repr, BEq, DecidableEq, Inhabited + +@[extern "rs_roundtrip_block_compare_result"] +opaque roundtripBlockCompareResult : @& BlockCompareResult → BlockCompareResult + +@[extern "rs_roundtrip_block_compare_detail"] +opaque roundtripBlockCompareDetail : @& BlockCompareDetail → BlockCompareDetail + +def blockCompareResultTests : TestSeq := + let matchCase := BlockCompareResult.match + let mismatchCase := BlockCompareResult.mismatch 100 200 50 + let notFoundCase := BlockCompareResult.notFound + test "BlockCompareResult.match" (roundtripBlockCompareResult matchCase == matchCase) ++ + test "BlockCompareResult.mismatch" (roundtripBlockCompareResult mismatchCase == mismatchCase) ++ + test "BlockCompareResult.notFound" (roundtripBlockCompareResult notFoundCase == notFoundCase) + +def blockCompareDetailTests : TestSeq := + let detailMatch := BlockCompareDetail.mk .match 10 20 + let detailMismatch := BlockCompareDetail.mk (.mismatch 100 200 50) 15 25 + let detailNotFound := BlockCompareDetail.mk .notFound 5 0 + test "BlockCompareDetail match" (roundtripBlockCompareDetail detailMatch == detailMatch) ++ + test "BlockCompareDetail mismatch" (roundtripBlockCompareDetail detailMismatch == detailMismatch) ++ + test "BlockCompareDetail notFound" (roundtripBlockCompareDetail detailNotFound == detailNotFound) + +/-! 
## Shrinkable instances -/ + +instance : Shrinkable Ix.RustCondensedBlocks where + shrink cb := + if cb.lowLinks.isEmpty && cb.blocks.isEmpty && cb.blockRefs.isEmpty then [] + else [{ + lowLinks := if cb.lowLinks.isEmpty then #[] else cb.lowLinks.pop, + blocks := if cb.blocks.isEmpty then #[] else cb.blocks.pop, + blockRefs := if cb.blockRefs.isEmpty then #[] else cb.blockRefs.pop + }] + +/-! ## Ix type comparison by hash -/ + +def ixNameEq (a b : Ix.Name) : Bool := a.getHash == b.getHash +def ixLevelEq (a b : Ix.Level) : Bool := a.getHash == b.getHash +def ixExprEq (a b : Ix.Expr) : Bool := a.getHash == b.getHash + +/-! ## Comparison helpers -/ + +/-- Compare RustCondensedBlocks by checking array sizes. + Size-only: element-wise comparison is not feasible because the Lean and Rust + condensation algorithms may produce different SCC orderings. -/ +def rustCondensedBlocksEq (a b : Ix.RustCondensedBlocks) : Bool := + a.lowLinks.size == b.lowLinks.size && + a.blocks.size == b.blocks.size && + a.blockRefs.size == b.blockRefs.size + +/-- Compare Ix.ConstantInfo by structural equality (all fields). -/ +def ixConstantInfoEq (a b : Ix.ConstantInfo) : Bool := a == b + +/-- Compare RawEnvironment with content-aware comparison. + Checks that all constants in a have matching constants in b by name hash. -/ +def ixRawEnvironmentEq (a b : Ix.RawEnvironment) : Bool := + a.consts.size == b.consts.size && + a.consts.all fun (name, info) => + b.consts.any fun (name', info') => + ixNameEq name name' && ixConstantInfoEq info info' + +/-- Compare RustCompilePhases by checking sizes -/ +def rustCompilePhasesEq (a b : Ix.CompileM.RustCompilePhases) : Bool := + ixRawEnvironmentEq a.rawEnv b.rawEnv && + rustCondensedBlocksEq a.condensed b.condensed && + rawEnvEq a.compileEnv b.compileEnv + +/-! 
## Ix.RawEnvironment unit tests -/ + +/-- Test empty RawEnvironment roundtrip -/ +def ixRawEnvironmentTests : TestSeq := + let empty : Ix.RawEnvironment := { consts := #[] } + -- Create a simple ConstantInfo for testing + let name := Ix.Name.mkStr Ix.Name.mkAnon "test" + let expr := Ix.Expr.mkSort Ix.Level.mkZero + let constVal : Ix.ConstantVal := { name := name, levelParams := #[], type := expr } + let axiomVal : Ix.AxiomVal := { cnst := constVal, isUnsafe := false } + let constInfo : Ix.ConstantInfo := .axiomInfo axiomVal + let withOne : Ix.RawEnvironment := { consts := #[(name, constInfo)] } + test "Ix.RawEnvironment empty" (ixRawEnvironmentEq (roundtripIxRawEnvironment empty) empty) ++ + test "Ix.RawEnvironment single const" (ixRawEnvironmentEq (roundtripIxRawEnvironment withOne) withOne) + +/-! ## RustCondensedBlocks unit tests -/ + +def rustCondensedBlocksTests : TestSeq := + let empty : Ix.RustCondensedBlocks := { lowLinks := #[], blocks := #[], blockRefs := #[] } + let n1 := Ix.Name.mkStr Ix.Name.mkAnon "a" + let n2 := Ix.Name.mkStr Ix.Name.mkAnon "b" + let withData : Ix.RustCondensedBlocks := { + lowLinks := #[(n1, n2)], + blocks := #[(n1, #[n1, n2])], + blockRefs := #[(n2, #[n1])] + } + test "RustCondensedBlocks empty" (rustCondensedBlocksEq (roundtripRustCondensedBlocks empty) empty) ++ + test "RustCondensedBlocks with data" (rustCondensedBlocksEq (roundtripRustCondensedBlocks withData) withData) + +/-! 
## SerializeError, DecompileError, and CompileError roundtrip FFI -/ + +@[extern "rs_roundtrip_serialize_error"] +opaque roundtripSerializeError : @& Ixon.SerializeError → Ixon.SerializeError + +@[extern "rs_roundtrip_decompile_error"] +opaque roundtripDecompileError : @& Ix.DecompileM.DecompileError → Ix.DecompileM.DecompileError + +@[extern "rs_roundtrip_compile_error"] +opaque roundtripCompileError : @& Ix.CompileM.CompileError → Ix.CompileM.CompileError + +def serializeErrorTests : TestSeq := + test "SerializeError.unexpectedEof" (roundtripSerializeError (.unexpectedEof "u64") == .unexpectedEof "u64") ++ + test "SerializeError.invalidTag" (roundtripSerializeError (.invalidTag 0xFF "expr") == .invalidTag 0xFF "expr") ++ + test "SerializeError.invalidFlag" (roundtripSerializeError (.invalidFlag 3 "univ") == .invalidFlag 3 "univ") ++ + test "SerializeError.invalidVariant" (roundtripSerializeError (.invalidVariant 99 "const") == .invalidVariant 99 "const") ++ + test "SerializeError.invalidBool" (roundtripSerializeError (.invalidBool 2) == .invalidBool 2) ++ + test "SerializeError.addressError" (roundtripSerializeError .addressError == .addressError) ++ + test "SerializeError.invalidShareIndex" (roundtripSerializeError (.invalidShareIndex 5 10) == .invalidShareIndex 5 10) + +def decompileErrorTests : TestSeq := + let addr := Address.blake3 (ByteArray.mk #[1, 2, 3]) + let se := Ixon.SerializeError.unexpectedEof "test" + test "DecompileError.invalidRefIndex" (roundtripDecompileError (.invalidRefIndex 5 10 "test") == .invalidRefIndex 5 10 "test") ++ + test "DecompileError.invalidUnivIndex" (roundtripDecompileError (.invalidUnivIndex 3 7 "foo") == .invalidUnivIndex 3 7 "foo") ++ + test "DecompileError.invalidShareIndex" (roundtripDecompileError (.invalidShareIndex 2 5 "bar") == .invalidShareIndex 2 5 "bar") ++ + test "DecompileError.invalidRecIndex" (roundtripDecompileError (.invalidRecIndex 1 4 "baz") == .invalidRecIndex 1 4 "baz") ++ + test 
"DecompileError.invalidUnivVarIndex" (roundtripDecompileError (.invalidUnivVarIndex 8 3 "qux") == .invalidUnivVarIndex 8 3 "qux") ++ + test "DecompileError.missingAddress" (roundtripDecompileError (.missingAddress addr) == .missingAddress addr) ++ + test "DecompileError.missingMetadata" (roundtripDecompileError (.missingMetadata addr) == .missingMetadata addr) ++ + test "DecompileError.blobNotFound" (roundtripDecompileError (.blobNotFound addr) == .blobNotFound addr) ++ + test "DecompileError.badBlobFormat" (roundtripDecompileError (.badBlobFormat addr "UTF-8") == .badBlobFormat addr "UTF-8") ++ + test "DecompileError.badConstantFormat" (roundtripDecompileError (.badConstantFormat "bad") == .badConstantFormat "bad") ++ + test "DecompileError.serializeError" (roundtripDecompileError (.serializeError se) == .serializeError se) + +def compileErrorTests : TestSeq := + let addr := Address.blake3 (ByteArray.mk #[4, 5, 6]) + let se := Ixon.SerializeError.addressError + test "CompileError.missingConstant" (roundtripCompileError (.missingConstant "Nat.add") == .missingConstant "Nat.add") ++ + test "CompileError.missingAddress" (roundtripCompileError (.missingAddress addr) == .missingAddress addr) ++ + test "CompileError.invalidMutualBlock" (roundtripCompileError (.invalidMutualBlock "empty") == .invalidMutualBlock "empty") ++ + test "CompileError.unsupportedExpr" (roundtripCompileError (.unsupportedExpr "mvar") == .unsupportedExpr "mvar") ++ + test "CompileError.unknownUnivParam" (roundtripCompileError (.unknownUnivParam "Nat" "u") == .unknownUnivParam "Nat" "u") ++ + test "CompileError.serializeError" (roundtripCompileError (.serializeError se) == .serializeError se) + +/-! 
## Test Suite -/ + +def suite : List TestSeq := [ + -- Block comparison types + blockCompareResultTests, + blockCompareDetailTests, + -- Error type roundtrips + serializeErrorTests, + decompileErrorTests, + compileErrorTests, + -- Environment unit tests + ixRawEnvironmentTests, + rustCondensedBlocksTests, + -- Property tests for basic Ix types + checkIO "Address roundtrip" (∀ a : Address, roundtripIxAddress a == a), + checkIO "Ix.Name roundtrip" (∀ n : Ix.Name, ixNameEq (roundtripIxName n) n), + checkIO "Ix.Level roundtrip" (∀ l : Ix.Level, ixLevelEq (roundtripIxLevel l) l), + checkIO "Ix.Expr roundtrip" (∀ e : Ix.Expr, ixExprEq (roundtripIxExpr e) e), + checkIO "Ix.Int roundtrip" (∀ i : Ix.Int, roundtripIxInt i == i), + checkIO "Ix.Substring roundtrip" (∀ s : Ix.Substring, roundtripIxSubstring s == s), + checkIO "Ix.SourceInfo roundtrip" (∀ si : Ix.SourceInfo, roundtripIxSourceInfo si == si), + checkIO "Ix.SyntaxPreresolved roundtrip" (∀ sp : Ix.SyntaxPreresolved, roundtripIxSyntaxPreresolved sp == sp), + checkIO "Ix.Syntax roundtrip" (∀ s : Ix.Syntax, roundtripIxSyntax s == s), + checkIO "Ix.DataValue roundtrip" (∀ dv : Ix.DataValue, roundtripIxDataValue dv == dv), + checkIO "Ix.ConstantInfo roundtrip" (∀ ci : Ix.ConstantInfo, ixConstantInfoEq (roundtripIxConstantInfo ci) ci), + -- Property tests for Environment types + checkIO "Ix.RawEnvironment roundtrip" (∀ env : Ix.RawEnvironment, ixRawEnvironmentEq (roundtripIxRawEnvironment env) env), + -- Composite types + checkIO "RustCondensedBlocks roundtrip" (∀ cb : Ix.RustCondensedBlocks, rustCondensedBlocksEq (roundtripRustCondensedBlocks cb) cb), + checkIO "RustCompilePhases roundtrip" (∀ p : Ix.CompileM.RustCompilePhases, rustCompilePhasesEq (roundtripRustCompilePhases p) p), + -- Error type property tests + checkIO "SerializeError roundtrip" (∀ e : Ixon.SerializeError, roundtripSerializeError e == e), + checkIO "DecompileError roundtrip" (∀ e : Ix.DecompileM.DecompileError, roundtripDecompileError e == e), + 
checkIO "CompileError roundtrip" (∀ e : Ix.CompileM.CompileError, roundtripCompileError e == e), +] + +end Tests.FFI.Ix diff --git a/Tests/FFI/Ixon.lean b/Tests/FFI/Ixon.lean new file mode 100644 index 00000000..64b828f3 --- /dev/null +++ b/Tests/FFI/Ixon.lean @@ -0,0 +1,309 @@ +/- + Ixon.* type FFI roundtrip tests. + Pattern: Lean value → Rust (decode) → Rust (re-encode via C API) → Lean value → compare +-/ + +import LSpec +import Tests.Gen.Ixon +import Ix.Ixon + +open LSpec SlimCheck Gen Ixon +open Ix (DefKind DefinitionSafety QuotKind) + +namespace Tests.FFI.Ixon + +/-! ## Ixon type roundtrip FFI declarations -/ + +-- Simple enums (use lean_box/lean_unbox) +@[extern "rs_roundtrip_ixon_def_kind"] +opaque roundtripIxonDefKind : @& DefKind → DefKind + +@[extern "rs_roundtrip_ixon_definition_safety"] +opaque roundtripIxonDefinitionSafety : @& DefinitionSafety → DefinitionSafety + +@[extern "rs_roundtrip_ixon_quot_kind"] +opaque roundtripIxonQuotKind : @& QuotKind → QuotKind + +-- Core recursive types +@[extern "rs_roundtrip_ixon_univ"] +opaque roundtripIxonUniv : @& Univ → Univ + +@[extern "rs_roundtrip_ixon_expr"] +opaque roundtripIxonExpr : @& Expr → Expr + +-- Constant structures +@[extern "rs_roundtrip_ixon_definition"] +opaque roundtripIxonDefinition : @& Definition → Definition + +@[extern "rs_roundtrip_ixon_recursor_rule"] +opaque roundtripIxonRecursorRule : @& RecursorRule → RecursorRule + +@[extern "rs_roundtrip_ixon_recursor"] +opaque roundtripIxonRecursor : @& Recursor → Recursor + +@[extern "rs_roundtrip_ixon_axiom"] +opaque roundtripIxonAxiom : @& Axiom → Axiom + +@[extern "rs_roundtrip_ixon_quotient"] +opaque roundtripIxonQuotient : @& Quotient → Quotient + +@[extern "rs_roundtrip_ixon_constructor"] +opaque roundtripIxonConstructor : @& Constructor → Constructor + +@[extern "rs_roundtrip_ixon_inductive"] +opaque roundtripIxonInductive : @& Inductive → Inductive + +-- Projection types +@[extern "rs_roundtrip_ixon_inductive_proj"] +opaque 
roundtripIxonInductiveProj : @& InductiveProj → InductiveProj
-- Each rs_roundtrip_* extern sends the Lean value through the Rust codec
-- (decode, then re-encode via the C API) and returns the result; a fixed
-- point means the Lean and Rust encodings agree (see file header).
-- `@&` marks the argument as borrowed across the FFI boundary.

@[extern "rs_roundtrip_ixon_constructor_proj"]
opaque roundtripIxonConstructorProj : @& ConstructorProj → ConstructorProj

@[extern "rs_roundtrip_ixon_recursor_proj"]
opaque roundtripIxonRecursorProj : @& RecursorProj → RecursorProj

@[extern "rs_roundtrip_ixon_definition_proj"]
opaque roundtripIxonDefinitionProj : @& DefinitionProj → DefinitionProj

-- Composite types
@[extern "rs_roundtrip_ixon_mut_const"]
opaque roundtripIxonMutConst : @& MutConst → MutConst

@[extern "rs_roundtrip_ixon_constant_info"]
opaque roundtripIxonConstantInfo : @& ConstantInfo → ConstantInfo

@[extern "rs_roundtrip_ixon_constant"]
opaque roundtripIxonConstant : @& Constant → Constant

-- Metadata types
@[extern "rs_roundtrip_ixon_data_value"]
opaque roundtripIxonDataValue : @& DataValue → DataValue

@[extern "rs_roundtrip_ixon_expr_meta_data"]
opaque roundtripIxonExprMetaData : @& ExprMetaData → ExprMetaData

@[extern "rs_roundtrip_ixon_expr_meta_arena"]
opaque roundtripIxonExprMetaArena : @& ExprMetaArena → ExprMetaArena

@[extern "rs_roundtrip_ixon_constant_meta"]
opaque roundtripIxonConstantMeta : @& ConstantMeta → ConstantMeta

@[extern "rs_roundtrip_ixon_named"]
opaque roundtripIxonNamed : @& Named → Named

@[extern "rs_roundtrip_ixon_comm"]
opaque roundtripIxonComm : @& Comm → Comm

/-!
## Ixon type FFI unit tests -/

/-- Enum roundtrip: every `DefKind` constructor must survive the Rust codec. -/
def ixonDefKindTests : TestSeq :=
  test "Ixon.DefKind.defn" (roundtripIxonDefKind .defn == .defn) ++
  test "Ixon.DefKind.opaq" (roundtripIxonDefKind .opaq == .opaq) ++
  test "Ixon.DefKind.thm" (roundtripIxonDefKind .thm == .thm)

/-- Enum roundtrip: every `DefinitionSafety` constructor. -/
def ixonDefinitionSafetyTests : TestSeq :=
  test "Ixon.DefinitionSafety.unsaf" (roundtripIxonDefinitionSafety .unsaf == .unsaf) ++
  test "Ixon.DefinitionSafety.safe" (roundtripIxonDefinitionSafety .safe == .safe) ++
  test "Ixon.DefinitionSafety.part" (roundtripIxonDefinitionSafety .part == .part)

/-- Enum roundtrip: every `QuotKind` constructor. -/
def ixonQuotKindTests : TestSeq :=
  test "Ixon.QuotKind.type" (roundtripIxonQuotKind .type == .type) ++
  test "Ixon.QuotKind.ctor" (roundtripIxonQuotKind .ctor == .ctor) ++
  test "Ixon.QuotKind.lift" (roundtripIxonQuotKind .lift == .lift) ++
  test "Ixon.QuotKind.ind" (roundtripIxonQuotKind .ind == .ind)

/-- Fixed-case roundtrips for each `Univ` constructor, including nesting. -/
def ixonUnivTests : TestSeq :=
  test "Ixon.Univ.zero" (roundtripIxonUniv .zero == .zero) ++
  test "Ixon.Univ.var 0" (roundtripIxonUniv (.var 0) == .var 0) ++
  test "Ixon.Univ.var 42" (roundtripIxonUniv (.var 42) == .var 42) ++
  test "Ixon.Univ.succ zero" (roundtripIxonUniv (.succ .zero) == .succ .zero) ++
  test "Ixon.Univ.max" (roundtripIxonUniv (.max .zero (.var 1)) == .max .zero (.var 1)) ++
  test "Ixon.Univ.imax" (roundtripIxonUniv (.imax (.var 0) .zero) == .imax (.var 0) .zero)

/-- Fixed-case roundtrips covering every `Expr` constructor once. -/
def ixonExprTests : TestSeq :=
  test "Ixon.Expr.sort" (roundtripIxonExpr (.sort 0) == .sort 0) ++
  test "Ixon.Expr.var" (roundtripIxonExpr (.var 5) == .var 5) ++
  test "Ixon.Expr.ref" (roundtripIxonExpr (.ref 1 #[0, 1]) == .ref 1 #[0, 1]) ++
  test "Ixon.Expr.recur" (roundtripIxonExpr (.recur 2 #[]) == .recur 2 #[]) ++
  test "Ixon.Expr.str" (roundtripIxonExpr (.str 3) == .str 3) ++
  test "Ixon.Expr.nat" (roundtripIxonExpr (.nat 42) == .nat 42) ++
  test "Ixon.Expr.share" (roundtripIxonExpr (.share 0) == .share 0) ++
  test "Ixon.Expr.app" (roundtripIxonExpr (.app (.var 0) (.var 1)) == .app (.var 0) (.var 1)) ++
  test "Ixon.Expr.lam" (roundtripIxonExpr (.lam (.sort 0) (.var 0)) == .lam (.sort 0) (.var 0)) ++
  test "Ixon.Expr.all" (roundtripIxonExpr (.all (.sort 0) (.var 0)) == .all (.sort 0) (.var 0)) ++
  test "Ixon.Expr.letE" (roundtripIxonExpr (.letE true (.sort 0) (.var 0) (.var 1)) == .letE true (.sort 0) (.var 0) (.var 1)) ++
  test "Ixon.Expr.prj" (roundtripIxonExpr (.prj 0 1 (.var 0)) == .prj 0 1 (.var 0))

/-! ## Metadata Unit Tests -/

/-- Fixed-case roundtrips for every `ExprMetaData` constructor, including all
    four `BinderInfo` flavors of `.binder`. -/
def exprMetaDataTests : TestSeq :=
  let testAddr := Address.blake3 (ByteArray.mk #[1, 2, 3])
  -- Fix: was `_kvmap` — the underscore (unused-variable marker) was
  -- misleading, since the binding is used twice in the `.mdata` test below.
  let kvmap : KVMap := #[(testAddr, DataValue.ofBool true)]
  test "ExprMetaData.leaf" (roundtripIxonExprMetaData .leaf == .leaf) ++
  test "ExprMetaData.app" (roundtripIxonExprMetaData (.app 0 1) == .app 0 1) ++
  test "ExprMetaData.ref" (roundtripIxonExprMetaData (.ref testAddr) == .ref testAddr) ++
  test "ExprMetaData.prj" (roundtripIxonExprMetaData (.prj testAddr 5) == .prj testAddr 5) ++
  test "ExprMetaData.letBinder" (roundtripIxonExprMetaData (.letBinder testAddr 0 1 2) == .letBinder testAddr 0 1 2) ++
  test "ExprMetaData.mdata empty" (roundtripIxonExprMetaData (.mdata #[] 0) == .mdata #[] 0) ++
  test "ExprMetaData.mdata with kvmap" (roundtripIxonExprMetaData (.mdata #[kvmap] 3) == .mdata #[kvmap] 3) ++
  test "ExprMetaData.binder default" (roundtripIxonExprMetaData (.binder testAddr .default 0 1) == .binder testAddr .default 0 1) ++
  test "ExprMetaData.binder implicit" (roundtripIxonExprMetaData (.binder testAddr .implicit 2 3) == .binder testAddr .implicit 2 3) ++
  test "ExprMetaData.binder strictImplicit" (roundtripIxonExprMetaData (.binder testAddr .strictImplicit 0 0) == .binder testAddr .strictImplicit 0 0) ++
  test "ExprMetaData.binder instImplicit" (roundtripIxonExprMetaData (.binder testAddr .instImplicit 0 0) == .binder testAddr .instImplicit 0 0)

/-- Arena roundtrips: empty, singleton, small, and a mixed-node arena. -/
def exprMetaArenaTests : TestSeq :=
  let testAddr := Address.blake3 (ByteArray.mk #[1, 2, 3])
  let emptyArena : ExprMetaArena := {}
  let singleLeaf : ExprMetaArena := { nodes := #[.leaf] }
  let smallArena : ExprMetaArena := { nodes := #[.leaf, .app 0 0, .ref testAddr] }
  let mixedArena : ExprMetaArena := { nodes := #[
    .leaf, .ref testAddr, .app 0 0, .binder testAddr .default 0 1,
    .letBinder testAddr 0 1 2, .prj testAddr 0, .mdata #[] 0
  ] }
  checkIO "ExprMetaArena empty" (roundtripIxonExprMetaArena emptyArena == emptyArena) ++
  checkIO "ExprMetaArena single leaf" (roundtripIxonExprMetaArena singleLeaf == singleLeaf) ++
  checkIO "ExprMetaArena small" (roundtripIxonExprMetaArena smallArena == smallArena) ++
  checkIO "ExprMetaArena mixed" (roundtripIxonExprMetaArena mixedArena == mixedArena)

/-- Fixed-case roundtrips for `ConstantMeta` constructors (empty/defn/axio/ctor/recr). -/
def constantMetaTests : TestSeq :=
  let testAddr := Address.blake3 (ByteArray.mk #[1, 2, 3])
  let emptyArena : ExprMetaArena := {}
  let smallArena : ExprMetaArena := { nodes := #[.leaf, .app 0 0, .ref testAddr] }
  checkIO "ConstantMeta.empty" (roundtripIxonConstantMeta .empty == .empty) ++
  checkIO "ConstantMeta.defn" (roundtripIxonConstantMeta
    (.defn testAddr #[testAddr] .opaque #[] #[] smallArena 0 1) ==
    .defn testAddr #[testAddr] .opaque #[] #[] smallArena 0 1) ++
  checkIO "ConstantMeta.axio" (roundtripIxonConstantMeta
    (.axio testAddr #[] emptyArena 0) ==
    .axio testAddr #[] emptyArena 0) ++
  checkIO "ConstantMeta.ctor" (roundtripIxonConstantMeta
    (.ctor testAddr #[] testAddr smallArena 2) ==
    .ctor testAddr #[] testAddr smallArena 2) ++
  checkIO "ConstantMeta.recr" (roundtripIxonConstantMeta
    (.recr testAddr #[] #[] #[] #[] smallArena 0 #[1, 2]) ==
    .recr testAddr #[] #[] #[] #[] smallArena 0 #[1, 2])

/-!
## Cross-implementation serialization comparison FFI declarations -/

-- Each rs_eq_*_serialization extern checks that the Rust encoding of the
-- first argument equals the Lean-produced byte string passed second.
@[extern "rs_eq_univ_serialization"]
opaque rsEqUnivSerialization : @& Univ → @& ByteArray → Bool

@[extern "rs_eq_expr_serialization"]
opaque rsEqExprSerialization : @& Expr → @& ByteArray → Bool

@[extern "rs_eq_constant_serialization"]
opaque rsEqConstantSerialization : @& Constant → @& ByteArray → Bool

@[extern "rs_eq_env_serialization"]
opaque rsEqEnvSerialization : @& RawEnv → @& ByteArray → Bool

/-! ## RawEnv roundtrip FFI -/

@[extern "rs_roundtrip_raw_env"]
opaque roundtripRawEnv : @& RawEnv → RawEnv

-- Shrinks by popping one element off each non-empty array simultaneously;
-- yields a single smaller candidate, or none once all four arrays are empty.
instance : Shrinkable RawEnv where
  shrink env :=
    if env.consts.isEmpty && env.named.isEmpty && env.blobs.isEmpty && env.comms.isEmpty then []
    else [{
      consts := if env.consts.isEmpty then #[] else env.consts.pop,
      named := if env.named.isEmpty then #[] else env.named.pop,
      blobs := if env.blobs.isEmpty then #[] else env.blobs.pop,
      comms := if env.comms.isEmpty then #[] else env.comms.pop
    }]

/-- Compare RawEnv with content-aware comparison.
    Checks array sizes and content matching by Address.
    NOTE(review): only `consts` gets per-element content comparison; `named`,
    `blobs`, `comms`, and `names` are compared by size alone — confirm this
    coarser check is intentional. -/
def rawEnvEq (a b : RawEnv) : Bool :=
  a.consts.size == b.consts.size &&
  a.named.size == b.named.size &&
  a.blobs.size == b.blobs.size &&
  a.comms.size == b.comms.size &&
  a.names.size == b.names.size &&
  -- Size equality + one-directional all/any is sufficient when addresses are unique:
  -- if sizes match and every element in 'a' has a match in 'b', then 'b' cannot
  -- have extra elements (since sizes are equal and addresses uniquely identify items).
  -- Content comparison for consts
  a.consts.all fun rc =>
    b.consts.any fun rc' => rc.addr == rc'.addr &&
      rc.const.info == rc'.const.info &&
      rc.const.sharing.size == rc'.const.sharing.size &&
      rc.const.refs.size == rc'.const.refs.size &&
      rc.const.univs.size == rc'.const.univs.size

/-!
## RawEnv unit tests -/

-- Roundtrips an empty RawEnv and one populated with a single const, named
-- entry, blob, and commitment (all keyed by the same test address).
def rawEnvTests : TestSeq :=
  let empty : RawEnv := { consts := #[], named := #[], blobs := #[], comms := #[] }
  -- Create test data for non-empty case
  let testAddr := Address.blake3 (ByteArray.mk #[1, 2, 3])
  let testExpr : Expr := .sort 0
  let testDef : Definition := {
    kind := .defn, safety := .safe, lvls := 0,
    typ := testExpr, value := testExpr
  }
  let testConst : Constant := {
    info := .defn testDef, sharing := #[], refs := #[], univs := #[]
  }
  let testRawConst : RawConst := { addr := testAddr, const := testConst }
  let testComm : Comm := { secret := testAddr, payload := testAddr }
  let testRawComm : RawComm := { addr := testAddr, comm := testComm }
  let testRawBlob : RawBlob := { addr := testAddr, bytes := ByteArray.mk #[1, 2, 3] }
  let testName := Ix.Name.mkStr Ix.Name.mkAnon "test"
  let testRawNamed : RawNamed := {
    name := testName, addr := testAddr, constMeta := .empty
  }
  let withData : RawEnv := {
    consts := #[testRawConst],
    named := #[testRawNamed],
    blobs := #[testRawBlob],
    comms := #[testRawComm]
  }
  test "RawEnv empty" (rawEnvEq (roundtripRawEnv empty) empty) ++
  test "RawEnv with data" (rawEnvEq (roundtripRawEnv withData) withData)

/-!
## Test Suite -/

-- Full Ixon FFI suite: the fixed unit-test sequences above followed by
-- SlimCheck property roundtrips over generated values (one per Ixon type).
def suite : List TestSeq := [
  -- Ixon type unit tests
  ixonDefKindTests,
  ixonDefinitionSafetyTests,
  ixonQuotKindTests,
  ixonUnivTests,
  ixonExprTests,
  exprMetaDataTests,
  exprMetaArenaTests,
  constantMetaTests,
  rawEnvTests,
  -- Ixon property tests - basic types
  checkIO "Ixon.DefKind roundtrip" (∀ x : DefKind, roundtripIxonDefKind x == x),
  checkIO "Ixon.DefinitionSafety roundtrip" (∀ x : DefinitionSafety, roundtripIxonDefinitionSafety x == x),
  checkIO "Ixon.QuotKind roundtrip" (∀ x : QuotKind, roundtripIxonQuotKind x == x),
  checkIO "Ixon.Univ roundtrip" (∀ x : Univ, roundtripIxonUniv x == x),
  checkIO "Ixon.Expr roundtrip" (∀ x : Expr, roundtripIxonExpr x == x),
  checkIO "Ixon.Definition roundtrip" (∀ x : Definition, roundtripIxonDefinition x == x),
  checkIO "Ixon.RecursorRule roundtrip" (∀ x : RecursorRule, roundtripIxonRecursorRule x == x),
  checkIO "Ixon.Recursor roundtrip" (∀ x : Recursor, roundtripIxonRecursor x == x),
  checkIO "Ixon.Axiom roundtrip" (∀ x : Axiom, roundtripIxonAxiom x == x),
  checkIO "Ixon.Quotient roundtrip" (∀ x : Quotient, roundtripIxonQuotient x == x),
  checkIO "Ixon.Constructor roundtrip" (∀ x : Constructor, roundtripIxonConstructor x == x),
  checkIO "Ixon.Inductive roundtrip" (∀ x : Inductive, roundtripIxonInductive x == x),
  checkIO "Ixon.InductiveProj roundtrip" (∀ x : InductiveProj, roundtripIxonInductiveProj x == x),
  checkIO "Ixon.ConstructorProj roundtrip" (∀ x : ConstructorProj, roundtripIxonConstructorProj x == x),
  checkIO "Ixon.RecursorProj roundtrip" (∀ x : RecursorProj, roundtripIxonRecursorProj x == x),
  checkIO "Ixon.DefinitionProj roundtrip" (∀ x : DefinitionProj, roundtripIxonDefinitionProj x == x),
  checkIO "Ixon.MutConst roundtrip" (∀ x : MutConst, roundtripIxonMutConst x == x),
  checkIO "Ixon.ConstantInfo roundtrip" (∀ x : ConstantInfo, roundtripIxonConstantInfo x == x),
  checkIO "Ixon.Constant roundtrip" (∀ x : Constant, roundtripIxonConstant x == x),
  checkIO
"Ixon.DataValue roundtrip" (∀ x : DataValue, roundtripIxonDataValue x == x), + checkIO "Ixon.Comm roundtrip" (∀ x : Comm, roundtripIxonComm x == x), + -- Metadata types (arena-based) + checkIO "Ixon.ExprMetaData roundtrip" (∀ x : ExprMetaData, roundtripIxonExprMetaData x == x), + checkIO "Ixon.ConstantMeta roundtrip" (∀ x : ConstantMeta, roundtripIxonConstantMeta x == x), + checkIO "Ixon.Named roundtrip" (∀ x : Named, roundtripIxonNamed x == x), + -- RawEnv roundtrip + checkIO "Ixon.RawEnv roundtrip" (∀ env : RawEnv, rawEnvEq (roundtripRawEnv env) env), +] + +end Tests.FFI.Ixon diff --git a/Tests/FFIConsistency.lean b/Tests/FFIConsistency.lean deleted file mode 100644 index a7224b16..00000000 --- a/Tests/FFIConsistency.lean +++ /dev/null @@ -1,31 +0,0 @@ -import LSpec -import Tests.Common - -open LSpec SlimCheck Gen - -/- Array UInt32 -/ - -def genArrayUInt32 : Gen $ Array UInt32 := do - let numValues ← choose Nat 1 8 - let mut values := Array.emptyWithCapacity numValues - for _ in [:numValues] do - values := values.push $ ← genUInt32 - pure values - -@[extern "rs_boxed_u32s_are_equivalent_to_bytes"] -opaque boxedUInt32sAreEquivalentToBytes : @& Array UInt32 → @& ByteArray → Bool - -def arrayUInt32sToBytes (arr : Array UInt32) : ByteArray := - arr.foldl (init := .emptyWithCapacity (4 * arr.size)) fun acc u => acc ++ u.toLEBytes - -instance : Shrinkable (Array UInt32) where - shrink _ := [] - -instance : SampleableExt (Array UInt32) := SampleableExt.mkSelfContained genArrayUInt32 - -/- Suite -/ - -def Tests.FFIConsistency.suite := [ - check "Boxed UInt32s are unboxed correctly in Rust" - (∀ arr : Array UInt32, boxedUInt32sAreEquivalentToBytes arr (arrayUInt32sToBytes arr)), - ] diff --git a/Tests/Gen/Basic.lean b/Tests/Gen/Basic.lean new file mode 100644 index 00000000..aa6e1a76 --- /dev/null +++ b/Tests/Gen/Basic.lean @@ -0,0 +1,182 @@ +/- + Basic type generators for testing. + Generators and test types for property-based FFI roundtrip tests. 
-/

import LSpec
import Std.Data.HashMap

open LSpec SlimCheck Gen

/-! ## Helper combinators -/

def genUInt8 : Gen UInt8 :=
  UInt8.ofNat <$> choose Nat 0 0xFF

def genUInt32 : Gen UInt32 :=
  UInt32.ofNat <$> choose Nat 0 0xFFFFFFFF

def genUInt64 : Gen UInt64 :=
  UInt64.ofNat <$> choose Nat 0 0xFFFFFFFFFFFFFFFF

def genUSize : Gen USize :=
  .ofNat <$> choose Nat 0 (2^System.Platform.numBits - 1)

/-- Weighted choice among `xs`; `default` is used only when `xs` is empty
    (for non-empty `xs` the walk below always lands on some entry, since the
    drawn `n` never exceeds the total weight). -/
def frequency' (default: Gen α) (xs: List (Nat × Gen α)) : Gen α := do
  let n ← choose Nat 0 total
  pick n xs
where
  total := List.sum (Prod.fst <$> xs)
  pick n xs := match xs with
    | [] => default
    | (k, x) :: xs => if n <= k then x else pick (n - k) xs

/-- Weighted choice among `xs`.
    Fix: previously used `xs.head!.snd` as the fallback, which panics on an
    empty list; falling back to the `Inhabited` default instead is identical
    for non-empty input (the fallback is unreachable then) and total on `[]`. -/
def frequency [Inhabited α] (xs: List (Nat × Gen α)) : Gen α :=
  frequency' (pure default) xs

def oneOf' [Inhabited α] (xs: List (Gen α)) : Gen α :=
  frequency (xs.map (fun x => (100, x)))

/-! ## Basic type generators -/

/-- Generate Nats across the full range: small, medium, large, and huge -/
def genNat : Gen Nat := do
  let choice ← choose Nat 0 100
  if choice < 50 then
    -- 50%: small nats (0-1000)
    choose Nat 0 1000
  else if choice < 75 then
    -- 25%: medium nats (up to 2^32)
    choose Nat 0 (2^32)
  else if choice < 90 then
    -- 15%: large nats (up to 2^64)
    choose Nat 0 (2^64)
  else
    -- 10%: huge nats (up to 2^256)
    choose Nat 0 (2^256)

def genSmallNat : Gen Nat := choose Nat 0 1000

/-- Printable-ASCII strings of length at most 100.
    NOTE(review): the final length is `min len (chars.length)` because
    `Gen.listOf` picks its own length — confirm the skewed-short distribution
    is acceptable. -/
def genString : Gen String := do
  let len ← choose Nat 0 100
  let chars ← Gen.listOf (choose Nat 32 126 >>= fun n => pure (Char.ofNat n))
  pure (String.ofList (chars.take len))

def genListNat : Gen (List Nat) := do
  let len ← choose Nat 0 20
  let mut result := []
  for _ in [:len] do
    result := (← genSmallNat) :: result
  pure result.reverse

def genArrayNat : Gen (Array Nat) := do
  let list ← genListNat
  pure list.toArray

def genByteArray : Gen ByteArray := do
  let len ← choose Nat 0 100
  let mut bytes := ByteArray.emptyWithCapacity len
  for _ in [:len] do
    let b ← choose Nat 0 255
    bytes := bytes.push b.toUInt8
  pure bytes

def genBool : Gen Bool := choose Bool .false true

/-! ## Test struct generators -/

/-- A simple 2D point struct for FFI testing -/
structure Point where
  x : Nat
  y : Nat
deriving Repr, BEq, DecidableEq, Inhabited

def genPoint : Gen Point := do
  let x ← genSmallNat
  let y ← genSmallNat
  pure ⟨x, y⟩

/-- A simple binary tree of Nats for FFI testing -/
inductive NatTree where
  | leaf : Nat → NatTree
  | node : NatTree → NatTree → NatTree
deriving Repr, BEq, DecidableEq, Inhabited

/-- Generate a random NatTree with bounded depth -/
def genNatTree : Nat → Gen NatTree
  | 0 => do
    let n ← genSmallNat
    pure (.leaf n)
  | maxDepth + 1 => do
    let choice ← choose Nat 0 2
    if choice == 0 then
      let n ← genSmallNat
      pure (.leaf n)
    else
      let left ← genNatTree maxDepth
      let right ← genNatTree maxDepth
      pure (.node left right)

-- Up to 20 insertions; duplicate keys overwrite, so the map may be smaller.
def genHashMapNatNat : Gen (Std.HashMap Nat Nat) := do
  let len ← choose Nat 0 20
  let mut map : Std.HashMap Nat Nat := {}
  for _ in [:len] do
    let k ← genSmallNat
    let v ← genSmallNat
    map := map.insert k v
  pure map

/-!
## Shrinkable instances -/ + +instance : Shrinkable Nat where + shrink n := if n == 0 then [] else [n / 2] + +instance : Shrinkable (List Nat) where + shrink xs := match xs with + | [] => [] + | _ :: tail => [tail] + +instance : Shrinkable (Array Nat) where + shrink arr := if arr.isEmpty then [] else [arr.pop] + +instance : Repr ByteArray where + reprPrec ba _ := s!"ByteArray#{ba.toList}" + +instance : Shrinkable ByteArray where + shrink ba := if ba.isEmpty then [] else [ba.extract 0 (ba.size - 1)] + +instance : Shrinkable String where + shrink s := if s.isEmpty then [] else [s.dropRight 1] + +instance : Shrinkable Point where + shrink p := if p.x == 0 && p.y == 0 then [] else [⟨p.x / 2, p.y / 2⟩] + +instance : Shrinkable NatTree where + shrink t := match t with + | .leaf n => if n == 0 then [] else [.leaf (n / 2)] + | .node l r => [l, r] + +instance : Shrinkable (Std.HashMap Nat Nat) where + shrink m := + let list := m.toList + match list with + | [] => [] + | _ :: tail => [Std.HashMap.ofList tail] + +/-! ## SampleableExt instances -/ + +instance : SampleableExt Nat := SampleableExt.mkSelfContained genNat + +instance : SampleableExt (List Nat) := SampleableExt.mkSelfContained genListNat + +instance : SampleableExt (Array Nat) := SampleableExt.mkSelfContained genArrayNat + +instance : SampleableExt ByteArray := SampleableExt.mkSelfContained genByteArray + +instance : SampleableExt String := SampleableExt.mkSelfContained genString + +instance : SampleableExt Point := SampleableExt.mkSelfContained genPoint + +instance : SampleableExt NatTree := SampleableExt.mkSelfContained (genNatTree 4) + +instance : SampleableExt (Std.HashMap Nat Nat) := SampleableExt.mkSelfContained genHashMapNatNat diff --git a/Tests/Gen/Claim.lean b/Tests/Gen/Claim.lean new file mode 100644 index 00000000..303cb6c9 --- /dev/null +++ b/Tests/Gen/Claim.lean @@ -0,0 +1,130 @@ +/- + Generators for Ix.Claim types (RevealConstructorInfo, RevealRecursorRule, etc.). 
-/

import LSpec
import Tests.Gen.Ixon
import Ix.Claim

open LSpec SlimCheck Gen
open Ix (RevealConstructorInfo RevealRecursorRule RevealMutConstInfo RevealConstantInfo Claim
  DefKind DefinitionSafety QuotKind)
open Tests.Gen.Ixon (genAddress genUInt64Small genDefKind genDefinitionSafety genQuotKindNew
  genSmallArray)

namespace Tests.Gen.Claim

/-! ## Helper -/

-- 50/50 split between `none` and a generated `some`.
def genOptional (g : Gen α) : Gen (Option α) :=
  frequency [
    (1, pure none),
    (1, some <$> g),
  ]

/-! ## Generators -/

-- Every field of RevealConstructorInfo is optional; each is independently
-- present with probability 1/2 via genOptional.
def genRevealConstructorInfo : Gen RevealConstructorInfo :=
  RevealConstructorInfo.mk
    <$> genOptional genBool
    <*> genOptional genUInt64Small
    <*> genOptional genUInt64Small
    <*> genOptional genUInt64Small
    <*> genOptional genUInt64Small
    <*> genOptional genAddress

def genRevealRecursorRule : Gen RevealRecursorRule :=
  RevealRecursorRule.mk <$> genUInt64Small <*> genUInt64Small <*> genAddress

-- Weighted over the three constructors: defn (10), indc (5), recr (5).
def genRevealMutConstInfo : Gen RevealMutConstInfo :=
  frequency [
    (10, RevealMutConstInfo.defn
      <$> genOptional genDefKind <*> genOptional genDefinitionSafety
      <*> genOptional genUInt64Small <*> genOptional genAddress <*> genOptional genAddress),
    (5, RevealMutConstInfo.indc
      <$> genOptional genBool <*> genOptional genBool <*> genOptional genBool
      <*> genOptional genUInt64Small <*> genOptional genUInt64Small
      <*> genOptional genUInt64Small <*> genOptional genUInt64Small
      <*> genOptional genAddress
      <*> genOptional (genSmallArray (Prod.mk <$> genUInt64Small <*> genRevealConstructorInfo))),
    (5, RevealMutConstInfo.recr
      <$> genOptional genBool <*> genOptional genBool
      <*> genOptional genUInt64Small <*> genOptional genUInt64Small
      <*> genOptional genUInt64Small <*> genOptional genUInt64Small
      <*> genOptional genUInt64Small <*> genOptional genAddress
      <*> genOptional (genSmallArray genRevealRecursorRule)),
  ]

-- Covers all nine RevealConstantInfo constructors with unequal weights
-- (the larger/recursive payloads get lower weight).
def genRevealConstantInfo : Gen RevealConstantInfo :=
  frequency [
    (10, RevealConstantInfo.defn
      <$> genOptional genDefKind <*> genOptional genDefinitionSafety
      <*> genOptional genUInt64Small <*> genOptional genAddress <*> genOptional genAddress),
    (5, RevealConstantInfo.recr
      <$> genOptional genBool <*> genOptional genBool
      <*> genOptional genUInt64Small <*> genOptional genUInt64Small
      <*> genOptional genUInt64Small <*> genOptional genUInt64Small
      <*> genOptional genUInt64Small <*> genOptional genAddress
      <*> genOptional (genSmallArray genRevealRecursorRule)),
    (10, RevealConstantInfo.axio
      <$> genOptional genBool <*> genOptional genUInt64Small <*> genOptional genAddress),
    (10, RevealConstantInfo.quot
      <$> genOptional genQuotKindNew <*> genOptional genUInt64Small <*> genOptional genAddress),
    (10, RevealConstantInfo.cPrj
      <$> genOptional genUInt64Small <*> genOptional genUInt64Small <*> genOptional genAddress),
    (5, RevealConstantInfo.rPrj
      <$> genOptional genUInt64Small <*> genOptional genAddress),
    (5, RevealConstantInfo.iPrj
      <$> genOptional genUInt64Small <*> genOptional genAddress),
    (5, RevealConstantInfo.dPrj
      <$> genOptional genUInt64Small <*> genOptional genAddress),
    (5, RevealConstantInfo.muts
      <$> genSmallArray (Prod.mk <$> genUInt64Small <*> genRevealMutConstInfo)),
  ]

-- Equal weight across the three Claim constructors.
def genClaim : Gen Claim :=
  frequency [
    (10, Claim.eval <$> genAddress <*> genAddress),
    (10, Claim.check <$> genAddress),
    (10, Claim.reveal <$> genAddress <*> genRevealConstantInfo),
  ]

/-!
## Shrinkable instances -/ + +instance : Shrinkable RevealConstructorInfo where + shrink info := + if info.isUnsafe.isSome || info.lvls.isSome || info.cidx.isSome || + info.params.isSome || info.fields.isSome || info.typ.isSome + then [⟨none, none, none, none, none, none⟩] + else [] + +instance : Shrinkable RevealRecursorRule where + shrink rule := + (if rule.ruleIdx > 0 then [{ rule with ruleIdx := rule.ruleIdx / 2 }] else []) ++ + (if rule.fields > 0 then [{ rule with fields := rule.fields / 2 }] else []) + +instance : Shrinkable RevealMutConstInfo where + shrink + | .defn none none none none none => [] + | _ => [.defn none none none none none] + +instance : Shrinkable RevealConstantInfo where + shrink + | .axio none none none => [] + | _ => [.axio none none none] + +instance : Shrinkable Claim where + shrink + | .check _ => [] + | .eval input _ => [.check input] + | .reveal comm info => (.reveal comm <$> Shrinkable.shrink info) ++ [.check comm] + +/-! ## SampleableExt instances -/ + +instance : SampleableExt RevealConstructorInfo := SampleableExt.mkSelfContained genRevealConstructorInfo +instance : SampleableExt RevealRecursorRule := SampleableExt.mkSelfContained genRevealRecursorRule +instance : SampleableExt RevealMutConstInfo := SampleableExt.mkSelfContained genRevealMutConstInfo +instance : SampleableExt RevealConstantInfo := SampleableExt.mkSelfContained genRevealConstantInfo +instance : SampleableExt Claim := SampleableExt.mkSelfContained genClaim + +end Tests.Gen.Claim diff --git a/Tests/Gen/Ix.lean b/Tests/Gen/Ix.lean new file mode 100644 index 00000000..a4e8532a --- /dev/null +++ b/Tests/Gen/Ix.lean @@ -0,0 +1,678 @@ +/- + Generators for Ix.* types (canonical Lean types with Blake3 hashes). + Generators for property-based FFI roundtrip tests. 
-/

import LSpec
import Tests.Gen.Basic
import Tests.Gen.Ixon
import Ix.Address
import Ix.Environment
import Ix.CondenseM
import Ix.CompileM
import Ix.DecompileM

open LSpec SlimCheck Gen

namespace Tests.Gen.Ix

/-! ## Ix type generators -/

-- NOTE(review): `Gen.frequency` (array + explicit fallback argument) differs
-- from the list-based `frequency` in Tests/Gen/Basic.lean — presumably it is
-- supplied by Tests.Gen.Ixon; confirm which helper is in scope.

/-- Generate Ix.Name with deeper nesting -/
def genIxName : Nat → Gen Ix.Name
  | 0 => pure Ix.Name.mkAnon
  | fuel + 1 => Gen.frequency #[
      (1, pure Ix.Name.mkAnon),
      (4, do
        let parent ← genIxName fuel
        let len ← choose Nat 1 12
        -- lowercase ASCII letters only (codepoints 97-122)
        let chars ← Gen.listOf (choose Nat 97 122 >>= fun n => pure (Char.ofNat n))
        let s := String.ofList (chars.take len)
        pure (Ix.Name.mkStr parent s)),
      (3, do
        let parent ← genIxName fuel
        let n ← choose Nat 0 1000
        pure (Ix.Name.mkNat parent n))
    ] (pure Ix.Name.mkAnon)

/-- Generate Ix.Level with deeper nesting -/
def genIxLevel : Nat → Gen Ix.Level
  | 0 => Gen.frequency #[
      (3, pure Ix.Level.mkZero),
      (2, do let n ← genIxName 2; pure (Ix.Level.mkParam n))
    ] (pure Ix.Level.mkZero)
  | fuel + 1 => Gen.frequency #[
      (3, pure Ix.Level.mkZero),
      (4, do
        let x ← genIxLevel fuel
        pure (Ix.Level.mkSucc x)),
      -- binary nodes halve the fuel to bound total tree size
      (2, do
        let x ← genIxLevel (fuel / 2)
        let y ← genIxLevel (fuel / 2)
        pure (Ix.Level.mkMax x y)),
      (2, do
        let x ← genIxLevel (fuel / 2)
        let y ← genIxLevel (fuel / 2)
        pure (Ix.Level.mkIMax x y)),
      (3, do
        let n ← genIxName 3
        pure (Ix.Level.mkParam n)),
      (1, do
        let n ← genIxName 3
        pure (Ix.Level.mkMvar n))
    ] (pure Ix.Level.mkZero)

/-- Generate BinderInfo with varied distribution -/
def genBinderInfo : Gen Lean.BinderInfo :=
  frequency [
    (10, pure .default),
    (3, pure .implicit),
    (2, pure .strictImplicit),
    (3, pure .instImplicit),
  ]

/-- Generate a Literal -/
def genLiteral : Gen Lean.Literal :=
  frequency [
    (5, Lean.Literal.natVal <$> choose Nat 0 1000),
    (5, Lean.Literal.strVal <$> Gen.elements #["hello", "world", "foo", "bar", "test", "literal"]),
  ]

/-- Generate an Ix.Int for DataValue -/
def genIxInt : Gen Ix.Int :=
  frequency [
    (5, Ix.Int.ofNat <$> choose Nat 0 100),
    (5, Ix.Int.negSucc <$> choose Nat 0 50),
  ]

/-- Generate a random string from a list of options -/
def genIxString : Gen String :=
  Gen.elements #["foo", "bar", "test", "x", "y", "value", "item", "data", "node", "leaf"]

/-- Generate Ix.Substring -/
def genIxSubstring : Gen Ix.Substring := do
  let s ← Gen.elements #["hello world", "test string", "foo bar baz", "quick brown fox", "lorem ipsum"]
  let maxLen := s.length
  -- startPos ≤ stopPos ≤ s.length by construction
  let startPos ← choose Nat 0 (maxLen / 2)
  let stopPos ← choose Nat startPos maxLen
  pure (Ix.Substring.mk s startPos stopPos)

instance : Shrinkable Ix.Name where
  shrink n := match n with
    | .anonymous _ => []
    | .str p _ _ => [p]
    | .num p _ _ => [p]

instance : Shrinkable Ix.Substring where
  shrink ss :=
    (if ss.str.length > 0 then [{ ss with str := "", startPos := 0, stopPos := 0 }] else []) ++
    (if ss.stopPos > ss.startPos then [{ ss with stopPos := ss.startPos }] else [])
instance : SampleableExt Ix.Substring := SampleableExt.mkSelfContained genIxSubstring

/-- Generate Ix.SourceInfo with all variants -/
def genIxSourceInfo : Gen Ix.SourceInfo :=
  frequency [
    (5, pure Ix.SourceInfo.none),
    (3, do
      let leading ← genIxSubstring
      let leadingPos ← choose Nat 0 100
      let trailing ← genIxSubstring
      let trailingPos ← choose Nat 0 100
      pure (Ix.SourceInfo.original leading leadingPos trailing trailingPos)),
    (2, do
      let start ← choose Nat 0 100
      let stop ← choose Nat 100 200
      let canonical ← frequency [(1, pure true), (1, pure false)]
      pure (Ix.SourceInfo.synthetic start stop canonical)),
  ]

instance : Shrinkable Ix.SourceInfo where
  shrink si := match si with
    | .none => []
    | _ => [.none]

instance : SampleableExt Ix.SourceInfo := SampleableExt.mkSelfContained genIxSourceInfo

/-- Generate Ix.SyntaxPreresolved with all variants -/
def genIxSyntaxPreresolved : Gen Ix.SyntaxPreresolved :=
  frequency [
    (1,
Ix.SyntaxPreresolved.namespace <$> genIxName 3),
    (1, do
      let name ← genIxName 3
      let numAliases ← Gen.choose Nat 0 3
      let mut aliases : Array String := #[]
      for _ in [:numAliases] do
        aliases := aliases.push (← genIxString)
      pure (Ix.SyntaxPreresolved.decl name aliases)),
  ]

instance : Shrinkable Ix.SyntaxPreresolved where
  shrink sp := match sp with
    | .namespace n => .namespace <$> Shrinkable.shrink n
    | .decl n aliases =>
      [.namespace n] ++
      (if aliases.size > 0 then [.decl n aliases.pop] else []) ++
      ((.decl · aliases) <$> Shrinkable.shrink n)
instance : SampleableExt Ix.SyntaxPreresolved := SampleableExt.mkSelfContained genIxSyntaxPreresolved

/-- Generate Ix.Syntax with all variants including node -/
-- Fuel bounds the tree depth: `node` children recurse with `fuel / 2`, and
-- the fuel-0 case can only emit the non-recursive missing/atom/ident variants.
def genIxSyntaxAux : Nat → Gen Ix.Syntax
  | 0 => frequency [
      (10, pure Ix.Syntax.missing),
      (5, Ix.Syntax.atom <$> genIxSourceInfo <*> genIxString),
      (5, do
        let info ← genIxSourceInfo
        let rawVal ← genIxSubstring
        let name ← genIxName 2
        let numPreresolved ← Gen.choose Nat 0 2
        let mut preresolved : Array Ix.SyntaxPreresolved := #[]
        for _ in [:numPreresolved] do
          preresolved := preresolved.push (← genIxSyntaxPreresolved)
        pure (Ix.Syntax.ident info rawVal name preresolved)),
    ]
  | fuel + 1 => frequency [
      (10, pure Ix.Syntax.missing),
      (5, Ix.Syntax.atom <$> genIxSourceInfo <*> genIxString),
      (5, do
        let info ← genIxSourceInfo
        let rawVal ← genIxSubstring
        let name ← genIxName 2
        let numPreresolved ← Gen.choose Nat 0 2
        let mut preresolved : Array Ix.SyntaxPreresolved := #[]
        for _ in [:numPreresolved] do
          preresolved := preresolved.push (← genIxSyntaxPreresolved)
        pure (Ix.Syntax.ident info rawVal name preresolved)),
      (3, do
        let info ← genIxSourceInfo
        let kind ← genIxName 2
        let numChildren ← Gen.choose Nat 0 3
        let mut children : Array Ix.Syntax := #[]
        for _ in [:numChildren] do
          children := children.push (← genIxSyntaxAux (fuel / 2))
        pure (Ix.Syntax.node info kind children)),
    ]

def genIxSyntax : Gen Ix.Syntax := genIxSyntaxAux 3

/-- Generate Ix.DataValue with all variants -/
def genIxDataValue : Gen Ix.DataValue :=
  frequency [
    (10, Ix.DataValue.ofString <$> genIxString),
    (10, Ix.DataValue.ofBool <$> frequency [(1, pure true), (1, pure false)]),
    (10, Ix.DataValue.ofName <$> genIxName 3),
    (10, Ix.DataValue.ofNat <$> choose Nat 0 1000),
    (10, Ix.DataValue.ofInt <$> genIxInt),
    (5, Ix.DataValue.ofSyntax <$> genIxSyntax),
  ]

/-- Generate Ix.Expr with all variants and deeper nesting -/
def genIxExpr : Nat → Gen Ix.Expr
  | 0 => Gen.frequency #[
      (3, do let idx ← choose Nat 0 20; pure (Ix.Expr.mkBVar idx)),
      (2, do let u ← genIxLevel 3; pure (Ix.Expr.mkSort u)),
      (2, do let n ← genIxName 3; pure (Ix.Expr.mkFVar n)),
      (1, Ix.Expr.mkLit <$> genLiteral)
    ] (pure (Ix.Expr.mkBVar 0))
  | fuel + 1 => Gen.frequency #[
      -- Base cases (weighted higher to ensure termination)
      (4, do let idx ← choose Nat 0 20; pure (Ix.Expr.mkBVar idx)),
      (2, do let u ← genIxLevel 4; pure (Ix.Expr.mkSort u)),
      (2, do let n ← genIxName 4; pure (Ix.Expr.mkFVar n)),
      (1, do let n ← genIxName 4; pure (Ix.Expr.mkMVar n)),
      (2, Ix.Expr.mkLit <$> genLiteral),
      -- Const with universe levels
      (4, do
        let n ← genIxName 4
        let numLevels ← choose Nat 0 4
        let mut levels : Array Ix.Level := #[]
        for _ in [:numLevels] do
          levels := levels.push (← genIxLevel 4)
        pure (Ix.Expr.mkConst n levels)),
      -- App - function application
      (5, do
        let f ← genIxExpr (fuel / 2)
        let a ← genIxExpr (fuel / 2)
        pure (Ix.Expr.mkApp f a)),
      -- Lambda with varied binder info
      (4, do
        let n ← genIxName 3
        let bi ← genBinderInfo
        let ty ← genIxExpr (fuel / 2)
        let body ← genIxExpr (fuel / 2)
        pure (Ix.Expr.mkLam n ty body bi)),
      -- ForallE with varied binder info
      (4, do
        let n ← genIxName 3
        let bi ← genBinderInfo
        let ty ← genIxExpr (fuel / 2)
        let body ← genIxExpr (fuel / 2)
        pure (Ix.Expr.mkForallE n ty body bi)),
      -- LetE (three subterms, so fuel is split three ways)
      (3, do
        let n ← genIxName 3
        let ty ← genIxExpr (fuel / 3)
        let val ← genIxExpr (fuel / 3)
        let body ← genIxExpr (fuel / 3)
        let nonDep ← frequency [(1, pure true), (1, pure false)]
        pure (Ix.Expr.mkLetE n ty val body nonDep)),
      -- MData with metadata
      (2, do
        let numEntries ← choose Nat 1 4
        let mut entries : Array (Ix.Name × Ix.DataValue) := #[]
        for _ in [:numEntries] do
          let key ← genIxName 2
          let val ← genIxDataValue
          entries := entries.push (key, val)
        let e ← genIxExpr (fuel / 2)
        pure (Ix.Expr.mkMData entries e)),
      -- Proj
      (2, do
        let typeName ← genIxName 4
        let idx ← choose Nat 0 10
        let struct ← genIxExpr (fuel / 2)
        pure (Ix.Expr.mkProj typeName idx struct))
    ] (pure (Ix.Expr.mkBVar 0))

instance : SampleableExt Ix.Name := SampleableExt.mkSelfContained (genIxName 5)

-- Shrink toward structurally smaller levels: unary/binary nodes expose their
-- children; leaves other than zero collapse to zero.
instance : Shrinkable Ix.Level where
  shrink l := match l with
    | .zero _ => []
    | .succ x _ => [x]
    | .max x y _ => [x, y]
    | .imax x y _ => [x, y]
    | .param _ _ => [Ix.Level.mkZero]
    | .mvar _ _ => [Ix.Level.mkZero]

instance : SampleableExt Ix.Level := SampleableExt.mkSelfContained (genIxLevel 5)

-- Shrink toward subterms, bottoming out at bvar 0.
instance : Shrinkable Ix.Expr where
  shrink e := match e with
    | .bvar _ _ => []
    | .fvar _ _ => [Ix.Expr.mkBVar 0]
    | .mvar _ _ => [Ix.Expr.mkBVar 0]
    | .sort _ _ => [Ix.Expr.mkBVar 0]
    | .const _ _ _ => [Ix.Expr.mkBVar 0]
    | .app f a _ => [f, a]
    | .lam _ ty body _ _ => [ty, body]
    | .forallE _ ty body _ _ => [ty, body]
    | .letE _ ty val body _ _ => [ty, val, body]
    | .lit _ _ => [Ix.Expr.mkBVar 0]
    | .mdata _ e _ => [e]
    | .proj _ _ e _ => [e]

instance : SampleableExt Ix.Expr := SampleableExt.mkSelfContained (genIxExpr 5)

/-- Generate an array of level parameter names with varied sizes -/
def genLevelParams : Gen (Array Ix.Name) := do
  let numParams ← choose Nat 0 5
  let mut params : Array Ix.Name := #[]
  for i in [:numParams] do
    -- Use varied names, not just u, v, w
    let baseName ← frequency [
      (3, pure "u"),
      (3, pure "v"),
      (2, pure "w"),
      (2, pure "α"),
      (2, pure "β"),
    ]
    -- suffixing the loop index keeps the generated names distinct
    params := params.push (Ix.Name.mkStr Ix.Name.mkAnon s!"{baseName}{i}")
  pure params

/-- Generate a random Ix.ConstantVal with varied complexity -/
def genIxConstantVal : Gen Ix.ConstantVal :=
  Ix.ConstantVal.mk <$> genIxName 5 <*> genLevelParams <*> genIxExpr 5

/-- Generate a random Ix.AxiomVal -/
def genIxAxiomVal : Gen Ix.AxiomVal :=
  Ix.AxiomVal.mk <$> genIxConstantVal <*> frequency [(9, pure false), (1, pure true)]

/-- Generate ReducibilityHints -/
def genReducibilityHints : Gen Lean.ReducibilityHints :=
  frequency [
    (3, pure .opaque),
    (3, pure .abbrev),
    (4, Lean.ReducibilityHints.regular <$> genUInt32),
  ]

/-- Generate DefinitionSafety -/
def genDefinitionSafety : Gen Lean.DefinitionSafety :=
  frequency [
    (8, pure .safe),
    (1, pure .unsafe),
    (1, pure .partial),
  ]

/-- Generate an array of mutually recursive names -/
-- Always includes `baseName` itself, plus up to three `_mutual_i` siblings.
def genMutualNames (baseName : Ix.Name) : Gen (Array Ix.Name) := do
  let numMutual ← choose Nat 1 4
  let mut names : Array Ix.Name := #[baseName]
  for i in [1:numMutual] do
    names := names.push (Ix.Name.mkStr baseName s!"_mutual_{i}")
  pure names

/-- Generate a random Ix.DefinitionVal -/
def genIxDefinitionVal : Gen Ix.DefinitionVal := do
  let cnst ← genIxConstantVal
  let value ← genIxExpr 5
  let hints ← genReducibilityHints
  let safety ← genDefinitionSafety
  let all ← genMutualNames cnst.name
  pure { cnst, value, hints, safety, all }

/-- Generate a random Ix.TheoremVal -/
def genIxTheoremVal : Gen Ix.TheoremVal := do
  let cnst ← genIxConstantVal
  let value ← genIxExpr 5
  let all ← genMutualNames cnst.name
  pure { cnst, value, all }

/-- Generate a random Ix.OpaqueVal -/
def genIxOpaqueVal : Gen Ix.OpaqueVal := do
  let cnst ← genIxConstantVal
  let value ← genIxExpr 5
  let isUnsafe ← frequency [(9, pure false), (1, pure true)]
  let all ← genMutualNames cnst.name
  pure { cnst, value, isUnsafe, all }

/-- Generate QuotKind -/
def genQuotKind : Gen
Lean.QuotKind := + frequency [ + (1, pure .type), + (1, pure .ctor), + (1, pure .lift), + (1, pure .ind), + ] + +/-- Generate a random Ix.QuotVal -/ +def genIxQuotVal : Gen Ix.QuotVal := + Ix.QuotVal.mk <$> genIxConstantVal <*> genQuotKind + +/-- Generate constructor names for an inductive -/ +def genConstructorNames (inductName : Ix.Name) : Gen (Array Ix.Name) := do + let numCtors ← choose Nat 1 5 + let mut ctors : Array Ix.Name := #[] + let ctorNames := #["mk", "nil", "cons", "zero", "succ", "inl", "inr", "intro", "refl"] + for i in [:numCtors] do + let suffix := if i < ctorNames.size then ctorNames[i]! else s!"ctor{i}" + ctors := ctors.push (Ix.Name.mkStr inductName suffix) + pure ctors + +/-- Generate a random Ix.InductiveVal -/ +def genIxInductiveVal : Gen Ix.InductiveVal := do + let cnst ← genIxConstantVal + let numParams ← choose Nat 0 5 + let numIndices ← choose Nat 0 3 + let isRec ← frequency [(6, pure false), (4, pure true)] + let isUnsafe ← frequency [(9, pure false), (1, pure true)] + let isReflexive ← frequency [(7, pure false), (3, pure true)] + let numNested ← choose Nat 0 3 + let all ← genMutualNames cnst.name + let ctors ← genConstructorNames cnst.name + pure { + cnst + numParams + numIndices + all + ctors + numNested + isRec + isUnsafe + isReflexive + } + +/-- Generate a random Ix.ConstructorVal -/ +def genIxConstructorVal : Gen Ix.ConstructorVal := do + let cnst ← genIxConstantVal + let induct ← genIxName 5 + let cidx ← choose Nat 0 10 + let numParams ← choose Nat 0 5 + let numFields ← choose Nat 0 8 + let isUnsafe ← frequency [(9, pure false), (1, pure true)] + pure { cnst, induct, cidx, numParams, numFields, isUnsafe } + +/-- Generate a random Ix.RecursorRule -/ +def genIxRecursorRule : Gen Ix.RecursorRule := do + let ctor ← genIxName 5 + let nfields ← choose Nat 0 8 + let rhs ← genIxExpr 5 + pure { ctor, nfields, rhs } + +/-- Generate a random Ix.RecursorVal -/ +def genIxRecursorVal : Gen Ix.RecursorVal := do + let cnst ← genIxConstantVal + 
let all ← genMutualNames cnst.name + let numParams ← choose Nat 0 5 + let numIndices ← choose Nat 0 3 + let numMotives ← choose Nat 1 4 + let numMinors ← choose Nat 0 6 + let numRules ← choose Nat 1 5 + let mut rules : Array Ix.RecursorRule := #[] + for _ in [:numRules] do + rules := rules.push (← genIxRecursorRule) + let k ← frequency [(7, pure false), (3, pure true)] + let isUnsafe ← frequency [(9, pure false), (1, pure true)] + pure { cnst, all, numParams, numIndices, numMotives, numMinors, rules, k, isUnsafe } + +instance : Inhabited Ix.ConstantInfo where + default := .axiomInfo { cnst := { name := Ix.Name.mkAnon, levelParams := #[], type := Ix.Expr.mkSort Ix.Level.mkZero }, isUnsafe := false } + +/-- Generate a random Ix.ConstantInfo with all variants -/ +def genIxConstantInfo : Gen Ix.ConstantInfo := + frequency [ + (15, Ix.ConstantInfo.axiomInfo <$> genIxAxiomVal), + (15, Ix.ConstantInfo.defnInfo <$> genIxDefinitionVal), + (10, Ix.ConstantInfo.thmInfo <$> genIxTheoremVal), + (10, Ix.ConstantInfo.opaqueInfo <$> genIxOpaqueVal), + (10, Ix.ConstantInfo.quotInfo <$> genIxQuotVal), + (15, Ix.ConstantInfo.inductInfo <$> genIxInductiveVal), + (15, Ix.ConstantInfo.ctorInfo <$> genIxConstructorVal), + (10, Ix.ConstantInfo.recInfo <$> genIxRecursorVal), + ] + +instance : Shrinkable Ix.ConstantInfo where + shrink info := + -- Shrink to a simple axiom + let simpleName := Ix.Name.mkAnon + let simpleType := Ix.Expr.mkSort Ix.Level.mkZero + let simpleCnst : Ix.ConstantVal := { name := simpleName, levelParams := #[], type := simpleType } + match info with + | .axiomInfo _ => [] + | _ => [.axiomInfo { cnst := simpleCnst, isUnsafe := false }] + +instance : SampleableExt Ix.ConstantInfo := SampleableExt.mkSelfContained genIxConstantInfo + +/-! 
## Generators for Ix.RawEnvironment -/ + +/-- Generate small arrays for RawEnvironment to avoid memory issues -/ +def genSmallArray (g : Gen α) : Gen (Array α) := + resize (fun s => if s > 3 then 3 else s / 2) <| + Array.mk <$> (listOf g >>= fun l => pure (l.take 3)) + +/-- Generate a simple ConstantInfo (only axiomInfo for FFI stability) -/ +def genSimpleConstantInfo : Gen Ix.ConstantInfo := + Ix.ConstantInfo.axiomInfo <$> genIxAxiomVal + +/-- Generate a (Name × ConstantInfo) pair for RawEnvironment -/ +def genNameConstantPair : Gen (Ix.Name × Ix.ConstantInfo) := + Prod.mk <$> genIxName 3 <*> genSimpleConstantInfo + +/-- Generate a RawEnvironment with small arrays to avoid memory issues -/ +def genIxRawEnvironment : Gen Ix.RawEnvironment := + Ix.RawEnvironment.mk <$> genSmallArray genNameConstantPair + +instance : Shrinkable Ix.RawEnvironment where + shrink env := if env.consts.isEmpty then [] else [{ consts := env.consts.pop }] + +instance : SampleableExt Ix.RawEnvironment := SampleableExt.mkSelfContained genIxRawEnvironment + +/-! 
## Generators for Additional Ix Types -/ + +def genAddress : Gen Address := Tests.Gen.Ixon.genAddress + +instance : Shrinkable Address where shrink _ := [] +instance : SampleableExt Address := SampleableExt.mkSelfContained genAddress + +-- Ix.Int already has genIxInt defined earlier +instance : Shrinkable Ix.Int where + shrink i := match i with + | .ofNat n => if n > 0 then [.ofNat (n / 2)] else [] + | .negSucc n => [.ofNat 0] ++ if n > 0 then [.negSucc (n / 2)] else [] + +instance : SampleableExt Ix.Int := SampleableExt.mkSelfContained genIxInt + +-- Ix.Syntax already has genIxSyntax defined earlier +instance : Shrinkable Ix.Syntax where + shrink s := match s with + | .missing => [] + | _ => [.missing] + +instance : SampleableExt Ix.Syntax := SampleableExt.mkSelfContained genIxSyntax + +-- Ix.DataValue already has genIxDataValue defined earlier +instance : Shrinkable Ix.DataValue where + shrink dv := match dv with + | .ofBool _ => [] + | _ => [.ofBool true] + +instance : SampleableExt Ix.DataValue := SampleableExt.mkSelfContained genIxDataValue + +/-! 
## Generators for RustCondensedBlocks and RustCompilePhases -/ + +/-- Generate a (Name × Name) pair for lowLinks -/ +def genNamePair : Gen (Ix.Name × Ix.Name) := + Prod.mk <$> genIxName 3 <*> genIxName 3 + +/-- Generate a (Name × Array Name) pair for blocks/blockRefs -/ +def genNameArrayPair : Gen (Ix.Name × Array Ix.Name) := do + let name ← genIxName 3 + let arr ← genSmallArray (genIxName 3) + pure (name, arr) + +/-- Generate Ix.RustCondensedBlocks -/ +def genRustCondensedBlocks : Gen Ix.RustCondensedBlocks := + Ix.RustCondensedBlocks.mk + <$> genSmallArray genNamePair + <*> genSmallArray genNameArrayPair + <*> genSmallArray genNameArrayPair + +instance : Shrinkable Ix.RustCondensedBlocks where + shrink cb := + (if cb.lowLinks.size > 0 then [{ cb with lowLinks := cb.lowLinks.pop }] else []) ++ + (if cb.blocks.size > 0 then [{ cb with blocks := cb.blocks.pop }] else []) ++ + (if cb.blockRefs.size > 0 then [{ cb with blockRefs := cb.blockRefs.pop }] else []) + +instance : SampleableExt Ix.RustCondensedBlocks := SampleableExt.mkSelfContained genRustCondensedBlocks + +/-- Generate Ix.CompileM.RustCompilePhases -/ +def genRustCompilePhases : Gen Ix.CompileM.RustCompilePhases := + Ix.CompileM.RustCompilePhases.mk + <$> genIxRawEnvironment + <*> genRustCondensedBlocks + <*> Tests.Gen.Ixon.genRawEnv + +instance : Shrinkable Ix.CompileM.RustCompilePhases where + shrink p := + -- Shrink to empty structures + let empty : Ix.CompileM.RustCompilePhases := { + rawEnv := { consts := #[] }, + condensed := { lowLinks := #[], blocks := #[], blockRefs := #[] }, + compileEnv := { consts := #[], named := #[], blobs := #[], comms := #[] } + } + if p.rawEnv.consts.isEmpty && p.condensed.lowLinks.isEmpty && p.compileEnv.consts.isEmpty + then [] + else [empty] + +instance : SampleableExt Ix.CompileM.RustCompilePhases := SampleableExt.mkSelfContained genRustCompilePhases + +/-! 
## Generators for SerializeError, CompileError, and DecompileError -/ + +instance : Inhabited Ixon.SerializeError where + default := .addressError + +instance : Inhabited Ix.DecompileM.DecompileError where + default := .badConstantFormat "" + +instance : Inhabited Ix.CompileM.CompileError where + default := .missingConstant "" + +/-- Generate a SerializeError with all variants -/ +def genSerializeError : Gen Ixon.SerializeError := do + let s ← genIxString + let byte ← Gen.choose Nat 0 255 + let idx ← Gen.choose Nat 0 100 + let len ← Gen.choose Nat 0 100 + Gen.frequency #[ + (1, pure (.unexpectedEof s)), + (1, pure (.invalidTag byte.toUInt8 s)), + (1, pure (.invalidFlag byte.toUInt8 s)), + (1, pure (.invalidVariant idx.toUInt64 s)), + (1, pure (.invalidBool byte.toUInt8)), + (1, pure .addressError), + (1, pure (.invalidShareIndex idx.toUInt64 len)) + ] (pure default) + +instance : Shrinkable Ixon.SerializeError where + shrink e := match e with + | .addressError => [] + | _ => [.addressError] + +instance : SampleableExt Ixon.SerializeError := + SampleableExt.mkSelfContained genSerializeError + +/-- Generate a DecompileError with all variants -/ +def genDecompileError : Gen Ix.DecompileM.DecompileError := do + let addr ← genAddress + let idx ← Gen.choose Nat 0 100 + let len ← Gen.choose Nat 0 100 + let s ← genIxString + let se ← genSerializeError + Gen.frequency #[ + (1, pure (.invalidRefIndex idx.toUInt64 len s)), + (1, pure (.invalidUnivIndex idx.toUInt64 len s)), + (1, pure (.invalidShareIndex idx.toUInt64 len s)), + (1, pure (.invalidRecIndex idx.toUInt64 len s)), + (1, pure (.invalidUnivVarIndex idx.toUInt64 len s)), + (1, pure (.missingAddress addr)), + (1, pure (.missingMetadata addr)), + (1, pure (.blobNotFound addr)), + (1, do let expected ← genIxString; pure (.badBlobFormat addr expected)), + (1, pure (.badConstantFormat s)), + (1, pure (.serializeError se)) + ] (pure default) + +instance : Shrinkable Ix.DecompileM.DecompileError where + shrink e := match e 
with + | .badConstantFormat s => if s.isEmpty then [] else [.badConstantFormat ""] + | .serializeError se => + [.badConstantFormat ""] ++ (.serializeError <$> Shrinkable.shrink se) + | _ => [.badConstantFormat ""] + +instance : SampleableExt Ix.DecompileM.DecompileError := + SampleableExt.mkSelfContained genDecompileError + +/-- Generate a CompileError with all variants -/ +def genCompileError : Gen Ix.CompileM.CompileError := do + let addr ← genAddress + let s ← genIxString + let se ← genSerializeError + Gen.frequency #[ + (1, pure (.missingConstant s)), + (1, pure (.missingAddress addr)), + (1, pure (.invalidMutualBlock s)), + (1, pure (.unsupportedExpr s)), + (1, do let s2 ← genIxString; pure (.unknownUnivParam s s2)), + (1, pure (.serializeError se)) + ] (pure default) + +instance : Shrinkable Ix.CompileM.CompileError where + shrink e := match e with + | .missingConstant s => if s.isEmpty then [] else [.missingConstant ""] + | .serializeError se => + [.missingConstant ""] ++ (.serializeError <$> Shrinkable.shrink se) + | _ => [.missingConstant ""] + +instance : SampleableExt Ix.CompileM.CompileError := + SampleableExt.mkSelfContained genCompileError + +end Tests.Gen.Ix diff --git a/Tests/Gen/Ixon.lean b/Tests/Gen/Ixon.lean new file mode 100644 index 00000000..e74b1eac --- /dev/null +++ b/Tests/Gen/Ixon.lean @@ -0,0 +1,491 @@ +/- + Generators for Ixon.* types (alpha-invariant serialization format). + Extracted from Tests/Ix/Ixon.lean. +-/ + +import LSpec +import Tests.Gen.Basic +import Ix.Ixon +import Ix.Address + +open LSpec SlimCheck Gen Ixon +open Ix (DefKind DefinitionSafety QuotKind) + +namespace Tests.Gen.Ixon + +/-! 
## Basic Ixon generators -/ + +/-- Generate a random Address by hashing random bytes -/ +def genAddress : Gen Address := do + let mut bytes : ByteArray := ByteArray.empty + for _ in [:32] do + let b ← Gen.choose Nat 0 255 + bytes := bytes.push b.toUInt8 + pure ⟨(Blake3.hash bytes).val⟩ + +def genIxonNat : Gen Nat := USize.toNat <$> genUSize + +-- aggressively reduce size parameter to avoid tree blow-up +def genList (n: Gen α) : Gen (List α) := + resize (fun s => if s > 8 then 8 else s / 2) $ listOf n + +def genUInt64Small : Gen UInt64 := USize.toUInt64 <$> genUSize + +def genDefKind : Gen DefKind := + elements #[.defn, .opaq, .thm] + +def genDefinitionSafety : Gen DefinitionSafety := + elements #[.unsaf, .safe, .part] + +def genQuotKindNew : Gen QuotKind := + elements #[.type, .ctor, .lift, .ind] + +def genArray (g: Gen α) : Gen (Array α) := + Array.mk <$> genList g + +/-- Generate a universe level (new format) - non-recursive base cases heavily weighted -/ +partial def genUniv : Gen Univ := + resize (fun s => if s > 2 then 2 else s / 2) <| + frequency [ + (50, pure .zero), -- Heavily weighted base case + (20, .var <$> genUInt64Small), -- Another base case + (10, .succ <$> genUniv), + (5, .max <$> genUniv <*> genUniv), + (5, .imax <$> genUniv <*> genUniv), + ] + +/-- Generate an expression (new format) - non-recursive cases heavily weighted -/ +partial def genExpr : Gen Expr := + resize (fun s => if s > 2 then 2 else s / 2) <| + frequency [ + (30, .sort <$> genUInt64Small), -- Base cases heavily weighted + (30, .var <$> genUInt64Small), + (20, .str <$> genUInt64Small), + (20, .nat <$> genUInt64Small), + (20, .share <$> genUInt64Small), + (15, .ref <$> genUInt64Small <*> genArray genUInt64Small), + (15, .recur <$> genUInt64Small <*> genArray genUInt64Small), + (5, .prj <$> genUInt64Small <*> genUInt64Small <*> genExpr), + (5, .app <$> genExpr <*> genExpr), + (5, .lam <$> genExpr <*> genExpr), + (5, .all <$> genExpr <*> genExpr), + (2, .letE <$> genBool <*> genExpr 
<*> genExpr <*> genExpr), + ] + +def genDefinition : Gen Definition := + .mk <$> genDefKind <*> genDefinitionSafety <*> genUInt64Small <*> genExpr <*> genExpr + +def genAxiom : Gen Axiom := + .mk <$> genBool <*> genUInt64Small <*> genExpr + +def genQuotKind : Gen Lean.QuotKind := + elements #[.type, .ctor, .lift, .ind] + +def genQuotient : Gen Quotient := + .mk <$> genQuotKindNew <*> genUInt64Small <*> genExpr + +def genConstructorProj : Gen ConstructorProj := + .mk <$> genUInt64Small <*> genUInt64Small <*> genAddress + +def genRecursorProj : Gen RecursorProj := + .mk <$> genUInt64Small <*> genAddress + +def genInductiveProj : Gen InductiveProj := + .mk <$> genUInt64Small <*> genAddress + +def genDefinitionProj : Gen DefinitionProj := + .mk <$> genUInt64Small <*> genAddress + +def genRecursorRule : Gen RecursorRule := + .mk <$> genUInt64Small <*> genExpr + +def genRecursor : Gen Recursor := + .mk <$> genBool <*> genBool <*> genUInt64Small <*> genUInt64Small <*> genUInt64Small + <*> genUInt64Small <*> genUInt64Small <*> genExpr <*> genArray genRecursorRule + +def genConstructor : Gen Constructor := + .mk <$> genBool <*> genUInt64Small <*> genUInt64Small <*> genUInt64Small <*> genUInt64Small <*> genExpr + +def genInductive : Gen Inductive := + .mk <$> genBool <*> genBool <*> genBool <*> genUInt64Small <*> genUInt64Small + <*> genUInt64Small <*> genUInt64Small <*> genExpr <*> genArray genConstructor + + +def genBinderInfo : Gen Lean.BinderInfo := + elements #[.default, .implicit, .strictImplicit, .instImplicit] + +def genReducibilityHints : Gen Lean.ReducibilityHints := + frequency [ + (10, pure .opaque), + (10, pure .abbrev), + (10, .regular <$> genUInt32), + ] + + +/-- Generate small arrays for Constant to avoid memory issues -/ +def genSmallArray (g : Gen α) : Gen (Array α) := + resize (fun s => if s > 3 then 3 else s / 2) <| + Array.mk <$> genList g + +/-- Generate a MutConst (new format) -/ +def genMutConst : Gen MutConst := + frequency [ + (10, MutConst.defn <$> 
genDefinition), + (5, MutConst.indc <$> genInductive), + (5, MutConst.recr <$> genRecursor), + ] + +/-- Generate a ConstantInfo (new format) -/ +def genConstantInfo : Gen ConstantInfo := + frequency [ + (10, .defn <$> genDefinition), + (5, .recr <$> genRecursor), + (10, .axio <$> genAxiom), + (10, .quot <$> genQuotient), + (10, .cPrj <$> genConstructorProj), + (5, .rPrj <$> genRecursorProj), + (10, .iPrj <$> genInductiveProj), + (10, .dPrj <$> genDefinitionProj), + (5, .muts <$> genSmallArray genMutConst), + ] + +/-- Generate a Constant (new format) -/ +def genConstant : Gen Constant := + Constant.mk <$> genConstantInfo + <*> genSmallArray genExpr + <*> genSmallArray genAddress + <*> genSmallArray genUniv + +/-! ## Shrinkable instances -/ + +-- Simple enums - can't shrink +instance : Shrinkable DefKind where shrink _ := [] +instance : Shrinkable DefinitionSafety where shrink _ := [] +instance : Shrinkable QuotKind where shrink _ := [] + +-- Recursive types - shrink by returning sub-terms / halving indices +instance : Shrinkable Univ where + shrink u := match u with + | .zero => [] + | .succ inner => [inner] + | .max a b => [a, b] + | .imax a b => [a, b] + | .var idx => if idx > 0 then [.var (idx / 2), .zero] else [.zero] + +instance : Shrinkable Expr where + shrink e := match e with + | .sort idx => if idx > 0 then [.sort (idx / 2)] else [] + | .var idx => if idx > 0 then [.var (idx / 2)] else [] + | .ref ri us => (if us.size > 0 then [.ref ri us.pop] else []) ++ + (if ri > 0 then [.ref (ri / 2) us] else []) + | .recur ri us => (if us.size > 0 then [.recur ri us.pop] else []) ++ + (if ri > 0 then [.recur (ri / 2) us] else []) + | .prj ti fi val => [val] ++ (if fi > 0 then [.prj ti (fi / 2) val] else []) + | .str ri => if ri > 0 then [.str (ri / 2)] else [] + | .nat ri => if ri > 0 then [.nat (ri / 2)] else [] + | .app f a => [f, a] + | .lam ty body => [ty, body] + | .all ty body => [ty, body] + | .letE _ ty val body => [ty, val, body] + | .share idx => if idx > 0 
then [.share (idx / 2)] else [] + +-- Struct types - shrink by simplifying expressions +instance : Shrinkable Definition where + shrink d := + (if d.typ != .sort 0 then [{ d with typ := .sort 0 }] else []) ++ + (if d.value != .var 0 then [{ d with value := .var 0 }] else []) ++ + (if d.lvls > 0 then [{ d with lvls := d.lvls / 2 }] else []) + +instance : Shrinkable Axiom where + shrink a := + (if a.typ != .sort 0 then [{ a with typ := .sort 0 }] else []) ++ + (if a.lvls > 0 then [{ a with lvls := a.lvls / 2 }] else []) + +instance : Shrinkable Quotient where + shrink q := + (if q.typ != .sort 0 then [{ q with typ := .sort 0 }] else []) ++ + (if q.lvls > 0 then [{ q with lvls := q.lvls / 2 }] else []) + +instance : Shrinkable RecursorRule where + shrink r := + (if r.rhs != .var 0 then [{ r with rhs := .var 0 }] else []) ++ + (if r.fields > 0 then [{ r with fields := r.fields / 2 }] else []) + +instance : Shrinkable Recursor where + shrink r := + (if r.rules.size > 0 then [{ r with rules := r.rules.pop }] else []) ++ + (if r.typ != .sort 0 then [{ r with typ := .sort 0 }] else []) + +instance : Shrinkable Constructor where + shrink c := + (if c.typ != .sort 0 then [{ c with typ := .sort 0 }] else []) ++ + (if c.lvls > 0 then [{ c with lvls := c.lvls / 2 }] else []) + +instance : Shrinkable Inductive where + shrink i := + (if i.ctors.size > 0 then [{ i with ctors := i.ctors.pop }] else []) ++ + (if i.typ != .sort 0 then [{ i with typ := .sort 0 }] else []) + +-- Projection types - shrink numeric fields +instance : Shrinkable InductiveProj where + shrink p := if p.idx > 0 then [{ p with idx := p.idx / 2 }] else [] + +instance : Shrinkable ConstructorProj where + shrink p := + (if p.idx > 0 then [{ p with idx := p.idx / 2 }] else []) ++ + (if p.cidx > 0 then [{ p with cidx := p.cidx / 2 }] else []) + +instance : Shrinkable RecursorProj where + shrink p := if p.idx > 0 then [{ p with idx := p.idx / 2 }] else [] + +instance : Shrinkable DefinitionProj where + shrink p := 
if p.idx > 0 then [{ p with idx := p.idx / 2 }] else [] + +-- Composite types - shrink to simpler variants +instance : Shrinkable MutConst where + shrink + | .defn d => .defn <$> Shrinkable.shrink d + | .indc i => [.defn ⟨.defn, .safe, 0, .sort 0, .sort 0⟩] ++ (.indc <$> Shrinkable.shrink i) + | .recr r => [.defn ⟨.defn, .safe, 0, .sort 0, .sort 0⟩] ++ (.recr <$> Shrinkable.shrink r) + +instance : Shrinkable ConstantInfo where + shrink + | .defn d => .defn <$> Shrinkable.shrink d + | .axio a => (.axio <$> Shrinkable.shrink a) ++ [.axio ⟨false, 0, .sort 0⟩] + | .quot q => (.quot <$> Shrinkable.shrink q) ++ [.axio ⟨false, 0, .sort 0⟩] + | .recr r => (.recr <$> Shrinkable.shrink r) ++ [.axio ⟨false, 0, .sort 0⟩] + | .cPrj p => .cPrj <$> Shrinkable.shrink p + | .rPrj p => .rPrj <$> Shrinkable.shrink p + | .iPrj p => .iPrj <$> Shrinkable.shrink p + | .dPrj p => .dPrj <$> Shrinkable.shrink p + | .muts ms => if ms.size > 0 then [.muts ms.pop] else [] + +instance : Shrinkable Constant where + shrink c := + (if c.sharing.size > 0 then [{ c with sharing := c.sharing.pop }] else []) ++ + (if c.refs.size > 0 then [{ c with refs := c.refs.pop }] else []) ++ + (if c.univs.size > 0 then [{ c with univs := c.univs.pop }] else []) + +-- DataValue - shrink to simpler variant +instance : Shrinkable DataValue where + shrink + | .ofBool _ => [] + | _ => [.ofBool true] + +/-! 
## SampleableExt instances -/ + +instance : SampleableExt DefKind := SampleableExt.mkSelfContained genDefKind +instance : SampleableExt DefinitionSafety := SampleableExt.mkSelfContained genDefinitionSafety +instance : SampleableExt QuotKind := SampleableExt.mkSelfContained genQuotKindNew +instance : SampleableExt Univ := SampleableExt.mkSelfContained genUniv +instance : SampleableExt Expr := SampleableExt.mkSelfContained genExpr +instance : SampleableExt Definition := SampleableExt.mkSelfContained genDefinition +instance : SampleableExt Axiom := SampleableExt.mkSelfContained genAxiom +instance : SampleableExt Quotient := SampleableExt.mkSelfContained genQuotient +instance : SampleableExt RecursorRule := SampleableExt.mkSelfContained genRecursorRule +instance : SampleableExt Recursor := SampleableExt.mkSelfContained genRecursor +instance : SampleableExt Constructor := SampleableExt.mkSelfContained genConstructor +instance : SampleableExt Inductive := SampleableExt.mkSelfContained genInductive +instance : SampleableExt InductiveProj := SampleableExt.mkSelfContained genInductiveProj +instance : SampleableExt ConstructorProj := SampleableExt.mkSelfContained genConstructorProj +instance : SampleableExt RecursorProj := SampleableExt.mkSelfContained genRecursorProj +instance : SampleableExt DefinitionProj := SampleableExt.mkSelfContained genDefinitionProj +instance : SampleableExt MutConst := SampleableExt.mkSelfContained genMutConst +instance : SampleableExt ConstantInfo := SampleableExt.mkSelfContained genConstantInfo +instance : SampleableExt Constant := SampleableExt.mkSelfContained genConstant + +/-! ## Generators for Metadata Types -/ + +/-- Generate a DataValue. 
-/ +def genDataValueNew : Gen DataValue := + frequency [ + (10, .ofString <$> genAddress), + (10, .ofBool <$> genBool), + (10, .ofName <$> genAddress), + (10, .ofNat <$> genAddress), + (10, .ofInt <$> genAddress), + (10, .ofSyntax <$> genAddress), + ] + +instance : SampleableExt DataValue := SampleableExt.mkSelfContained genDataValueNew + +/-! ## Generators for Constant Metadata Types -/ + +/-- Generate a KVMap entry -/ +def genKVMapEntry : Gen (Address × DataValue) := + Prod.mk <$> genAddress <*> genDataValueNew + +/-- Generate a KVMap (key-value pairs for mdata) -/ +def genKVMap : Gen KVMap := + genSmallArray genKVMapEntry + +/-- Generate an ExprMetaData node with arena indices bounded by arenaSize -/ +def genExprMetaData (arenaSize : Nat := 0) : Gen ExprMetaData := + let genIdx : Gen UInt64 := + if arenaSize == 0 then pure 0 + else UInt64.ofNat <$> Gen.choose Nat 0 (arenaSize - 1) + frequency [ + (20, pure .leaf), + (15, ExprMetaData.app <$> genIdx <*> genIdx), + (15, ExprMetaData.binder <$> genAddress <*> genBinderInfo <*> genIdx <*> genIdx), + (10, ExprMetaData.letBinder <$> genAddress <*> genIdx <*> genIdx <*> genIdx), + (15, ExprMetaData.ref <$> genAddress), + (10, ExprMetaData.prj <$> genAddress <*> genIdx), + (5, ExprMetaData.mdata <$> genSmallArray genKVMap <*> genIdx), + ] + +/-- Generate a valid ExprMetaArena by building nodes bottom-up + so child indices always reference earlier entries. 
-/ +def genExprMetaArena : Gen ExprMetaArena := do + let numNodes ← Gen.choose Nat 0 6 + let mut arena : ExprMetaArena := {} + for _ in [:numNodes] do + let node ← genExprMetaData arena.nodes.size + arena := { nodes := arena.nodes.push node } + pure arena + +/-- Generate a ConstantMeta with all variants -/ +def genConstantMeta : Gen ConstantMeta := do + let arena ← genExprMetaArena + let genRoot : Gen UInt64 := + if arena.nodes.size == 0 then pure 0 + else UInt64.ofNat <$> Gen.choose Nat 0 (arena.nodes.size - 1) + frequency [ + (10, pure .empty), + (15, ConstantMeta.defn <$> genAddress <*> genSmallArray genAddress + <*> genReducibilityHints <*> genSmallArray genAddress <*> genSmallArray genAddress + <*> pure arena <*> genRoot <*> genRoot), + (15, ConstantMeta.axio <$> genAddress <*> genSmallArray genAddress + <*> pure arena <*> genRoot), + (10, ConstantMeta.quot <$> genAddress <*> genSmallArray genAddress + <*> pure arena <*> genRoot), + (15, ConstantMeta.indc <$> genAddress <*> genSmallArray genAddress <*> genSmallArray genAddress + <*> genSmallArray genAddress <*> genSmallArray genAddress + <*> pure arena <*> genRoot), + (15, ConstantMeta.ctor <$> genAddress <*> genSmallArray genAddress <*> genAddress + <*> pure arena <*> genRoot), + (15, ConstantMeta.recr <$> genAddress <*> genSmallArray genAddress <*> genSmallArray genAddress + <*> genSmallArray genAddress <*> genSmallArray genAddress + <*> pure arena <*> genRoot <*> genSmallArray genRoot), + ] + +instance : Shrinkable ExprMetaData where + shrink em := match em with + | .leaf => [] + | _ => [.leaf] + +instance : Shrinkable ExprMetaArena where + shrink arena := if arena.nodes.size > 0 then [{ nodes := arena.nodes.pop }] else [] + +instance : Shrinkable ConstantMeta where + shrink m := match m with + | .empty => [] + | _ => [.empty] + +instance : SampleableExt ExprMetaData := SampleableExt.mkSelfContained (genExprMetaData 5) +instance : SampleableExt ExprMetaArena := SampleableExt.mkSelfContained genExprMetaArena 
instance : SampleableExt ConstantMeta := SampleableExt.mkSelfContained genConstantMeta

/-- Random `Named` entry: an address paired with full constant metadata. -/
def genNamed : Gen Named := do
  let addr ← genAddress
  let meta ← genConstantMeta
  pure (Named.mk addr meta)

/-- Random `Comm`: a pair of independently generated addresses. -/
def genCommNew : Gen Comm := do
  let secret ← genAddress
  let payload ← genAddress
  pure (Comm.mk secret payload)

instance : Shrinkable Named where
  shrink entry :=
    match entry.constMeta with
    | .empty => []
    | _ => [{ entry with constMeta := .empty }]

instance : Shrinkable Comm where
  shrink _ := []

instance : SampleableExt Named := SampleableExt.mkSelfContained genNamed
instance : SampleableExt Comm := SampleableExt.mkSelfContained genCommNew

/-! ## Generators for RawEnv Types -/

/-- Random blob payload: up to 32 random bytes, accumulated directly
    into a `ByteArray`. -/
def genByteArray : Gen ByteArray := do
  let len ← Gen.choose Nat 0 32
  let mut acc : ByteArray := ByteArray.empty
  for _ in [:len] do
    let byte ← Gen.choose Nat 0 255
    acc := acc.push byte.toUInt8
  pure acc

/-- Fuel-bounded random `Ix.Name` for `RawNamed` entries. Anonymous names
    terminate recursion; string and numeric components extend a recursively
    generated prefix. -/
def genIxName : Nat → Gen Ix.Name
  | 0 => pure Ix.Name.mkAnon
  | depth + 1 => Gen.frequency #[
      (3, pure Ix.Name.mkAnon),
      (5, do
        let pre ← genIxName depth
        let seg ← Gen.elements #["a", "b", "test", "foo", "bar"]
        pure (Ix.Name.mkStr pre seg)),
      (2, do
        let pre ← genIxName depth
        let num ← Gen.choose Nat 0 100
        pure (Ix.Name.mkNat pre num))
    ] (pure Ix.Name.mkAnon)

/-- Random `RawConst`: address plus a full random constant. -/
def genRawConst : Gen RawConst := do
  let addr ← genAddress
  let c ← genConstant
  pure (RawConst.mk addr c)

/-- Random `RawNamed` carrying empty metadata (mirrors the Rust test
    generator). Metadata addresses must reference valid names in
    `env.names` for indexed serialization, so no metadata is synthesized. -/
def genRawNamed : Gen RawNamed := do
  let name ← genIxName 3
  let addr ← genAddress
  pure (RawNamed.mk name addr .empty)

/-- Random `RawBlob`: address plus random bytes. -/
def genRawBlob : Gen RawBlob := do
  let addr ← genAddress
  let bytes ← genByteArray
  pure (RawBlob.mk addr bytes)

/-- Random `RawComm`: address plus a random commitment pair. -/
def genRawComm : Gen RawComm := do
  let addr ← genAddress
  let comm ← genCommNew
  pure (RawComm.mk addr comm)

/-- Random `RawNameEntry`: address/name pair. -/
def genRawNameEntry : Gen RawNameEntry := do
  let addr ← genAddress
  let name ← genIxName 3
  pure (RawNameEntry.mk addr name)

/-- Random `RawEnv` built from small arrays (kept tiny to avoid
    memory blow-up in property tests). -/
def genRawEnv : Gen RawEnv := do
  let consts ← genSmallArray genRawConst
  let named ← genSmallArray genRawNamed
  let blobs ← genSmallArray genRawBlob
  let comms ← genSmallArray genRawComm
  let names ← genSmallArray genRawNameEntry
  pure (RawEnv.mk consts named blobs comms names)

instance : Shrinkable RawConst where
  -- Shrink only the inner constant, keeping the address fixed.
  shrink rc := Shrinkable.shrink rc.const |>.map fun c => { rc with const := c }

instance : Shrinkable RawNamed where
  shrink rn :=
    match rn.constMeta with
    | .empty => []
    | _ => [{ rn with constMeta := .empty }]

instance : Shrinkable RawBlob where
  -- A non-empty blob shrinks straight to the empty payload.
  shrink rb := if rb.bytes.size > 0 then [{ rb with bytes := ByteArray.empty }] else []

instance : Shrinkable RawComm where
  shrink _ := []

instance : Shrinkable RawEnv where
  -- Offer one candidate per non-empty component, each dropping its last element.
  shrink env :=
    (if env.consts.isEmpty then [] else [{ env with consts := env.consts.pop }]) ++
    (if env.named.isEmpty then [] else [{ env with named := env.named.pop }]) ++
    (if env.blobs.isEmpty then [] else [{ env with blobs := env.blobs.pop }]) ++
    (if env.comms.isEmpty then [] else [{ env with comms := env.comms.pop }]) ++
    (if env.names.isEmpty then [] else [{ env with names := env.names.pop }])

instance : SampleableExt RawConst := SampleableExt.mkSelfContained genRawConst
instance : SampleableExt RawNamed := SampleableExt.mkSelfContained genRawNamed
instance : SampleableExt RawBlob := SampleableExt.mkSelfContained genRawBlob
instance : SampleableExt RawComm := SampleableExt.mkSelfContained genRawComm
instance : SampleableExt RawEnv := SampleableExt.mkSelfContained genRawEnv
+ +end Tests.Gen.Ixon diff --git a/Tests/Ix.lean b/Tests/Ix.lean deleted file mode 100644 index 5db23ab4..00000000 --- a/Tests/Ix.lean +++ /dev/null @@ -1,314 +0,0 @@ -import LSpec -import Ix.Ixon -import Ix.Address -import LSpec.SlimCheck.Gen -import LSpec -import Blake3 - -import Tests.Common -import Tests.Ix.Common -import Tests.Ix.Ixon -import Tests.Ix.IR - -open LSpec -open SlimCheck -open SlimCheck.Gen - -def serde [Ixon.Serialize A] [BEq A] (x: A) : Bool := - match Ixon.runGet Ixon.Serialize.get (Ixon.runPut <| Ixon.Serialize.put x) with - | .ok (y : A) => x == y - | _ => false - ---open Ix.TransportM --- ---def transportUniv (univ: Ix.Level): Bool := --- match EStateM.run (dematUniv univ) emptyDematState with --- | .ok ixon stt => --- let remat := (ReaderT.run (rematUniv ixon) { meta := stt.meta}) --- match EStateM.run remat (rematStateWithStore stt.store) with --- | .ok ix _ => univ == ix --- | .error _ _ => .false --- | .error _ _ => .false --- ---def transportExpr (x: Ix.Expr): Bool := --- match EStateM.run (dematExpr x) emptyDematState with --- | .ok ixon stt => --- let remat := (ReaderT.run (rematExpr ixon) { meta := stt.meta}) --- match EStateM.run remat (rematStateWithStore stt.store) with --- | .ok ix _ => x == ix --- | .error _ _ => .false --- | .error _ _ => .false --- ---def transportConst (x: Ix.Const): Bool := --- match EStateM.run (dematConst x) emptyDematState with --- | .ok ixon stt => --- let remat := (ReaderT.run (rematConst ixon) { meta := stt.meta}) --- match EStateM.run remat (rematStateWithStore stt.store) with --- | .ok ix _ => x == ix --- | .error _ _ => .false --- | .error _ _ => .false - ---def transportExpr' (x: Ix.Expr): Except TransportError Bool := --- match EStateM.run (dematExpr x) emptyDematState with --- | .ok ixon stt => --- let remat := (ReaderT.run (rematExpr ixon) { meta := stt.meta}) --- match EStateM.run remat emptyRematState with --- | .ok ix _ => .ok (x == ix) --- | .error e _ => .error e --- | .error e _ => .error 
e - ---def ffiConst (x: Ixon.IxonConst) : Bool := --- let bytes := (Ixon.runPut <| Ixon.Serialize.put x) --- Ixon.eqLeanRustSerialization x.ixon.toFFI bytes --- ---def ffiExpr (x: Ixon.IxonExpr) : Bool := --- let bytes := (Ixon.runPut <| Ixon.Serialize.put x) --- Ixon.eqLeanRustSerialization x.ixon.toFFI bytes - - -def myConfig : SlimCheck.Configuration where - numInst := 10000 - maxSize := 100 - traceDiscarded := true - traceSuccesses := true - traceShrink := true - traceShrinkCandidates := true - ---def dbg : IO UInt32 := do --- SlimCheck.Checkable.check (∀ x: Ix.Const, transportConst x) myConfig --- return 0 - ---def Test.Ix.unitTransport : TestSeq := --- testExprs.foldl (init := .done) fun tSeq x => --- tSeq ++ (test s!"transport {repr x}" $ Except.isOk (transportExpr' x)) - - -def Tests.Ix.suite : List LSpec.TestSeq := - [ --- check "metadatum serde" (∀ x : Ixon.Metadatum, serde x), --- check "metadata serde" (∀ x : Ixon.Metadata, serde x), --- check "universe serde" (∀ x : Ixon.Univ, serde x), --- check "universe transport" (∀ x : Ix.Level, transportUniv x), --- check "expr serde" (∀ x : Ixon.IxonExpr, serde x), --- check "expr transport" (∀ x : Ix.Expr, transportExpr x), --- check "expr ffi with Rust" (∀ x : Ixon.IxonExpr, ffiExpr x), - --check "axiom serde" (∀ x : Ixon.Axiom, serde x), - --check "recursor rule serde" (∀ x : Ixon.RecursorRule, serde x), - --check "recursor serde" (∀ x : Ixon.Recursor, serde x), - --check "constructor serde" (∀ x : Ixon.Constructor, serde x), --- check "claim serde" (∀ x : Claim, serde x), --- check "const ffi with Rust" (∀ x : Ixon.IxonConst, ffiConst x), --- check "const transport" (∀ x : Ix.Const, transportConst x), - ] - - -def hexVal? 
(c : Char) : Option UInt8 := - if '0' ≤ c ∧ c ≤ '9' then - some (UInt8.ofNat (c.toNat - '0'.toNat)) - else if 'a' ≤ c ∧ c ≤ 'f' then - some (UInt8.ofNat (10 + (c.toNat - 'a'.toNat))) - else if 'A' ≤ c ∧ c ≤ 'F' then - some (UInt8.ofNat (10 + (c.toNat - 'A'.toNat))) - else - none - -/-- Parse a hexadecimal string like `0xdead_beef_cafe_0123_4567_89ab_cdef` into a `ByteArray`. -Underscores are ignored; `0x`/`0X` prefix is optional. Panics on invalid input. -/ -def parseHex (x : String) : ByteArray := - -- drop optional 0x/0X - let x := - if x.startsWith "0x" || x.startsWith "0X" then x.drop 2 else x - -- remove underscores - let x := String.ofList (x.toList.filter (· ≠ '_')) - -- must have an even number of hex digits - if x.length % 2 = 1 then - panic! "parseHex: odd number of hex digits" - else - let n := x.length - let rec loop (i : Nat) (acc : ByteArray) : ByteArray := - if i < n then - -- safe since ASCII: `String.get!` indexes by chars - let c1 := String.Pos.Raw.get! x ⟨i⟩ - let c2 := String.Pos.Raw.get! x ⟨i+1⟩ - match hexVal? c1, hexVal? c2 with - | some hi, some lo => - let b : UInt8 := (hi <<< 4) ||| lo - loop (i + 2) (acc.push b) - | _, _ => - panic! s!"parseHex: invalid hex at positions {i}..{i+1}" - else - acc - loop 0 ByteArray.empty - -/-- Print a `ByteArray` as a lowercase hex string with a `0x` prefix. -/ -def printHex (ba : ByteArray) : String := - let hexdigits := "0123456789abcdef" - let rec go (i : Nat) (acc : String) : String := - if h : i < ba.size then - let b := ba.get! i - let hi := (b.toNat / 16) - let lo := (b.toNat % 16) - let acc := acc.push (String.Pos.Raw.get! hexdigits ⟨hi⟩) - let acc := acc.push (String.Pos.Raw.get! 
hexdigits ⟨lo⟩) - go (i + 1) acc - else acc - "0x" ++ go 0 "" - -def serde_is [Ixon.Serialize A] [BEq A] (x: A) (expect: String): Bool := Id.run do - let expected := parseHex expect - let bytes := Ixon.runPut (Ixon.Serialize.put x) - if bytes == expected then - match Ixon.runGet Ixon.Serialize.get bytes with - | .ok (y : A) => x == y - | _ => false - else false - -def test_serde [Ixon.Serialize A] [BEq A] [Repr A] (x: A) (expect: String): LSpec.TestSeq := - let expected := parseHex expect - let bytes := Ixon.runPut (Ixon.Serialize.put x) - let res := if bytes == expected then - match Ixon.runGet Ixon.Serialize.get bytes with - | .ok (y : A) => x == y - | _ => false - else false - test s!"serde {repr x} <-> {expect}" res - -open Ixon - - ----- TODO ---def bad : Ixon := Ixon.meta <| .mk [ --- (0, [Metadatum.name `d, .link default, .hints (.regular 576554452), .link default]), --- (1, [.info .instImplicit, .info .instImplicit, .info .strictImplicit]), --- (2, [.all [.mkNum .anonymous 165851424810452359], .info .default]), --- (3, []), --- (4, []), --- (5, [.hints .opaque]), --- (6, [.name <| .mkNum .anonymous 871843802607008850]), --- ] - ---#eval printHex <| runPut <| Serialize.put bad ---"0xe0a78100a400a1716402af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f32620302d4855d2202af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f32628101a30103010301028102a204a1a1880887c551fdfd384d0201008103a08104a08105a103008106a100a18808523c04ba5169190c" - ---#eval runGet (@Serialize.get Ixon _) (runPut (Serialize.put bad)) - - ---def Tests.Ixon.units : List LSpec.TestSeq := --- [ --- test_serde (Ixon.vari 0x0) "0x00", --- test_serde (Ixon.vari 0x7) "0x07", --- test_serde (Ixon.vari 0x8) "0x0808", --- test_serde (Ixon.vari 0xFF) "0x08FF", --- test_serde (Ixon.vari 0x0100) "0x090001", --- test_serde (Ixon.vari 0x0100) "0x090001", --- test_serde (Ixon.vari 0xFFFF) "0x09FFFF", --- test_serde (Ixon.vari 0x010000) "0x0A000001", --- test_serde (Ixon.vari 0xFFFFFF) 
"0x0AFFFFFF", --- test_serde (Ixon.vari 0x01000000) "0x0B00000001", --- test_serde (Ixon.vari 0xFFFFFFFF) "0x0BFFFFFFFF", --- test_serde (Ixon.vari 0x0100000000) "0x0C0000000001", --- test_serde (Ixon.vari 0xFFFFFFFFFF) "0x0CFFFFFFFFFF", --- test_serde (Ixon.vari 0x010000000000) "0x0D000000000001", --- test_serde (Ixon.vari 0xFFFFFFFFFFFF) "0x0DFFFFFFFFFFFF", --- test_serde (Ixon.vari 0x01000000000000) "0x0E00000000000001", --- test_serde (Ixon.vari 0xFFFFFFFFFFFFFF) "0x0EFFFFFFFFFFFFFF", --- test_serde (Ixon.vari 0x0100000000000000) "0x0F0000000000000001", --- test_serde (Ixon.vari 0xFFFFFFFFFFFFFFFF) "0x0FFFFFFFFFFFFFFFFF", --- test_serde (Ixon.sort <| .const 0x0) "0x9000", --- test_serde (Ixon.sort <| .const 0x1F) "0x901F", --- test_serde (Ixon.sort <| .const 0x20) "0x902020", --- test_serde (Ixon.sort <| .const 0xFF) "0x9020FF", --- test_serde (Ixon.sort <| .const 0x0100) "0x90210001", --- test_serde (Ixon.sort <| .const 0xFFFF) "0x9021FFFF", --- test_serde (Ixon.sort <| .const 0x010000) "0x9022000001", --- test_serde (Ixon.sort <| .const 0xFFFFFF) "0x9022FFFFFF", --- test_serde (Ixon.sort <| .const 0x01000000) "0x902300000001", --- test_serde (Ixon.sort <| .const 0xFFFFFFFF) "0x9023FFFFFFFF", --- test_serde (Ixon.sort <| .const 0x0100000000) "0x90240000000001", --- test_serde (Ixon.sort <| .const 0xFFFFFFFFFF) "0x9024FFFFFFFFFF", --- test_serde (Ixon.sort <| .const 0x010000000000) "0x9025000000000001", --- test_serde (Ixon.sort <| .const 0xFFFFFFFFFFFF) "0x9025FFFFFFFFFFFF", --- test_serde (Ixon.sort <| .const 0x01000000000000) "0x902600000000000001", --- test_serde (Ixon.sort <| .const 0xFFFFFFFFFFFFFF) "0x9026FFFFFFFFFFFFFF", --- test_serde (Ixon.sort <| .const 0x0100000000000000) "0x90270000000000000001", --- test_serde (Ixon.sort <| .const 0xFFFFFFFFFFFFFFFF) "0x9027FFFFFFFFFFFFFFFF", --- test_serde (Ixon.sort <| .var 0x0) "0x9040", --- test_serde (Ixon.sort <| .var 0x1F) "0x905F", --- test_serde (Ixon.sort <| .var 0x20) "0x906020", --- test_serde 
(Ixon.sort <| .var 0xFF) "0x9060FF", --- test_serde (Ixon.sort <| .var 0x0100) "0x90610001", --- test_serde (Ixon.sort <| .var 0xFFFFFFFFFFFFFFFF) "0x9067FFFFFFFFFFFFFFFF", --- test_serde (Ixon.sort <| .add 0x0 (.const 0x0)) "0x908000", --- test_serde (Ixon.sort <| .add 0x0 (.var 0x0)) "0x908040", --- test_serde (Ixon.sort <| .add 0x1F (.var 0x0)) "0x909F40", --- test_serde (Ixon.sort <| .add 0x20 (.var 0x0)) "0x90A02040", --- test_serde (Ixon.sort <| .add 0xFF (.var 0x0)) "0x90A0FF40", --- test_serde (Ixon.sort <| .add 0xFFFF_FFFF_FFFF_FFFF (.var 0x0)) "0x90A7FFFFFFFFFFFFFFFF40", --- test_serde (Ixon.sort <| .max (.var 0x0) (.var 0x0)) "0x90C04040", --- test_serde (Ixon.sort <| .max (.var 0x0) (.var 0x1)) "0x90C04041", --- test_serde (Ixon.sort <| .max (.var 0x1) (.var 0x0)) "0x90C04140", --- test_serde (Ixon.sort <| .max (.var 0x1) (.var 0x1)) "0x90C04141", --- test_serde (Ixon.sort <| .imax (.var 0x0) (.var 0x0)) "0x90C14040", --- test_serde (Ixon.sort <| .imax (.var 0x0) (.var 0x1)) "0x90C14041", --- test_serde (Ixon.sort <| .imax (.var 0x1) (.var 0x0)) "0x90C14140", --- test_serde (Ixon.sort <| .imax (.var 0x1) (.var 0x1)) "0x90C14141", --- test_serde (Ixon.sort <| .imax (.var 0x1) (.var 0x1)) "0x90C14141", --- test_serde (Ixon.refr (default) []) "0x10af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262", --- test_serde (Ixon.refr (default) [.var 0x0]) "0x11af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f326240", --- test_serde (Ixon.recr 0x0 [.var 0x0]) "0x20A140", --- test_serde (Ixon.recr 0x0 [.var 0x0, .var 0x1]) "0x20A24041", --- test_serde (Ixon.apps (Ixon.vari 0x0) (Ixon.vari 0x1) []) "0x300001", --- test_serde (Ixon.apps (Ixon.vari 0x0) (Ixon.vari 0x1) [Ixon.vari 0x2]) "0x31000102", --- test_serde (Ixon.apps (Ixon.vari 0x0) (Ixon.vari 0x1) --- [ --- Ixon.vari 0x2, Ixon.vari 0x3, Ixon.vari 0x4, Ixon.vari 0x5, --- Ixon.vari 0x6, Ixon.vari 0x7, Ixon.vari 0x8, Ixon.vari 0x9, --- ]) "0x3808000102030405060708080809", --- test_serde 
(Ixon.lams [Ixon.vari 0x0] (Ixon.vari 0x1)) "0x410001", --- test_serde (Ixon.lams --- [ --- Ixon.vari 0x0, Ixon.vari 0x1, Ixon.vari 0x2, Ixon.vari 0x3, --- Ixon.vari 0x4, Ixon.vari 0x5, Ixon.vari 0x6, Ixon.vari 0x7, --- ] (Ixon.vari 0x8)) "0x480800010203040506070808", --- test_serde (Ixon.alls [Ixon.vari 0x0] (Ixon.vari 0x1)) "0x510001", --- test_serde (Ixon.alls --- [ --- Ixon.vari 0x0, Ixon.vari 0x1, Ixon.vari 0x2, Ixon.vari 0x3, --- Ixon.vari 0x4, Ixon.vari 0x5, Ixon.vari 0x6, Ixon.vari 0x7, --- ] (Ixon.vari 0x8)) "0x580800010203040506070808", --- test_serde (Ixon.proj (default) 0x0 (Ixon.vari 0x0)) "0x60af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f326200", --- test_serde (Ixon.proj (default) 0x8 (Ixon.vari 0x0)) "0x6808af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f326200", --- test_serde (Ixon.strl "") "0x70", --- test_serde (Ixon.strl "foobar") "0x76666f6f626172", --- test_serde (Ixon.natl 0x0) "0x8100", --- test_serde (Ixon.natl 0xFF) "0x81FF", --- test_serde (Ixon.natl 0x100) "0x820001", --- test_serde (Ixon.letE true (Ixon.vari 0x0) (Ixon.vari 0x1) (Ixon.vari 0x2)) "0x91000102", --- test_serde (Ixon.list []) "0xA0", --- test_serde (Ixon.list [Ixon.vari 0x0, Ixon.vari 0x1, Ixon.vari 0x2]) "0xA3000102", --- test_serde (Ixon.defn (.mk .definition .unsafe 0 (default) (default))) "0xB000008100af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262", --- test_serde (Ixon.defn (.mk .opaque .safe 1 default default)) "0xB001018101af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262", --- test_serde (Ixon.axio ⟨true, 0, default⟩) "0xB1018100af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262", --- test_serde (Ixon.quot ⟨.type, 0, default⟩) "0xB2008100af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262", --- test_serde (Ixon.cprj ⟨0, 0, default⟩) 
"0xB381008100af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262", --- test_serde (Ixon.rprj ⟨0, 0, default⟩) "0xB481008100af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262", --- test_serde (Ixon.iprj ⟨0, default⟩) "0xB58100af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262", --- test_serde (Ixon.dprj ⟨0, default⟩) "0xB68100af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262", --- test_serde (Ixon.inds []) "0xC0", --- test_serde (Ixon.inds [⟨false, false, false, 0, 0, 0, 0, default, [], []⟩]) "0xC1008100810081008100af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262A0A0", --- test_serde (Ixon.inds [⟨false, false, false, 0, 0, 0, 0, default, [⟨false, 0,0,0,0, default⟩], [⟨false, false, 0,0,0,0,0,default, [⟨0, default⟩]⟩]⟩]) "0xC1008100810081008100af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262A1008100810081008100af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262A10081008100810081008100af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262A18100af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262", --- test_serde (Ixon.defs []) "0xD0", --- test_serde (Ixon.defs [⟨.definition, .unsafe, 0, default, default⟩]) "0xD100008100af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262", --- test_serde (Ixon.meta ⟨[]⟩) "0xE0A0", --- test_serde (Ixon.meta ⟨[(0, [])]⟩) "0xE0A18100A0", --- test_serde (Ixon.meta ⟨[(0, [.name .anonymous])]⟩) "0xE0A18100A100A0", --- test_serde (Ixon.meta ⟨[(0, [.name `a])]⟩) "0xE0A18100A100A17161", --- test_serde (Ixon.meta ⟨[(0, [.name `a.b])]⟩) "0xE0A18100A100A271617162", --- test_serde (Ixon.meta ⟨[(0, [.name `a.b.c])]⟩) "0xE0A18100A100A3716171627163", --- test_serde (Ixon.meta ⟨[(0, [.name (.mkNum .anonymous 165851424810452359)])]⟩) "0xE0A18100A100A1880887C551FDFD384D02", --- test_serde (Ixon.meta ⟨[(0, [Metadatum.name `d, .link default, .hints (.regular 
576554452), .link default])]⟩) "0xe0a18100a400a1716402af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f32620302d4855d2202af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262", --- test_serde (Ixon.meta ⟨[(0, [.hints (.regular 42)])]⟩) "0xe0a18100a103022a000000", --- test_serde bad "0xe0a78100a400a1716402af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f32620302d4855d2202af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f32628101a30103010301028102a204a1a1880887c551fdfd384d0201008103a08104a08105a103008106a100a18808523c04ba5169190c" --- ] - - diff --git a/Tests/Ix/Canon.lean b/Tests/Ix/Canon.lean deleted file mode 100644 index 6de47583..00000000 --- a/Tests/Ix/Canon.lean +++ /dev/null @@ -1,107 +0,0 @@ ---import LSpec --- ---import Ix.Ixon ---import Ix.Address ---import Ix.Common ---import Ix.CanonM ---import Ix.Meta ---import Lean ---import Tests.Ix.Fixtures --- ---@[specialize] ---def withExceptOkM --- [Monad m] (descr : String) (exc : Except ε α) [ToString ε] (f : α → m LSpec.TestSeq) --- : m LSpec.TestSeq := --- match exc with --- | .error e => return LSpec.test descr (LSpec.ExpectationFailure "ok _" s!"error {e}") --- | .ok a => return LSpec.test descr true $ ← f a --- -----abbrev CanonTest := Ix.CanonM.CanonMState → LSpec.TestSeq -----abbrev IOCanonTest := Ix.CanonM.CanonMState → IO LSpec.TestSeq --- --- ---def Test.Ix.Canon.wellfounded : IO LSpec.TestSeq := do --- let env <- get_env! --- let stt <- match <- Ix.CanonM.canonicalizeDelta env.constants env.getDelta with --- | .error e => return LSpec.test "canonicalizeFailure" (LSpec.ExpectationFailure "ok _" s!"error {e}") --- | .ok stt => pure stt --- let (a,_) := stt.names.find! `WellFounded.A --- let (b,_) := stt.names.find! 
`WellFounded.A' --- return LSpec.test "A == A'" (a == b) - --- `WellFounded.A == `WellFounded.A' - ---def Tests.Ix.Canon.suite : List LSpec.TestSeq := --- [ --- --- ] - ---/-- Run tests from extractors given a Lean source file -/ ---def canonTestsFromFile (source : FilePath) --- (canon : List Extractor) (ioExtractors : List IOExtractor) --- (setPaths quick : Bool := true) : IO TestSeq := do --- if setPaths then Lean.setLibsPaths --- let leanEnv ← Lean.runFrontend (← IO.FS.readFile source) source --- let (constMap, delta) := leanEnv.getConstsAndDelta --- withExceptOkM s!"Content-addresses {source}" --- (← contAddr constMap delta quick false) fun stt => do --- let pureTests := extractors.foldl (init := .done) --- fun acc ext => acc ++ (ext stt) --- ioExtractors.foldlM (init := pureTests) fun acc ext => --- do pure $ acc ++ (← ext stt) - ---/-- Calls `ensembleTestExtractors` for multiple sources -/ ---def ensembleTestExtractors' (sources : List FilePath) --- (extractors : List Extractor) (ioExtractors : List IOExtractor) --- (setPaths : Bool := true) : IO TestSeq := --- sources.foldlM (init := .done) fun acc source => do --- let g := group s!"Tests for {source}" $ --- ← ensembleTestExtractors source extractors ioExtractors setPaths --- pure $ acc ++ g --- ---/-- Asserts that all constants typechecks -/ ---def extractTypecheckingTests : Extractor := fun stt => --- withExceptOk "Typechecking succeeds" (typecheckAll stt.store stt.env.constNames) --- fun _ => .done --- ---/-- Asserts that some constant doesn't typecheck -/ ---def extractNonTypecheckingTests : Extractor := fun stt => --- withExceptError "Typechecking fails" (typecheckAll stt.store stt.env.constNames) --- fun _ => .done --- ---section AnonHashGroups --- ---/- ---This section defines an extractor that consumes a list of groups of names and ---creates tests that assert that: ---1. Each pair of constants in the same group has the same anon hash ---2. 
Each pair of constants in different groups has different anon hashes ----/ --- ---def extractAnonGroups (groups : List (List Name)) (stt : ContAddrState) : --- Except String (Array (Array $ Name × Lurk.F)) := Id.run do --- let mut notFound : Array Name := #[] --- let mut hashGroups : Array (Array $ Name × Lurk.F) := #[] --- for group in groups do --- let mut hashGroup : Array (Name × Lurk.F) := #[] --- for name in group do --- match stt.env.consts.find? name with --- | none => notFound := notFound.push name --- | some h => hashGroup := hashGroup.push (name, h) --- hashGroups := hashGroups.push hashGroup --- if notFound.isEmpty then --- return .ok hashGroups --- else --- return .error s!"Not found: {", ".intercalate $ notFound.data.map toString}" --- ---def extractAnonGroupsTests (groups : List $ List Name) : Extractor := fun stt => --- withExceptOk "All constants can be found" (extractAnonGroups groups stt) --- fun anonGroups => --- let anonEqTests := anonGroups.foldl (init := .done) fun tSeq anonGroup => --- anonGroup.data.pairwise.foldl (init := tSeq) fun tSeq (x, y) => --- tSeq ++ test s!"{x.1}ₐₙₒₙ = {y.1}ₐₙₒₙ" (x.2 == y.2) --- anonGroups.data.pairwise.foldl (init := anonEqTests) fun tSeq (g, g') => --- (g.data.cartesian g'.data).foldl (init := tSeq) fun tSeq (x, y) => --- tSeq ++ test s!"{x.1}ₐₙₒₙ ≠ {y.1}ₐₙₒₙ" (x.2 != y.2) --- ---end AnonHashGroups diff --git a/Tests/Ix/CanonM.lean b/Tests/Ix/CanonM.lean new file mode 100644 index 00000000..e8c4d2fa --- /dev/null +++ b/Tests/Ix/CanonM.lean @@ -0,0 +1,505 @@ +/- + Unit tests for CanonM module - verifies canonicalization roundtrips. +-/ + +import Ix.CanonM +import Ix.Environment +import Ix.Meta +import LSpec + +open LSpec Ix.CanonM + +namespace Tests.CanonM + +/-! 
## Name roundtrip tests -/ + +def testNameRoundtrip : TestSeq := + let names : List Lean.Name := [ + .anonymous, + .str .anonymous "foo", + .str .anonymous "bar", + .str (.str .anonymous "Foo") "bar", + .num .anonymous 0, + .num .anonymous 42, + .num (.str .anonymous "test") 123 + ] + group "name roundtrip" <| names.foldl (init := .done) fun acc n => + let stt : CanonState := {} + let (name, _stt') := StateT.run (canonName n) stt + let ustt : UncanonState := { names := {}, levels := {}, exprs := {} } + let (leanName, _) := StateT.run (uncanonName name) ustt + acc ++ test s!"{n}" (n == leanName) + +/-! ## Level roundtrip tests -/ + +def testLevelRoundtrip : TestSeq := + let levels : List Lean.Level := [ + .zero, + .succ .zero, + .succ (.succ .zero), + .max .zero .zero, + .max (.succ .zero) .zero, + .imax .zero .zero, + .param `u, + .param `v, + .max (.param `u) (.succ (.param `v)) + ] + group "level roundtrip" <| levels.foldl (init := .done) fun acc l => + let stt : CanonState := {} + let (level, _stt') := StateT.run (canonLevel l) stt + let ustt : UncanonState := { names := {}, levels := {}, exprs := {} } + let (leanLevel, _) := StateT.run (uncanonLevel level) ustt + acc ++ test s!"{l}" (l == leanLevel) + +/-! 
## Expr roundtrip tests -/ + +def testExprRoundtrip : TestSeq := + let exprs : List Lean.Expr := [ + .bvar 0, + .bvar 42, + .sort .zero, + .sort (.succ .zero), + .const `Nat [], + .const `List [.zero], + .const `Eq [.param `u], + .app (.const `Nat.succ []) (.bvar 0), + .lam `x (.const `Nat []) (.bvar 0) .default, + .forallE `x (.const `Nat []) (.const `Nat []) .default, + .lit (.natVal 0), + .lit (.natVal 42), + .lit (.strVal "hello"), + .lit (.strVal "") + ] + group "expr roundtrip" <| exprs.foldl (init := .done) fun acc e => + let stt : CanonState := {} + let (expr, _stt') := StateT.run (canonExpr e) stt + let ustt : UncanonState := { names := {}, levels := {}, exprs := {} } + let (leanExpr, _) := StateT.run (uncanonExpr expr) ustt + acc ++ test s!"{e}" (e == leanExpr) + +/-! ## Hash determinism tests -/ + +def testHashDeterminism : TestSeq := + group "hash determinism" <| + let n1 := Ix.Name.mkAnon + let n2 := Ix.Name.mkAnon + test "mkAnon same hash" (n1.getHash == n2.getHash) ++ + let n3 := Ix.Name.mkStr Ix.Name.mkAnon "foo" + let n4 := Ix.Name.mkStr Ix.Name.mkAnon "foo" + test "mkStr same hash" (n3.getHash == n4.getHash) ++ + let n5 := Ix.Name.mkStr Ix.Name.mkAnon "foo" + let n6 := Ix.Name.mkStr Ix.Name.mkAnon "bar" + test "different strings different hash" (n5.getHash != n6.getHash) ++ + let l1 := Ix.Level.mkZero + let l2 := Ix.Level.mkZero + test "mkZero same hash" (l1.getHash == l2.getHash) ++ + let l3 := Ix.Level.mkSucc Ix.Level.mkZero + let l4 := Ix.Level.mkSucc Ix.Level.mkZero + test "mkSucc same hash" (l3.getHash == l4.getHash) + +/-! ## Interning tests -/ + +def testInterning : TestSeq := + group "interning" <| + -- Same Lean name should produce same pointer + let stt : CanonState := {} + let n := Lean.Name.mkStr .anonymous "test" + let (name1, stt') := StateT.run (canonName n) stt + let (name2, _) := StateT.run (canonName n) stt' + -- They should have the same hash + test "same name same hash" (name1.getHash == name2.getHash) + +/-! 
## Full suite -/ + +def suite : TestSeq := + group "CanonM" <| + testNameRoundtrip ++ + testLevelRoundtrip ++ + testExprRoundtrip ++ + testHashDeterminism ++ + testInterning + +/-! ## Environment canonicalization test (IO) -/ + +/-- FFI to canonicalize environment in Rust and return Ix.RawEnvironment. + Takes the original environment as List (Lean.Name × Lean.ConstantInfo), + returns arrays of pairs that Lean converts to HashMaps. -/ +@[extern "rs_canonicalize_env_to_ix"] +opaque rsCanonicalizeEnvToIxRaw : + @& List (Lean.Name × Lean.ConstantInfo) → + IO Ix.RawEnvironment + +/-- Canonicalize environment in Rust and convert to Ix.Environment. -/ +def rsCanonicalizeEnvToIx (consts : List (Lean.Name × Lean.ConstantInfo)) : IO Ix.Environment := do + let raw ← rsCanonicalizeEnvToIxRaw consts + pure raw.toEnvironment + +/-! ## Ix.ConstantInfo comparison -/ + +/-- Compare two Ix.ConstantVal for equality. -/ +def ixConstantValEq (a b : Ix.ConstantVal) : Bool := + a.name == b.name && + a.levelParams == b.levelParams && + a.type == b.type + +/-- Compare two Ix.RecursorRule for equality. -/ +def ixRecursorRuleEq (a b : Ix.RecursorRule) : Bool := + a.ctor == b.ctor && a.nfields == b.nfields && a.rhs == b.rhs + +/-- Compare two Ix.ConstantInfo for full equality. 
-/ +def ixConstInfoEq (a b : Ix.ConstantInfo) : Bool := + match a, b with + | .axiomInfo v1, .axiomInfo v2 => + ixConstantValEq v1.cnst v2.cnst && v1.isUnsafe == v2.isUnsafe + | .defnInfo v1, .defnInfo v2 => + ixConstantValEq v1.cnst v2.cnst && + v1.value == v2.value && + v1.hints == v2.hints && + v1.safety == v2.safety && + v1.all == v2.all + | .thmInfo v1, .thmInfo v2 => + ixConstantValEq v1.cnst v2.cnst && + v1.value == v2.value && + v1.all == v2.all + | .opaqueInfo v1, .opaqueInfo v2 => + ixConstantValEq v1.cnst v2.cnst && + v1.value == v2.value && + v1.isUnsafe == v2.isUnsafe && + v1.all == v2.all + | .quotInfo v1, .quotInfo v2 => + ixConstantValEq v1.cnst v2.cnst && v1.kind == v2.kind + | .inductInfo v1, .inductInfo v2 => + ixConstantValEq v1.cnst v2.cnst && + v1.numParams == v2.numParams && + v1.numIndices == v2.numIndices && + v1.all == v2.all && + v1.ctors == v2.ctors && + v1.numNested == v2.numNested && + v1.isRec == v2.isRec && + v1.isUnsafe == v2.isUnsafe && + v1.isReflexive == v2.isReflexive + | .ctorInfo v1, .ctorInfo v2 => + ixConstantValEq v1.cnst v2.cnst && + v1.induct == v2.induct && + v1.cidx == v2.cidx && + v1.numParams == v2.numParams && + v1.numFields == v2.numFields && + v1.isUnsafe == v2.isUnsafe + | .recInfo v1, .recInfo v2 => + ixConstantValEq v1.cnst v2.cnst && + v1.all == v2.all && + v1.numParams == v2.numParams && + v1.numIndices == v2.numIndices && + v1.numMotives == v2.numMotives && + v1.numMinors == v2.numMinors && + v1.rules.size == v2.rules.size && + (v1.rules.zip v2.rules |>.all fun (r1, r2) => ixRecursorRuleEq r1 r2) && + v1.k == v2.k && + v1.isUnsafe == v2.isUnsafe + | _, _ => false + +/-- Describe which field differs between two Ix.ConstantInfo. 
-/ +def ixConstInfoDiff (a b : Ix.ConstantInfo) : String := + match a, b with + | .axiomInfo v1, .axiomInfo v2 => + if v1.cnst.name != v2.cnst.name then "name" + else if v1.cnst.levelParams != v2.cnst.levelParams then "levelParams" + else if v1.cnst.type != v2.cnst.type then "type" + else if v1.isUnsafe != v2.isUnsafe then "isUnsafe" + else "unknown" + | .defnInfo v1, .defnInfo v2 => + if v1.cnst.name != v2.cnst.name then "name" + else if v1.cnst.levelParams != v2.cnst.levelParams then "levelParams" + else if v1.cnst.type != v2.cnst.type then "type" + else if v1.value != v2.value then "value" + else if v1.hints != v2.hints then "hints" + else if v1.safety != v2.safety then "safety" + else if v1.all != v2.all then "all" + else "unknown" + | .thmInfo v1, .thmInfo v2 => + if v1.cnst.name != v2.cnst.name then "name" + else if v1.cnst.type != v2.cnst.type then "type" + else if v1.value != v2.value then "value" + else "unknown" + | _, _ => s!"variant mismatch: {a.getCnst.name} vs {b.getCnst.name}" + +/-! ## Consolidated canonicalization roundtrip test + +This test verifies the full canonicalization pipeline: +1. Get Lean.Environment +2. Canonicalize via Rust FFI → Ix.Environment +3. Compare Rust Ix.Environment against original env (iterate env.constants) +4. Canonicalize via Lean → Ix.Environment +5. Compare Lean Ix.Environment against Rust (should be identical) +6. Uncanonicalize back to Lean.Environment +7. Compare roundtripped environment to original (iterate env.constants) +-/ + +/-- Run the full canonicalization roundtrip test. -/ +def testFullCanonRoundtrip : TestSeq := + .individualIO "full canonicalization roundtrip" (do + let env ← get_env! + let numConsts := env.constants.toList.length + + IO.println s!"[Test] Starting canonicalization roundtrip test" + IO.println s!"[Test] Environment has {numConsts} constants" + IO.println "" + + -- Step 1: Canonicalize in Rust + IO.println s!"[Test] Step 1: Canonicalizing in Rust..." 
+ let rustStart ← IO.monoMsNow + let rustIxEnv ← rsCanonicalizeEnvToIx env.constants.toList + let rustTime := (← IO.monoMsNow) - rustStart + IO.println s!"[Test] Rust: {rustIxEnv.consts.size} consts in {formatTime rustTime}" + IO.println "" + + -- Step 2: Canonicalize in Lean (parallel) + IO.println s!"[Test] Step 2: Canonicalizing in Lean (parallel)..." + let leanStart ← IO.monoMsNow + let leanIxConsts := canonEnvParallel env + IO.println s!"[Test] Lean: {leanIxConsts.size} consts" + let leanTime := (← IO.monoMsNow) - leanStart + IO.println s!" in {formatTime leanTime}" + IO.println "" + + -- Step 3: Compare Rust vs Lean by iterating env.constants + IO.println s!"[Test] Step 3: Comparing Rust vs Lean..." + let compareStart ← IO.monoMsNow + let mut mismatches := 0 + let mut rustMissing := 0 + let mut leanMissing := 0 + let mut processed := 0 + let mut lastReport := 0 + + for (name, _) in env.constants do + -- Compute canonical name to look up in both environments + let stt : CanonState := {} + let (ixName, _) := StateT.run (canonName name) stt + + -- Direct HashMap lookup (now that Hashable matches Rust) + let rustResult := rustIxEnv.consts.get? ixName + let leanResult := leanIxConsts.get? 
ixName + + match rustResult, leanResult with + | some rustConst, some leanConst => + if !ixConstInfoEq rustConst leanConst then + if mismatches < 5 then + let diff := ixConstInfoDiff rustConst leanConst + IO.println s!"[Test] Mismatch: {name} ({diff})" + mismatches := mismatches + 1 + | none, some _ => + if rustMissing < 5 then + IO.println s!"[Test] Missing in Rust: {name}" + rustMissing := rustMissing + 1 + | some _, none => + if leanMissing < 5 then + IO.println s!"[Test] Missing in Lean: {name}" + leanMissing := leanMissing + 1 + | none, none => + -- Both missing - this shouldn't happen since we're iterating env.constants + if mismatches < 5 then + IO.println s!"[Test] Missing in both: {name}" + mismatches := mismatches + 1 + + processed := processed + 1 + if processed - lastReport >= 10000 then + IO.print s!"\r[Test] Compared {processed}/{numConsts}... " + (← IO.getStdout).flush + lastReport := processed + + let compareTime := (← IO.monoMsNow) - compareStart + IO.println s!"\r[Test] Compared {processed}: {mismatches} mismatches, {rustMissing} missing in Rust, {leanMissing} missing in Lean ({formatTime compareTime})" + + if rustMissing > 0 || leanMissing > 0 || mismatches > 0 then + return (false, some s!"Rust vs Lean: {mismatches} mismatches, {rustMissing} missing in Rust, {leanMissing} missing in Lean") + IO.println "" + + -- Step 4: Uncanonicalize Lean's Ix constants back to Lean (parallel) + IO.println s!"[Test] Step 4: Uncanonicalize Lean's Ix constants (parallel)..." + let uncanonStart ← IO.monoMsNow + let roundtripped := uncanonEnvParallel leanIxConsts + IO.println s!"[Test] Uncanonicalized {roundtripped.size}" + let uncanonTime := (← IO.monoMsNow) - uncanonStart + IO.println s!" in {formatTime uncanonTime}" + IO.println "" + + -- Step 5: Compare roundtripped to original (parallel with pointer-pair caching) + IO.println s!"[Test] Step 5: Comparing roundtripped to original (parallel)..." 
+ let verifyStart ← IO.monoMsNow + -- Convert env.constants (SMap) to HashMap for parallel comparison + let origMap : Std.HashMap Lean.Name Lean.ConstantInfo := + env.constants.fold (init := {}) fun acc name const => acc.insert name const + let (rtMismatches, rtMissing, mismatchNames, missingNames) := compareEnvsParallel origMap roundtripped + for name in missingNames.toList.take 5 do + IO.println s!"[Test] Missing after roundtrip: {name}" + for name in mismatchNames.toList.take 5 do + IO.println s!"[Test] Mismatch after roundtrip: {name}" + let verifyTime := (← IO.monoMsNow) - verifyStart + IO.println s!"[Test] Verified {numConsts}: {rtMissing} missing, {rtMismatches} mismatches ({formatTime verifyTime})" + IO.println "" + + -- Summary + let totalTime := rustTime + leanTime + compareTime + uncanonTime + verifyTime + let speedup := if rustTime > 0 then leanTime / rustTime else 0 + IO.println s!"[Test] Summary:" + IO.println s!"[Test] Total time: {formatTime totalTime}" + IO.println s!"[Test] Rust canonicalize: {formatTime rustTime}" + IO.println s!"[Test] Lean canonicalize: {formatTime leanTime}" + IO.println s!"[Test] Rust speedup: ~{speedup}x" + IO.println "" + + let success := rustMissing == 0 && leanMissing == 0 && mismatches == 0 && rtMissing == 0 && rtMismatches == 0 + let failMsg := if !success then + some s!"rustMissing={rustMissing}, leanMissing={leanMissing}, mismatches={mismatches}, rtMissing={rtMissing}, rtMismatches={rtMismatches}" + else none + + pure (success, failMsg) + ) .done + +/-! ## Pure Lean canonicalization roundtrip test + +This test verifies canonicalization and uncanonicalization work correctly +in pure Lean without any Rust FFI: +1. Get Lean.Environment +2. Canonicalize via Lean → Ix.Environment +3. Uncanonicalize back to Lean.Environment +4. Compare roundtripped environment to original +-/ + +/-- Run the pure Lean canonicalization roundtrip test. 
-/ +def testPureLeanRoundtrip : TestSeq := + .individualIO "pure Lean canonicalization roundtrip" (do + let env ← get_env! + let numConsts := env.constants.toList.length + + IO.println s!"[Test] Starting pure Lean canonicalization roundtrip test" + IO.println s!"[Test] Environment has {numConsts} constants" + IO.println "" + + -- Step 1: Canonicalize in Lean + IO.println s!"[Test] Step 1: Canonicalizing in Lean..." + let canonStart ← IO.monoMsNow + let (ixEnv, _) := StateT.run (canonEnv env) {} + IO.println s!"[Test] Canonicalized {ixEnv.consts.size} consts" + let canonTime := (← IO.monoMsNow) - canonStart + IO.println s!" in {formatTime canonTime}" + + -- Step 2: Uncanonicalize back to Lean + IO.println s!"[Test] Step 2: Uncanonicalize back to Lean..." + let uncanonStart ← IO.monoMsNow + let (env2, _) := StateT.run (uncanonEnv ixEnv) {} + IO.println s!"[Test] Uncanonicalized {env2.size} constants" + let uncanonTime := (← IO.monoMsNow) - uncanonStart + IO.println s!" in {formatTime uncanonTime}" + + -- Step 3: Compare roundtripped to original (parallel) + IO.println s!"[Test] Step 3: Comparing roundtripped to original (parallel)..." 
+ let verifyStart ← IO.monoMsNow + let origMap : Std.HashMap Lean.Name Lean.ConstantInfo := + env.constants.fold (init := {}) fun acc name const => acc.insert name const + let (mismatches, missing, mismatchNames, missingNames) := compareEnvsParallel origMap env2 + for name in missingNames.toList.take 5 do + IO.println s!"[Test] Missing after roundtrip: {name}" + for name in mismatchNames.toList.take 5 do + IO.println s!"[Test] Mismatch after roundtrip: {name}" + let verifyTime := (← IO.monoMsNow) - verifyStart + IO.println s!"[Test] Verified {numConsts}: {missing} missing, {mismatches} mismatches ({formatTime verifyTime})" + IO.println "" + + -- Summary + let totalTime := canonTime + uncanonTime + verifyTime + IO.println s!"[Test] Summary:" + IO.println s!"[Test] Total time: {formatTime totalTime}" + IO.println s!"[Test] Canonicalize: {formatTime canonTime}" + IO.println s!"[Test] Uncanonicalize: {formatTime uncanonTime}" + IO.println s!"[Test] Verify: {formatTime verifyTime}" + IO.println "" + + let success := missing == 0 && mismatches == 0 + let failMsg := if !success then + some s!"missing={missing}, mismatches={mismatches}" + else none + + pure (success, failMsg) + ) .done + +/-! ## Parallel Lean canonicalization roundtrip test + +This test verifies parallel canonicalization using ShardMap: +1. Get Lean.Environment +2. Canonicalize via ParallelCanonM → Ix.Environment +3. Uncanonicalize back to Lean.Environment +4. Compare roundtripped environment to original +-/ + +/-- Run the parallel Lean canonicalization roundtrip test. -/ +def testParallelLeanRoundtrip : TestSeq := + .individualIO "parallel Lean canonicalization roundtrip" (do + let env ← get_env! + let numConsts := env.constants.toList.length + + IO.println s!"[Test] Starting parallel Lean canonicalization roundtrip test" + IO.println s!"[Test] Environment has {numConsts} constants" + IO.println "" + + -- Step 1: Canonicalize in parallel + IO.println s!"[Test] Step 1: Canonicalizing in parallel..." 
+ let canonStart ← IO.monoMsNow + let ixConsts := canonEnvParallel env + IO.println s!"[Test] Canonicalized {ixConsts.size} consts" + let canonTime := (← IO.monoMsNow) - canonStart + IO.println s!" in {formatTime canonTime}" + + -- Step 2: Uncanonicalize back to Lean (parallel) + IO.println s!"[Test] Step 2: Uncanonicalize back to Lean (parallel)..." + let uncanonStart ← IO.monoMsNow + let env2 := uncanonEnvParallel ixConsts + IO.println s!"[Test] Uncanonicalized {env2.size} constants" + let uncanonTime := (← IO.monoMsNow) - uncanonStart + IO.println s!" in {formatTime uncanonTime}" + + -- Step 3: Compare roundtripped to original (parallel) + IO.println s!"[Test] Step 3: Comparing roundtripped to original (parallel)..." + let verifyStart ← IO.monoMsNow + let origMap : Std.HashMap Lean.Name Lean.ConstantInfo := + env.constants.fold (init := {}) fun acc name const => acc.insert name const + let (mismatches, missing, mismatchNames, missingNames) := compareEnvsParallel origMap env2 + for name in missingNames.toList.take 5 do + IO.println s!"[Test] Missing after roundtrip: {name}" + for name in mismatchNames.toList.take 5 do + IO.println s!"[Test] Mismatch after roundtrip: {name}" + let verifyTime := (← IO.monoMsNow) - verifyStart + IO.println s!"[Test] Verified {numConsts}: {missing} missing, {mismatches} mismatches ({formatTime verifyTime})" + IO.println "" + + -- Summary + let totalTime := canonTime + uncanonTime + verifyTime + IO.println s!"[Test] Summary:" + IO.println s!"[Test] Total time: {formatTime totalTime}" + IO.println s!"[Test] Canonicalize: {formatTime canonTime}" + IO.println s!"[Test] Uncanonicalize: {formatTime uncanonTime}" + IO.println s!"[Test] Verify: {formatTime verifyTime}" + IO.println "" + + let success := missing == 0 && mismatches == 0 + let failMsg := if !success then + some s!"missing={missing}, mismatches={mismatches}" + else none + + pure (success, failMsg) + ) .done + +/-- Full canonicalization roundtrip test suite (expensive, in ignored 
tests). -/ +def rustSuiteIO : List TestSeq := [ + testFullCanonRoundtrip, +] + +def serialSuiteIO : List TestSeq := [ + testPureLeanRoundtrip +] + +def parallelSuiteIO : List TestSeq := [ + testParallelLeanRoundtrip +] + +end Tests.CanonM diff --git a/Tests/Ix/Claim.lean b/Tests/Ix/Claim.lean new file mode 100644 index 00000000..f5d0e8ac --- /dev/null +++ b/Tests/Ix/Claim.lean @@ -0,0 +1,90 @@ +/- + Serialization roundtrip and encoding tests for Ix.Claim types. +-/ + +import Ix.Claim +import Tests.Gen.Claim + +open LSpec SlimCheck +open Ixon (runGet) +open Ix (RevealConstructorInfo RevealRecursorRule RevealConstantInfo Claim) + +/-! ## Roundtrip helper -/ + +def claimSerde (c : Claim) : Bool := + let bytes := Claim.ser c + match runGet Claim.get bytes with + | .ok c' => c == c' + | .error _ => false + +/-! ## Unit tests -/ + +private def addr1 : Address := Address.blake3 "hello".toUTF8 +private def addr2 : Address := Address.blake3 "world".toUTF8 +private def addr3 : Address := Address.blake3 "test".toUTF8 + +def claimUnits : TestSeq := + -- EvalClaim + test "EvalClaim roundtrip" (claimSerde (.eval addr1 addr2)) + -- CheckClaim + ++ test "CheckClaim roundtrip" (claimSerde (.check addr1)) + -- RevealClaim with defn revealing only safety + ++ test "RevealClaim defn safety-only" (claimSerde (.reveal addr1 + (.defn none (some .safe) none none none))) + -- RevealClaim with defn revealing all fields + ++ test "RevealClaim defn all fields" (claimSerde (.reveal addr1 + (.defn (some .defn) (some .safe) (some 3) (some addr2) (some addr3)))) + -- RevealClaim with axio revealing type + ++ test "RevealClaim axio with type" (claimSerde (.reveal addr1 + (.axio none none (some addr2)))) + -- RevealClaim with recr with rules + ++ test "RevealClaim recr with rules" (claimSerde (.reveal addr1 + (.recr (some true) none (some 2) none none none none none + (some #[⟨0, 3, addr2⟩])))) + -- RevealClaim with muts with component + ++ test "RevealClaim muts with component" (claimSerde 
(.reveal addr1 + (.muts #[(0, .defn (some .defn) (some .safe) none none none)]))) + -- Projection variants + ++ test "RevealClaim cPrj" (claimSerde (.reveal addr1 + (.cPrj (some 0) (some 1) (some addr2)))) + ++ test "RevealClaim rPrj" (claimSerde (.reveal addr1 + (.rPrj (some 2) (some addr2)))) + ++ test "RevealClaim iPrj" (claimSerde (.reveal addr1 + (.iPrj (some 3) (some addr2)))) + ++ test "RevealClaim dPrj" (claimSerde (.reveal addr1 + (.dPrj (some 0) (some addr2)))) + -- Empty fields + ++ test "RevealClaim defn all none" (claimSerde (.reveal addr1 + (.defn none none none none none))) + -- Quot variant + ++ test "RevealClaim quot" (claimSerde (.reveal addr1 + (.quot (some .type) (some 1) (some addr2)))) + +/-! ## Byte-level encoding tests -/ + +def claimEncodingTests : TestSeq := + let evalBytes := Claim.ser (.eval addr1 addr2) + let checkBytes := Claim.ser (.check addr1) + let revealSafetyOnly := Claim.ser (.reveal addr1 (.defn none (some .safe) none none none)) + let revealAllFields := Claim.ser (.reveal addr1 + (.defn (some .defn) (some .safe) (some 3) (some addr2) (some addr3))) + -- EvalClaim: starts with 0xE4, total 65 bytes (1 tag + 32 + 32) + test "EvalClaim tag byte is 0xE4" (evalBytes.data[0]! == 0xE4) + ++ test "EvalClaim size is 65" (evalBytes.size == 65) + -- CheckClaim: starts with 0xE3, total 33 bytes (1 tag + 32) + ++ test "CheckClaim tag byte is 0xE3" (checkBytes.data[0]! == 0xE3) + ++ test "CheckClaim size is 33" (checkBytes.size == 33) + -- RevealClaim: starts with 0xE6 + ++ test "RevealClaim tag byte is 0xE6" (revealSafetyOnly.data[0]! == 0xE6) + -- RevealClaim safety-only defn: 36 bytes (1 tag + 32 comm + 1 variant + 1 mask + 1 safety) + ++ test "RevealClaim safety-only defn size is 36" (revealSafetyOnly.size == 36) + -- RevealClaim with all defn fields should be larger + ++ test "RevealClaim all-fields defn is larger" (revealAllFields.size > revealSafetyOnly.size) + +/-! 
## Suite -/ + +def Tests.Claim.suite : List TestSeq := [ + claimUnits, + claimEncodingTests, + checkIO "Claim serde roundtrips" (∀ c : Claim, claimSerde c), +] diff --git a/Tests/Ix/Commit.lean b/Tests/Ix/Commit.lean new file mode 100644 index 00000000..9b639a52 --- /dev/null +++ b/Tests/Ix/Commit.lean @@ -0,0 +1,244 @@ +/- + Tests for the commitment pipeline and claim construction. +-/ + +import LSpec +import Ix.Commit + +open LSpec +open Ixon (Comm runGet serCommTagged) +open Ix (Claim RevealConstantInfo) + +/-! ## Test addresses -/ + +private def payload1 : Address := Address.blake3 "payload1".toUTF8 +private def payload2 : Address := Address.blake3 "payload2".toUTF8 +private def secret1 : Address := Address.blake3 "secret1".toUTF8 +private def secret2 : Address := Address.blake3 "secret2".toUTF8 + +/-! ## Comm.commit determinism tests -/ + +def commDeterminismTests : TestSeq := + let comm1 := Comm.mk secret1 payload1 + let comm2 := Comm.mk secret1 payload1 + -- Same comm → same address + test "Comm.commit deterministic" (Comm.commit comm1 == Comm.commit comm2) + -- Different secrets → different addresses + ++ test "Different secrets produce different commit addresses" + (Comm.commit (Comm.mk secret1 payload1) != Comm.commit (Comm.mk secret2 payload1)) + -- Different payloads → different addresses + ++ test "Different payloads produce different commit addresses" + (Comm.commit (Comm.mk secret1 payload1) != Comm.commit (Comm.mk secret1 payload2)) + -- Verify commitment format: tagged serialization starts with 0xE5 and is 65 bytes + ++ test "serCommTagged starts with 0xE5" + ((serCommTagged comm1).data[0]! == 0xE5) + ++ test "serCommTagged is 65 bytes" + ((serCommTagged comm1).size == 65) + +/-! 
## Claim.commit tests -/ + +def claimCommitTests : TestSeq := + let evalClaim := Claim.eval payload1 payload2 + let checkClaim := Claim.check payload1 + let revealSafety := Claim.reveal payload1 (.defn none (some .safe) none none none) + let revealKind := Claim.reveal payload1 (.defn (some .defn) none none none none) + let revealBoth := Claim.reveal payload1 (.defn (some .defn) (some .safe) none none none) + -- Claim.commit is deterministic + test "Claim.commit deterministic" (Claim.commit evalClaim == Claim.commit evalClaim) + -- Different claim types → different addresses + ++ test "eval and check have different commit addresses" + (Claim.commit evalClaim != Claim.commit checkClaim) + -- Different reveal fields → different addresses + ++ test "Reveal safety-only differs from kind-only" + (Claim.commit revealSafety != Claim.commit revealKind) + ++ test "Reveal kind-only differs from kind+safety" + (Claim.commit revealKind != Claim.commit revealBoth) + ++ test "Reveal safety-only differs from kind+safety" + (Claim.commit revealSafety != Claim.commit revealBoth) + +/-! 
## RevealClaim field combination tests -/ + +private def claimRoundtrips (c : Claim) : Bool := + let bytes := Claim.ser c + match runGet Claim.get bytes with + | .ok c' => c' == c + | .error _ => false + +private def allNoneClaim : Claim := + Claim.reveal payload1 (.defn none none none none none) + +private def allFieldsClaim : Claim := + Claim.reveal payload1 + (.defn (some .defn) (some .safe) (some 3) (some payload2) (some payload1)) + +private def singleFieldAddrsDistinct : Bool := + let comm := payload1 + let addrK := Claim.commit (.reveal comm (.defn (some .defn) none none none none)) + let addrS := Claim.commit (.reveal comm (.defn none (some .safe) none none none)) + let addrL := Claim.commit (.reveal comm (.defn none none (some 1) none none)) + let addrT := Claim.commit (.reveal comm (.defn none none none (some payload2) none)) + let addrV := Claim.commit (.reveal comm (.defn none none none none (some payload2))) + addrK != addrS && addrK != addrL && addrK != addrT && addrK != addrV && + addrS != addrL && addrS != addrT && addrS != addrV && + addrL != addrT && addrL != addrV && + addrT != addrV + +def fieldCombinationTests : TestSeq := + -- All-none fields still produce a valid claim + test "All-none defn serialization roundtrips" (claimRoundtrips allNoneClaim) + -- All fields present still produce a valid claim + ++ test "All-fields defn serialization roundtrips" (claimRoundtrips allFieldsClaim) + -- Each single field produces a distinct commit address + ++ test "Single-field reveals have distinct commit addresses" singleFieldAddrsDistinct + +/-! 
## compileDef determinism tests -/ + +private def emptyCompileEnv : Ix.CompileM.CompileEnv := + Ix.CompileM.CompileEnv.new { consts := {} } + +-- def anon : Type := Prop +private def simpleType : Lean.Expr := Lean.mkSort (.succ .zero) +private def simpleValue : Lean.Expr := Lean.mkSort .zero + +-- def anon : Type 1 := Type +private def simpleType2 : Lean.Expr := Lean.mkSort (.succ (.succ .zero)) +private def simpleValue2 : Lean.Expr := Lean.mkSort (.succ .zero) + +private def compileDefSucceeds : Bool := + match Ix.Commit.compileDef emptyCompileEnv [] simpleType simpleValue with + | .ok _ => true + | .error _ => false + +private def compileDefDeterministic : Bool := + match Ix.Commit.compileDef emptyCompileEnv [] simpleType simpleValue, + Ix.Commit.compileDef emptyCompileEnv [] simpleType simpleValue with + | .ok (addr1, _), .ok (addr2, _) => addr1 == addr2 + | _, _ => false + +private def compileDefDifferentValueDifferentAddr : Bool := + match Ix.Commit.compileDef emptyCompileEnv [] simpleType simpleValue, + Ix.Commit.compileDef emptyCompileEnv [] simpleType2 simpleValue2 with + | .ok (addr1, _), .ok (addr2, _) => addr1 != addr2 + | _, _ => false + +-- Alpha-invariance: constant name must not affect the content address +private def compileDefAlphaConstName : Bool := + match Ix.Commit.compileDef emptyCompileEnv [] simpleType simpleValue (name := .anonymous), + Ix.Commit.compileDef emptyCompileEnv [] simpleType simpleValue (name := .str .anonymous "Foo") with + | .ok (addr1, _), .ok (addr2, _) => addr1 == addr2 + | _, _ => false + +-- Alpha-invariance: binder names must not affect the content address +-- fun (x : Prop) => x vs fun (y : Prop) => y +private def propExpr : Lean.Expr := Lean.mkSort .zero +private def binderType1 : Lean.Expr := Lean.mkForall `x .default propExpr propExpr +private def binderValue1 : Lean.Expr := Lean.mkLambda `x .default propExpr (Lean.mkBVar 0) +private def binderType2 : Lean.Expr := Lean.mkForall `y .default propExpr propExpr +private 
def binderValue2 : Lean.Expr := Lean.mkLambda `y .default propExpr (Lean.mkBVar 0) + +private def compileDefAlphaBinderNames : Bool := + match Ix.Commit.compileDef emptyCompileEnv [] binderType1 binderValue1, + Ix.Commit.compileDef emptyCompileEnv [] binderType2 binderValue2 with + | .ok (addr1, _), .ok (addr2, _) => addr1 == addr2 + | _, _ => false + +-- Alpha-invariance: level parameter names must not affect the content address +-- def _.{u} : Sort (u+1) := Sort u vs def _.{v} : Sort (v+1) := Sort v +private def compileDefAlphaLevelNames : Bool := + let typ1 := Lean.mkSort (.succ (.param `u)) + let val1 := Lean.mkSort (.param `u) + let typ2 := Lean.mkSort (.succ (.param `v)) + let val2 := Lean.mkSort (.param `v) + match Ix.Commit.compileDef emptyCompileEnv [`u] typ1 val1, + Ix.Commit.compileDef emptyCompileEnv [`v] typ2 val2 with + | .ok (addr1, _), .ok (addr2, _) => addr1 == addr2 + | _, _ => false + +def compileDefTests : TestSeq := + test "compileDef succeeds on simple def" compileDefSucceeds + ++ test "compileDef produces same address for same inputs" compileDefDeterministic + ++ test "compileDef produces different address for different inputs" compileDefDifferentValueDifferentAddr + ++ test "alpha-invariance: constant name does not affect address" compileDefAlphaConstName + ++ test "alpha-invariance: binder names do not affect address" compileDefAlphaBinderNames + ++ test "alpha-invariance: level param names do not affect address" compileDefAlphaLevelNames + +/-! 
## checkClaim and revealClaim tests -/ + +private def checkClaimSucceeds : Bool := + match Ix.Commit.checkClaim emptyCompileEnv [] simpleType simpleValue with + | .ok (.check _) => true + | _ => false + +private def checkClaimMatchesCompileDef : Bool := + match Ix.Commit.compileDef emptyCompileEnv [] simpleType simpleValue, + Ix.Commit.checkClaim emptyCompileEnv [] simpleType simpleValue with + | .ok (addr, _), .ok (.check claimAddr) => addr == claimAddr + | _, _ => false + +private def openConstantInfoDefn : Bool := + match Ix.Commit.compileDef emptyCompileEnv [] simpleType simpleValue with + | .ok (_, env') => + -- Look up the compiled constant via the anonymous name + let (ixName, _) := (Ix.CanonM.canonName .anonymous).run {} + match env'.nameToNamed.get? ixName with + | some named => + match env'.constants.get? named.addr with + | some constant => + let info := Ix.Commit.openConstantInfo constant.info + -- Should be a defn variant with all fields some + match info with + | .defn (some _) (some .safe) (some _) (some _) (some _) => true + | _ => false + | none => false + | none => false + | .error _ => false + +private def openConstantInfoRoundtrips : Bool := + match Ix.Commit.compileDef emptyCompileEnv [] simpleType simpleValue with + | .ok (_, env') => + let (ixName, _) := (Ix.CanonM.canonName .anonymous).run {} + match env'.nameToNamed.get? ixName with + | some named => + match env'.constants.get? 
named.addr with + | some constant => + let info := Ix.Commit.openConstantInfo constant.info + -- The fully-revealed RevealConstantInfo should serde roundtrip + claimRoundtrips (.reveal payload1 info) + | none => false + | none => false + | .error _ => false + +private def revealClaimWrapper : Bool := + let info : RevealConstantInfo := .defn (some .defn) none none none none + let claim := Ix.Commit.revealClaim payload1 info + claim == Claim.reveal payload1 info + +def claimConstructorTests : TestSeq := + test "checkClaim succeeds" checkClaimSucceeds + ++ test "checkClaim address matches compileDef" checkClaimMatchesCompileDef + ++ test "openConstantInfo produces defn with all fields" openConstantInfoDefn + ++ test "openConstantInfo result serde roundtrips" openConstantInfoRoundtrips + ++ test "revealClaim wraps correctly" revealClaimWrapper + +/-! ## IO tests for commitConst -/ + +def commitConstIOTest : TestSeq := + .individualIO "commitConst: different random secrets produce different addresses" (do + let payload := Address.blake3 "test-payload".toUTF8 + let (_, commitAddr1) ← Ix.Commit.commitConst payload + let (_, commitAddr2) ← Ix.Commit.commitConst payload + return (commitAddr1 != commitAddr2, none)) .done + +/-! 
## Suite registration -/ + +def Tests.Commit.suite : List TestSeq := [ + commDeterminismTests, + claimCommitTests, + fieldCombinationTests, + compileDefTests, + claimConstructorTests, +] + +def Tests.Commit.suiteIO : List TestSeq := [ + commitConstIOTest, +] diff --git a/Tests/Ix/Common.lean b/Tests/Ix/Common.lean deleted file mode 100644 index 4134269a..00000000 --- a/Tests/Ix/Common.lean +++ /dev/null @@ -1,89 +0,0 @@ -import LSpec -import Ix.Common -import Ix.Ixon -import Ix.Address -import LSpec.SlimCheck.Gen -import LSpec -import Blake3 -import Tests.Common - -open LSpec -open SlimCheck -open SlimCheck.Gen -open Ixon - ---def genAddress : SlimCheck.Gen Address := --- pure (Address.mk (Blake3.hash "foobar".toUTF8).val) --- ----- TODO: Bias char distribution towards ASCII to be more useful ---def genChar : SlimCheck.Gen Char := --- Char.ofNat <$> (choose Nat 0 0xd800) --- ---def genBool : Gen Bool := choose Bool .false true --- ---def genListSize (gen: Gen α) (lo hi: Nat): Gen (List α) := do --- let n ← choose Nat lo hi --- List.mapM (fun _ => gen) (List.range n) --- ----- aggressively reduce size parameter to avoid tree blow-up ---def genList (n: Gen α) : Gen (List α) := do --- resize (fun s => if s > 8 then 8 else s / 2) $ listOf n --- ---def genString : SlimCheck.Gen String := do --- let cs ← genList genChar --- return String.mk cs --- -----def genNat' : Gen Nat := choose Nat 0 10 --- ---def genNat : Gen Nat := USize.toNat <$> genUSize --- -----def genList' (gen: Gen α) : Gen (List α) := do ----- let n ← genNat' ----- List.mapM (fun _ => gen) (List.range n) --- ---def genOption (gen: Gen α) : Gen (Option α) := --- oneOf' [ pure .none, .some <$> gen] --- ---def genAlphaNum : Gen Char := do --- let n <- frequency --- [ (50, choose Nat 48 57), --- (50, choose Nat 65 90), --- (100, choose Nat 97 122), --- ] --- return Char.ofNat n --- ---def genAlphaNumStr : Gen String := do --- String.mk <$> genList genAlphaNum --- ---def genNamePart : Gen Ixon.NamePart := --- 
frequency [ (50, .str <$> genAlphaNumStr) --- , (50, .num <$> genNat) --- ] --- ---def genName : Gen Lean.Name := Ixon.nameFromParts <$> (fun x => [x]) <$> genNamePart --- ---def genBinderInfo : Gen Lean.BinderInfo := oneOf' --- [ pure .default --- , pure .instImplicit --- , pure .strictImplicit --- , pure .instImplicit --- ] --- ---def genDefKind : Gen Ix.DefKind := oneOf' --- [ pure .opaque --- , pure .theorem --- , pure .definition --- ] --- ---def genReducibilityHints : Gen Lean.ReducibilityHints := oneOf' --- [ pure .opaque --- , pure .abbrev --- --, (.regular ·.toUInt32) <$> genUSize --- ] --- ---def genQuotKind : Gen Lean.QuotKind := oneOf' --- [ pure .type --- , pure .ctor --- , pure .lift --- , pure .ind --- ] diff --git a/Tests/Ix/Compile.lean b/Tests/Ix/Compile.lean index 0cfb78dd..bd940332 100644 --- a/Tests/Ix/Compile.lean +++ b/Tests/Ix/Compile.lean @@ -1,326 +1,480 @@ -import LSpec +/- + Cross-implementation tests for Compilation. + Verifies Lean and Rust implementations produce equivalent results. +-/ import Ix.Ixon +import Ix.Environment import Ix.Address import Ix.Common +import Ix.Meta +import Ix.CompileM import Ix.CondenseM import Ix.GraphM -import Ix.CompileM -import Ix.DecompileM -import Ix.Cronos -import Ix.Meta -import Ix.Store +import Ix.Sharing import Lean +import LSpec import Tests.Ix.Fixtures -import Tests.Ix.Fixtures.Mutual -import Lean open LSpec -open Ix ---open Ix.Decompile - - -namespace Test.Ix.Inductives - -mutual - unsafe inductive A | a : B → C → A - unsafe inductive B | b : A → B - unsafe inductive C | c : A → C -end - -end Test.Ix.Inductives - -namespace Test.Ix.Mutual - -mutual - unsafe def A : Nat → Nat - | 0 => 0 - | n + 1 => B n + C n + 1 - - unsafe def B : Nat → Nat - | 0 => 0 - | n + 1 => A n + 1 - - unsafe def C : Nat → Nat - | 0 => 0 - | n + 1 => A n + 1 -end - -end Test.Ix.Mutual - - ---def addrOfName (name: Lean.Name) : IO Address := do --- let env <- get_env! --- let const := match (env.find? 
name) with --- | .some c => c --- | .none => env.getDelta.find! name --- let ((a, _), _) <- (Ix.Compile.compileConst const).runIO env --- return a - ---def testUnits : IO TestSeq := do --- let x <- addrOfName `Nat --- IO.println s!"Nat is {x}" --- let x <- addrOfName `Nat.add --- IO.println s!"Nat.add is {x}" --- let x <- addrOfName `Peano --- IO.println s!"Peano is {x}" --- let x <- addrOfName `Peano.add --- IO.println s!"Peano.add is {x}" --- return test "false" (true == false) - ---def testMutual : IO TestSeq := do --- let env <- get_env! --- let mut cstt : CompileState := .init env 0 --- let all := (env.getDelta.find! `Test.Ix.Mutual.A).all --- let consts <- all.mapM fun n => match env.getDelta.find! n with --- | .defnInfo d => pure <| Ix.MutConst.mkDefn d --- | .opaqueInfo d => pure <| Ix.MutConst.mkOpaq d --- | .thmInfo d => pure <| Ix.MutConst.mkTheo d --- | _ => throw (IO.userError "not a def") --- let (dss, _) <- match (<- (sortConsts consts).run .init cstt) with --- | (.ok a, stt) => do --- pure (a, stt) --- | (.error e, _) => do --- throw (IO.userError (<- e.pretty)) --- let res := [[`Test.Ix.Mutual.B, `Test.Ix.Mutual.C],[`Test.Ix.Mutual.A]] --- let nss := dss.map fun ds => ds.map (·.name) --- return test "test mutual" (res == nss) - ---#eval show Lean.MetaM _ from do --- let env ← Lean.getEnv --- let gstt := GroundM.run groundConst (env.find! `Array.mk) --- return gstt. --- ---#eval show Lean.MetaM _ from do --- let env ← Lean.getEnv --- return env.find? `Array.foldrMUnsafe.fold._at.Std.DHashMap.Const.toList._spec_2 - ---def testInductives : IO TestSeq := do --- let env <- get_env! --- let mut cstt : CompileState := .init env 0 --- --let delta := env.getDelta.filter fun n _ => namesp.isPrefixOf n --- --let consts := env.getConstMap.filter fun n _ => namesp.isPrefixOf n --- let all := (env.getDelta.find! `Test.Ix.Inductives.A).all --- let consts <- all.mapM fun n => match env.getDelta.find! 
n with --- | .inductInfo v => do match (<- (Ix.MutConst.mkIndc v).run .init cstt) with --- | (.ok a, _) => pure a --- | (.error e, _) => do throw (IO.userError (<- e.pretty)) --- | _ => throw (IO.userError "not an inductive") --- let (dss, _) <- do match (<- (sortConsts consts).run .init cstt) with --- | (.ok a, stt) => do --- pure (a, stt) --- | (.error e, _) => do --- throw (IO.userError (<- e.pretty)) --- let res := [[`Test.Ix.Inductives.C],[`Test.Ix.Inductives.B], [`Test.Ix.Inductives.A]] --- let nss := dss.map fun ds => ds.map (·.name) --- return test "test inductives" (res == nss) - ---def testEasy : IO TestSeq := do --- let env <- get_env! --- let easy := [ --- `Nat.add_comm --- ] --- let mut res := true --- for name in easy do --- IO.println s!"⚙️ Compiling {name}" --- let mut cstt : CompileState := .init env 0 --- let start <- IO.monoNanosNow --- let (addr, stt) <- do match (<- (compileConstName name).run .init cstt) with --- | (.ok a, stt) => pure (a, stt) --- | (.error e, _) => IO.println s!"failed {name}" *> throw (IO.userError (<- e.pretty)) --- let done <- IO.monoNanosNow --- IO.println s!"✅ {addr}" --- IO.println s!"Elapsed {Cronos.nanoToSec (done - start)}" --- return test "easy compile roundtrip" (res == true) - ---def testDifficult : IO TestSeq := do --- let env <- get_env! 
--- let difficult := [ --- `Std.Tactic.BVDecide.BVExpr.bitblast.blastUdiv.denote_blastDivSubtractShift_q --- ] --- let mut res := true --- for name in difficult do --- let mut cstt : CompileState := .init env 0 --- let (addr, stt) <- do match (<- (compileConstName name).run .init cstt) with --- | (.ok a, stt) => pure (a, stt) --- | (.error e, _) => IO.println s!"failed {name}" *> throw (IO.userError (<- e.pretty)) --- IO.println s!"{name} -> {addr}" --- --cstt := stt --- --let mut store : Ixon.Store := {} --- --for (_,(a, b)) in cstt.names do --- -- let a_ixon <- (Store.readConst a).toIO --- -- let b_ixon <- (Store.readConst b).toIO --- -- store := store.insert a a_ixon --- -- store := store.insert b b_ixon --- --let denv := DecompileEnv.init cstt.names store --- --let dstt <- match decompileEnv.run denv default with --- -- | .ok _ s => pure s --- -- --IO.println s!"✓ {n} @ {anon}:{meta}" --- -- | .error e _ => do --- -- throw (IO.userError e.pretty) --- --IO.println s!"decompiled env" --- --for (n, (anon, meta)) in denv.names do --- -- let c <- match env.constants.find? n with --- -- | .some c => pure c --- -- | .none => throw (IO.userError "name {n} not in env") --- -- match dstt.constants.get? n with --- -- | .some c2 => --- -- if c.stripMData == c2.stripMData --- -- then --- -- IO.println s!"✓ {n} @ {anon}:{meta}" --- -- else --- -- IO.println s!"× {n} @ {anon}:{meta}" --- -- IO.FS.writeFile "c.out" s!"{repr c.stripMData}" --- -- IO.FS.writeFile "c2.out" s!"{repr c2.stripMData}" --- -- res := false --- -- break --- -- | .none => do --- -- let e' := (DecompileError.unknownName default n).pretty --- -- throw (IO.userError e') --- return test "difficult compile roundtrip" (res == true) - -def time (starts stops: Nat) : Float := Cronos.nanoToSec (stops - starts) - -def testRoundtripGetEnv : IO TestSeq := do - IO.println s!"Getting env" - let envStart <- IO.monoNanosNow - let env <- get_env! 
- let envStop <- IO.monoNanosNow - IO.println s!"got Lean.Environment in {time envStart envStop}" - let sizeStart <- IO.monoNanosNow - let envSize := env.constants.fold (fun x _ _=> x + 1) 0 - IO.println s!"counted Lean.Environment with {envSize} constants" - let sizeStop <- IO.monoNanosNow - IO.println s!"..in {time sizeStart sizeStop}" - --let numConst := envSize - IO.println s!"Compiling env" - --let mut inConst := 1 - let allStart <- IO.monoNanosNow - let res <- CompileM.envTopological env {} - let allDone <- IO.monoNanosNow - IO.println s!"Compiled env of {res.consts.size} in {Cronos.nanoToSec (allDone - allStart)}" - --for (name, _) in env.constants do - -- IO.println s!"Compiling {name} {inConst}/{numConst}" - -- let start <- IO.monoNanosNow - -- let (addr, stt) <- do match (<- (compileConstName name).run .init cstt) with - -- | (.ok a, stt) => pure (a, stt) - -- | (.error e, _) => do - -- IO.println s!"failed {name}" - -- throw (IO.userError (<- e.pretty)) - -- let done <- IO.monoNanosNow - -- let pct := ((Float.ofNat inConst) / Float.ofNat numConst) - -- let total := done - allStart - -- IO.println s!"-> Compiled {pct * 100}%, {inConst}/{numConst}, - -- Elapsed {Cronos.nanoToSec (done - start)}/{Cronos.nanoToSec total}, - -- Remaining {((Cronos.nanoToSec total) / pct) / 60} min - -- {name} - -- {addr}" - -- cstt := stt - -- let denv := DecompileEnv.init cstt.constCache cstt.store - -- let (name', stt) <- match DecompileM.run denv dstt (decompileNamedConst name addr) with - -- | .ok (n,_) stt => pure (n, stt) - -- | .error e _ => do - -- IO.println s!"failed {name}" - -- IO.println s!"cstt all: {repr <| cstt.alls.get? name}" - -- --let c := env.constants.find? name - -- --IO.println s!"{repr c}" - -- throw (IO.userError e.pretty) - -- match env.constants.find? name, stt.constants.find? 
name' with - -- | .some c, .some c' => if c == c then pure () else do - -- IO.println s!"failed {name} {repr c} {repr c'}" - -- throw (IO.userError "decompiled constant not equal") - -- | .some _, .none => do - -- throw (IO.userError s!"{name'} not found in dstt") - -- | .none, _ => do - -- throw (IO.userError "{name} not found in env") - -- let done2 <- IO.monoNanosNow - -- let total2 := done2 - allStart - -- IO.println s!"<- Decompiled {pct * 100}%, {inConst}/{numConst}, - -- Elapsed {Cronos.nanoToSec (done2 - done)}/{Cronos.nanoToSec total}, - -- Remaining {((Cronos.nanoToSec total2) / pct) / 60} min - -- {name} - -- {addr}" - -- inConst := inConst + 1 - -- dstt := stt - --let allDone <- IO.monoNanosNow - --IO.println s!"Compiled/decompiled {name} in {Cronos.nanoToSec (allDone - allStart)}" - -- IO.println s!"decompiling env" - -- let mut store : Ixon.Store := {} - -- for (_,(a, b)) in cstt.names do - -- let a_ixon <- (Store.readConst a).toIO - -- let b_ixon <- (Store.readConst b).toIO - -- store := store.insert a a_ixon - -- store := store.insert b b_ixon - -- let denv := DecompileEnv.init cstt.names store - -- let dstt <- match decompileEnv.run denv default with - -- | .ok _ s => pure s - -- --IO.println s!"✓ {n} @ {anon}:{meta}" - -- | .error e _ => do - -- throw (IO.userError e.pretty) - -- IO.println s!"decompiled env" - -- let mut res := true - -- for (n, (anon, meta)) in denv.names do - -- let c <- match env.constants.find? n with - -- | .some c => pure c - -- | .none => throw (IO.userError "name {n} not in env") - -- match dstt.constants.find? 
n with - -- | .some c2 => - -- if c.stripMData == c2.stripMData - -- then - -- IO.println s!"✓ {n} @ {anon}:{meta}" - -- else - -- IO.println s!"× {n} @ {anon}:{meta}" - -- IO.FS.writeFile "c.out" s!"{repr c.stripMData}" - -- IO.FS.writeFile "c2.out" s!"{repr c2.stripMData}" - -- res := false - -- break - -- | .none => do - -- let e' := (DecompileError.unknownName default n).pretty - -- throw (IO.userError e') - -- IO.println s!"input delta: {env.getDelta.toList.length}" - -- IO.println s!"input env: {env.constants.toList.length}" - -- IO.println s!"output env: {dstt.constants.toList.length}" - return test "env compile roundtrip" true --(res == true) - ---#eval (`_cstage2).isSuffixOf (`f.a._cstage2) ---open Lean Meta --- ---set_option pp.all true ---set_option pp.privateNames true ---set_option pp.fullNames true - ---#eval show MetaM _ from do --- getConstInfo `Lean.Language.instInhabitedDynamicSnapshot._closed_2._cstage2 --- ---#eval show MetaM _ from do --- getConstInfo `Lean.Language.instInhabitedDynamicSnapshot._closed_2._cstage2 --- ---def printPrivate (pre : Name) : CoreM (Array Name) := do --- let env ← getEnv --- let mut hits := #[] --- for (n, _) in env.const2ModIdx do --- if pre.isPrefixOf n then --- hits := hits.push n --- pure hits --- ---#eval do --- let hits ← printPrivate `_private.Lean.Language.Basic --- IO.println s!"found:" --- for n in hits do --- IO.println s!"{n}" --- ---#eval --- IO.println s!"{(mkPrivateNameCore `Lean.Language.Basic `Lean.Language.DynamicSnapShot)}" --- ---#eval show MetaM _ from do --- let env ← getEnv --- return env.const2ModIdx.size - -def Tests.Ix.Compile.suiteIO: List (IO TestSeq) := [ - --testMutual, - --testInductives, - --testDifficult, - --testUnits, - --testEasy, - testRoundtripGetEnv + +namespace Tests.Compile + +/-! 
## Helpers -/ + +/-- Show hex dump of first N bytes -/ +def hexDump (bytes : ByteArray) (maxBytes : Nat := 64) : String := Id.run do + let mut s := "" + for i in [:min bytes.size maxBytes] do + if i > 0 && i % 16 == 0 then s := s ++ "\n" + else if i > 0 && i % 8 == 0 then s := s ++ " " + else if i > 0 then s := s ++ " " + let b := bytes.get! i + let hi := b / 16 + let lo := b % 16 + let toHex (n : UInt8) : Char := if n < 10 then Char.ofNat (n.toNat + 48) else Char.ofNat (n.toNat + 87) + s := s ++ String.singleton (toHex hi) ++ String.singleton (toHex lo) + if bytes.size > maxBytes then s := s ++ s!" ... ({bytes.size} bytes total)" + return s + +/-- Format a byte count with appropriate unit suffix (B, kB, MB, GB). -/ +def fmtBytes (n : Nat) : String := + if n < 1024 then s!"{n} B" + else if n < 1024 * 1024 then + let kb := n * 10 / 1024 + s!"{kb / 10}.{kb % 10} kB" + else if n < 1024 * 1024 * 1024 then + let mb := n * 10 / (1024 * 1024) + s!"{mb / 10}.{mb % 10} MB" + else + let gb := n * 10 / (1024 * 1024 * 1024) + s!"{gb / 10}.{gb % 10} GB" + +/-- Find first byte position where two arrays differ -/ +def findFirstDiff (a b : ByteArray) : Option Nat := Id.run do + for i in [:min a.size b.size] do + if a.get! i != b.get! i then return some i + if a.size != b.size then return some (min a.size b.size) + return none + +/-! 
## Comparison Results -/ + +/-- Result of comparing a single constant -/ +structure ConstCompareResult where + name : Ix.Name + leanAddr : Address + rustAddr : Address + isMatch : Bool + leanBytes : ByteArray + rustBytes : ByteArray + +/-- Result of a per-constant metadata comparison -/ +structure MetaMismatch where + name : Ix.Name + leanByType : Nat × Nat × Nat × Nat × Nat -- (binder, letBinder, ref, prj, mdata) + rustByType : Nat × Nat × Nat × Nat × Nat + +/-- Result of comparing all constants -/ +structure CompareResult where + totalConstants : Nat + matchingConstants : Nat + mismatchedConstants : Array ConstCompareResult + metaMismatches : Array MetaMismatch + fullMetaMismatches : Array (Ix.Name × Ixon.ConstantMeta × Ixon.ConstantMeta) + missingInRust : Array Ix.Name + missingInLean : Array Ix.Name + +/-- Compare Lean and Rust compilation results using Ixon.Env -/ +def compareEnvResults + (leanEnv rustEnv : Ixon.Env) + : CompareResult := Id.run do + let mut matching := 0 + let mut mismatched : Array ConstCompareResult := #[] + let mut metaMismatches : Array MetaMismatch := #[] + let mut fullMetaMismatches : Array (Ix.Name × Ixon.ConstantMeta × Ixon.ConstantMeta) := #[] + let mut missingInRust : Array Ix.Name := #[] + let mut missingInLean : Array Ix.Name := #[] + + -- Check all Lean constants against Rust + for (name, leanNamed) in leanEnv.named do + let leanAddr := leanNamed.addr + match rustEnv.named.get? name with + | some rustNamed => + let rustAddr := rustNamed.addr + if leanAddr == rustAddr then + matching := matching + 1 + else + let leanBytes := leanEnv.consts.get? leanAddr |>.map Ixon.ser |>.getD default + let rustBytes := rustEnv.consts.get? 
rustAddr |>.map Ixon.ser |>.getD default + mismatched := mismatched.push ⟨name, leanAddr, rustAddr, false, leanBytes, rustBytes⟩ + -- Check metadata regardless of addr match + let leanMeta := leanNamed.constMeta.exprMetaByType + let rustMeta := rustNamed.constMeta.exprMetaByType + if leanMeta != rustMeta then + metaMismatches := metaMismatches.push ⟨name, leanMeta, rustMeta⟩ + -- Full ConstantMeta comparison (catches all/ctx/lvls/hints differences) + if leanNamed.constMeta != rustNamed.constMeta then + fullMetaMismatches := fullMetaMismatches.push (name, leanNamed.constMeta, rustNamed.constMeta) + | none => + missingInRust := missingInRust.push name + + -- Check for constants in Rust but not in Lean + for (name, _) in rustEnv.named do + if !leanEnv.named.contains name then + missingInLean := missingInLean.push name + + { + totalConstants := leanEnv.named.size + matchingConstants := matching + mismatchedConstants := mismatched + metaMismatches + fullMetaMismatches + missingInRust + missingInLean + } + +/-- Serialize a Lean Ixon.Env to bytes -/ +def serializeEnv (env : Ixon.Env) : ByteArray := + Ixon.serEnv env + +/-! ## Integrated Test -/ + +/-- Cross-implementation compilation test using the new CompilePhases API -/ +def testCrossImpl : TestSeq := + .individualIO "Compilation Cross-Implementation" (do + let leanEnv ← get_env! + let totalConsts := leanEnv.constants.toList.length + + IO.println s!"[Test] Cross-Implementation Compilation Test" + IO.println s!"[Test] Environment has {totalConsts} constants" + IO.println "" + + -- Step 1: Get all compilation phases from Rust in one call + IO.println s!"[Step 1] Running Rust compilation pipeline (rsCompilePhases)..." 
+ let rustStart ← IO.monoMsNow + let phases ← Ix.CompileM.rsCompilePhases leanEnv + let rustTime := (← IO.monoMsNow) - rustStart + + IO.println s!"[Step 1] Rust: {phases.rawEnv.consts.size} consts, {phases.condensed.blocks.size} blocks, {phases.compileEnv.constCount} compiled in {rustTime}ms" + IO.println "" + + -- Step 2: Compile with Lean using Rust's rawEnv and condensed blocks + IO.println s!"[Step 2] Running Lean parallel compilation..." + let leanStart ← IO.monoMsNow + + match ← Ix.CompileM.compileEnvParallel phases.rawEnv phases.condensed (rustRef := none) (dbg := true) with + | .error err => + let leanTime := (← IO.monoMsNow) - leanStart + IO.println s!"[Step 2] Compilation failed after {leanTime}ms" + if let some sysErr := err.systemError then + IO.println s!"[Error] {sysErr}" + return (false, some sysErr) + return (false, some "Compilation failed") + + | .ok (leanIxonEnv, totalBytes) => + let leanTime := (← IO.monoMsNow) - leanStart + IO.println s!"[Step 2] Lean: {leanIxonEnv.constCount} constants, {fmtBytes totalBytes} in {leanTime}ms" + IO.println "" + + -- Step 3: Compare compilation results + IO.println s!"[Step 3] Comparing Lean and Rust results..." + let compareStart ← IO.monoMsNow + + let result := compareEnvResults leanIxonEnv phases.compileEnv + + IO.print s!"[Step 3] Compared {result.totalConstants} " + let compareTime := (← IO.monoMsNow) - compareStart + IO.println s!"constants in {compareTime}ms" + + if result.mismatchedConstants.isEmpty && result.missingInRust.isEmpty && result.missingInLean.isEmpty then + IO.println s!"[Step 3] All {result.matchingConstants} constants match! 
✓" + + if !result.metaMismatches.isEmpty then + IO.println s!"[Step 3] Metadata mismatches: {result.metaMismatches.size} constants" + -- Aggregate deltas + let mut totB : Int := 0; let mut totL : Int := 0; let mut totR : Int := 0 + let mut totP : Int := 0; let mut totM : Int := 0 + let mut negCount := 0 -- constants where Rust has MORE metadata + for mm in result.metaMismatches do + let (lb, ll, lr, lp, lm) := mm.leanByType + let (rb, rl, rr, rp, rm) := mm.rustByType + let db := (Int.ofNat lb) - (Int.ofNat rb) + let dl := (Int.ofNat ll) - (Int.ofNat rl) + let dr := (Int.ofNat lr) - (Int.ofNat rr) + let dp := (Int.ofNat lp) - (Int.ofNat rp) + let dm := (Int.ofNat lm) - (Int.ofNat rm) + totB := totB + db; totL := totL + dl; totR := totR + dr + totP := totP + dp; totM := totM + dm + if db < 0 || dl < 0 || dr < 0 || dp < 0 || dm < 0 then + negCount := negCount + 1 + IO.println s!"[Step 3] Totals: Δbinder={totB} Δlet={totL} Δref={totR} Δprj={totP} Δmdata={totM}" + IO.println s!"[Step 3] Constants where Rust > Lean: {negCount}" + -- Show first 30 mismatches + for mm in result.metaMismatches[:min 30 result.metaMismatches.size] do + let (lb, ll, lr, lp, lm) := mm.leanByType + let (rb, rl, rr, rp, rm) := mm.rustByType + let db := (Int.ofNat lb) - (Int.ofNat rb) + let dl := (Int.ofNat ll) - (Int.ofNat rl) + let dr := (Int.ofNat lr) - (Int.ofNat rr) + let dp := (Int.ofNat lp) - (Int.ofNat rp) + let dm := (Int.ofNat lm) - (Int.ofNat rm) + IO.println s!" {mm.name} Δbinder={db} Δlet={dl} Δref={dr} Δprj={dp} Δmdata={dm}" + -- Detailed dump for first mismatch with Δref>0 + if let some mm := result.metaMismatches.find? (fun mm => + let (_, _, lr, _, _) := mm.leanByType + let (_, _, rr, _, _) := mm.rustByType + lr > rr) then + IO.println s!"\n[Detail] {mm.name}" + let leanNamed := leanIxonEnv.named.get! mm.name + let rustNamed := phases.compileEnv.named.get! mm.name + let dumpArena (label : String) (tag : String) (arena : Ixon.ExprMetaArena) : IO Unit := do + IO.println s!" 
{label} {tag}: {arena.nodes.size} nodes" + for i in [:arena.nodes.size] do + IO.println s!" [{i}] {reprStr arena.nodes[i]!}" + let dumpMeta (label : String) (cm : Ixon.ConstantMeta) : IO Unit := do + match cm with + | .defn _ _ _ _ _ arena typeRoot valueRoot => do + dumpArena label "arena" arena + IO.println s!" {label} typeRoot={typeRoot} valueRoot={valueRoot}" + | .axio _ _ arena typeRoot => do + dumpArena label "arena" arena + IO.println s!" {label} typeRoot={typeRoot}" + | .quot _ _ arena typeRoot => do + dumpArena label "arena" arena + IO.println s!" {label} typeRoot={typeRoot}" + | .ctor _ _ _ arena typeRoot => do + dumpArena label "arena" arena + IO.println s!" {label} typeRoot={typeRoot}" + | .recr _ _ _ _ _ arena typeRoot ruleRoots => do + dumpArena label "arena" arena + IO.println s!" {label} typeRoot={typeRoot} ruleRoots={ruleRoots}" + | .indc _ _ _ _ _ arena typeRoot => do + dumpArena label "arena" arena + IO.println s!" {label} typeRoot={typeRoot}" + | .empty => IO.println s!" {label}: empty" + dumpMeta "Lean" leanNamed.constMeta + dumpMeta "Rust" rustNamed.constMeta + else + IO.println s!"[Step 3] All metadata matches! ✓" + + -- Check full ConstantMeta equality (catches all/ctx/lvls/hints differences) + if !result.fullMetaMismatches.isEmpty then + IO.println s!"[Step 3] Full ConstantMeta mismatches: {result.fullMetaMismatches.size}" + for (name, leanCM, rustCM) in result.fullMetaMismatches[:min 5 result.fullMetaMismatches.size] do + IO.println s!" {name}:" + -- Compare variant tags + let leanTag := match leanCM with | .empty => "empty" | .defn .. => "defn" | .axio .. => "axio" | .quot .. => "quot" | .indc .. => "indc" | .ctor .. => "ctor" | .recr .. => "recr" + let rustTag := match rustCM with | .empty => "empty" | .defn .. => "defn" | .axio .. => "axio" | .quot .. => "quot" | .indc .. => "indc" | .ctor .. => "ctor" | .recr .. => "recr" + if leanTag != rustTag then + IO.println s!" 
VARIANT DIFFERS: Lean={leanTag} Rust={rustTag}" + else + IO.println s!" variant: {leanTag}" + -- Field-by-field comparison for common variants + match leanCM, rustCM with + | .defn ln ll lh la lc larena ltr lvr, .defn rn rl rh ra rc rarena rtr rvr => do + if ln != rn then IO.println s!" name DIFFERS: Lean={ln} Rust={rn}" + if ll != rl then IO.println s!" lvls DIFFERS: Lean={ll.size} Rust={rl.size}" + if lh != rh then IO.println s!" hints DIFFERS" + if la != ra then IO.println s!" all DIFFERS: Lean={la} Rust={ra}" + if lc != rc then IO.println s!" ctx DIFFERS: Lean={lc} Rust={rc}" + if larena != rarena then IO.println s!" arena DIFFERS: Lean={larena.nodes.size} Rust={rarena.nodes.size}" + if ltr != rtr then IO.println s!" typeRoot DIFFERS: Lean={ltr} Rust={rtr}" + if lvr != rvr then IO.println s!" valueRoot DIFFERS: Lean={lvr} Rust={rvr}" + | .indc ln ll lctors la lc larena ltr, .indc rn rl rctors ra rc rarena rtr => do + if ln != rn then IO.println s!" name DIFFERS: Lean={ln} Rust={rn}" + if ll != rl then IO.println s!" lvls DIFFERS: Lean={ll.size} Rust={rl.size}" + if lctors != rctors then IO.println s!" ctors DIFFERS: Lean={lctors} Rust={rctors}" + if la != ra then IO.println s!" all DIFFERS: Lean={la} Rust={ra}" + if lc != rc then IO.println s!" ctx DIFFERS: Lean={lc} Rust={rc}" + if larena != rarena then IO.println s!" arena DIFFERS: Lean={larena.nodes.size} Rust={rarena.nodes.size}" + if ltr != rtr then IO.println s!" typeRoot DIFFERS: Lean={ltr} Rust={rtr}" + | .ctor ln ll li larena ltr, .ctor rn rl ri rarena rtr => do + if ln != rn then IO.println s!" name DIFFERS: Lean={ln} Rust={rn}" + if ll != rl then IO.println s!" lvls DIFFERS: Lean={ll.size} Rust={rl.size}" + if li != ri then IO.println s!" induct DIFFERS: Lean={li} Rust={ri}" + if larena != rarena then IO.println s!" arena DIFFERS: Lean={larena.nodes.size} Rust={rarena.nodes.size}" + if ltr != rtr then IO.println s!" 
typeRoot DIFFERS: Lean={ltr} Rust={rtr}" + | .recr ln ll lr la lc larena ltr lrr, .recr rn rl rr ra rc rarena rtr rrr => do + if ln != rn then IO.println s!" name DIFFERS: Lean={ln} Rust={rn}" + if ll != rl then IO.println s!" lvls DIFFERS: Lean={ll.size} Rust={rl.size}" + if lr != rr then IO.println s!" rules DIFFERS: Lean={lr} Rust={rr}" + if la != ra then IO.println s!" all DIFFERS: Lean={la} Rust={ra}" + if lc != rc then IO.println s!" ctx DIFFERS: Lean={lc} Rust={rc}" + if larena != rarena then IO.println s!" arena DIFFERS: Lean={larena.nodes.size} Rust={rarena.nodes.size}" + if ltr != rtr then IO.println s!" typeRoot DIFFERS: Lean={ltr} Rust={rtr}" + if lrr != rrr then IO.println s!" ruleRoots DIFFERS: Lean={lrr} Rust={rrr}" + | _, _ => IO.println s!" (other variant - use repr for details)" + else + IO.println s!"[Step 3] All full ConstantMeta match! ✓" + + IO.println "" + + -- Step 4: Compare serialized environments + IO.println s!"[Step 4] Comparing serialized environments..." + let serStart ← IO.monoMsNow + + -- Count total ExprMeta entries and mdata sizes across all named constants + let (leanTotalMetas, leanTotalMdata) := leanIxonEnv.named.fold (init := (0, 0)) fun (accMeta, accMdata) _ named => + let (metaCount, mdataSize) := named.constMeta.exprMetaStats + (accMeta + metaCount, accMdata + mdataSize) + let (rustTotalMetas, rustTotalMdata) := phases.compileEnv.named.fold (init := (0, 0)) fun (accMeta, accMdata) _ named => + let (metaCount, mdataSize) := named.constMeta.exprMetaStats + (accMeta + metaCount, accMdata + mdataSize) + + -- Count ExprMeta by type: (binder, letBinder, ref, prj, mdata) + let (leanBinder, leanLet, leanRef, leanPrj, leanMd) := leanIxonEnv.named.fold (init := (0, 0, 0, 0, 0)) fun (b, l, r, p, m) _ named => + let (b', l', r', p', m') := named.constMeta.exprMetaByType + (b + b', l + l', r + r', p + p', m + m') + let (rustBinder, rustLet, rustRef, rustPrj, rustMd) := phases.compileEnv.named.fold (init := (0, 0, 0, 0, 0)) fun (b, l, r, 
p, m) _ named => + let (b', l', r', p', m') := named.constMeta.exprMetaByType + (b + b', l + l', r + r', p + p', m + m') + + IO.println s!"[Step 4] Serializing Lean env ({leanIxonEnv.constCount} consts)..." + IO.println s!"[Step 4] Lean env has {leanIxonEnv.named.size} named, {leanIxonEnv.blobs.size} blobs, {leanIxonEnv.names.size} names" + IO.println s!"[Step 4] Lean total ExprMeta entries: {leanTotalMetas}, total mdata items: {leanTotalMdata}" + IO.println s!"[Step 4] Lean ExprMeta by type: binder={leanBinder}, letBinder={leanLet}, ref={leanRef}, prj={leanPrj}, mdata={leanMd}" + + -- Analyze blob sizes + let leanBlobSizes := leanIxonEnv.blobs.toList.map (·.2.size) + let leanTotalBlobData := leanBlobSizes.foldl (· + ·) 0 + let leanMaxBlob := leanBlobSizes.foldl max 0 + let leanAvgBlob := if leanBlobSizes.length > 0 then leanTotalBlobData / leanBlobSizes.length else 0 + let leanBig := leanBlobSizes.filter (· > 1000) |>.length + let leanHuge := leanBlobSizes.filter (· > 100000) |>.length + let leanTopSizes := leanBlobSizes.toArray.insertionSort (· > ·) |>.toList.take 10 + IO.println s!"[Step 4] Lean blob stats: total={fmtBytes leanTotalBlobData}, max={fmtBytes leanMaxBlob}, avg={fmtBytes leanAvgBlob}, big(>1kB)={leanBig}, huge(>100kB)={leanHuge}" + IO.println s!"[Step 4] Lean top 10 blob sizes: {leanTopSizes.map fmtBytes}" + + let (leanBlobs, leanConsts, leanNames, leanNamed, leanComms) := Ixon.envSectionSizes leanIxonEnv + IO.println s!"[Step 4] Lean sections: blobs={fmtBytes leanBlobs}, consts={fmtBytes leanConsts}, names={fmtBytes leanNames}, named={fmtBytes leanNamed}, comms={fmtBytes leanComms}" + let leanEnvBytes := serializeEnv leanIxonEnv + IO.println s!"[Step 4] Lean env done: {fmtBytes leanEnvBytes.size}" + + IO.println s!"[Step 4] Serializing Rust env ({phases.compileEnv.constCount} consts)..." 
+ IO.println s!"[Step 4] Rust env has {phases.compileEnv.named.size} named, {phases.compileEnv.blobs.size} blobs, {phases.compileEnv.names.size} names" + IO.println s!"[Step 4] Rust total ExprMeta entries: {rustTotalMetas}, total mdata items: {rustTotalMdata}" + IO.println s!"[Step 4] Rust ExprMeta by type: binder={rustBinder}, letBinder={rustLet}, ref={rustRef}, prj={rustPrj}, mdata={rustMd}" + + -- Analyze Rust blob sizes + let rustBlobSizes := phases.compileEnv.blobs.toList.map (·.2.size) + let rustTotalBlobData := rustBlobSizes.foldl (· + ·) 0 + let rustMaxBlob := rustBlobSizes.foldl max 0 + let rustAvgBlob := if rustBlobSizes.length > 0 then rustTotalBlobData / rustBlobSizes.length else 0 + let rustBig := rustBlobSizes.filter (· > 1000) |>.length + let rustHuge := rustBlobSizes.filter (· > 100000) |>.length + let rustTopSizes := rustBlobSizes.toArray.insertionSort (· > ·) |>.toList.take 10 + IO.println s!"[Step 4] Rust blob stats: total={fmtBytes rustTotalBlobData}, max={fmtBytes rustMaxBlob}, avg={fmtBytes rustAvgBlob}, big(>1kB)={rustBig}, huge(>100kB)={rustHuge}" + IO.println s!"[Step 4] Rust top 10 blob sizes: {rustTopSizes.map fmtBytes}" + + let (rustBlobs, rustConsts, rustNames, rustNamed, rustComms) := Ixon.envSectionSizes phases.compileEnv + IO.println s!"[Step 4] Rust sections: blobs={fmtBytes rustBlobs}, consts={fmtBytes rustConsts}, names={fmtBytes rustNames}, named={fmtBytes rustNamed}, comms={fmtBytes rustComms}" + let rustEnvBytes := serializeEnv phases.compileEnv + IO.println s!"[Step 4] Rust env done: {fmtBytes rustEnvBytes.size}" + let serTime := (← IO.monoMsNow) - serStart + + IO.println s!"[Step 4] Lean env: {fmtBytes leanEnvBytes.size}" + IO.println s!"[Step 4] Rust env: {fmtBytes rustEnvBytes.size}" + IO.println s!"[Step 4] Serialization time: {serTime}ms" + + if leanEnvBytes == rustEnvBytes then + IO.println s!"[Step 4] Serialized environments match exactly! 
✓" + IO.println "" + + return (true, none) + else + IO.println s!"[Step 4] Serialized environments DIFFER" + if let some diffPos := findFirstDiff leanEnvBytes rustEnvBytes then + IO.println s!"[Step 4] First difference at byte {diffPos}:" + let leanByte := if diffPos < leanEnvBytes.size then s!"0x{String.ofList <| Nat.toDigits 16 (leanEnvBytes.get! diffPos).toNat}" else "EOF" + let rustByte := if diffPos < rustEnvBytes.size then s!"0x{String.ofList <| Nat.toDigits 16 (rustEnvBytes.get! diffPos).toNat}" else "EOF" + IO.println s!"[Step 4] Lean: {leanByte}" + IO.println s!"[Step 4] Rust: {rustByte}" + + -- Find blobs in Rust but not in Lean + let mut missingInLean : Array (Address × Nat) := #[] + for (addr, bytes) in phases.compileEnv.blobs.toList do + if !leanIxonEnv.blobs.contains addr then + missingInLean := missingInLean.push (addr, bytes.size) + if !missingInLean.isEmpty then + IO.println s!"[Step 4] Blobs in Rust but not Lean: {missingInLean.size}" + for (addr, size) in missingInLean.toList.take 10 do + -- Try to show content if small + if let some bytes := phases.compileEnv.blobs.get? addr then + if size < 100 then + let content := String.fromUTF8? bytes |>.getD s!"" + IO.println s!"[Step 4] {addr} ({fmtBytes size}): {content}" + else + IO.println s!"[Step 4] {addr} ({fmtBytes size})" + + -- Find blobs in Lean but not in Rust + let mut missingInRust : Array (Address × Nat) := #[] + for (addr, bytes) in leanIxonEnv.blobs.toList do + if !phases.compileEnv.blobs.contains addr then + missingInRust := missingInRust.push (addr, bytes.size) + if !missingInRust.isEmpty then + IO.println s!"[Step 4] Blobs in Lean but not Rust: {missingInRust.size}" + for (addr, size) in missingInRust.toList.take 10 do + if let some bytes := leanIxonEnv.blobs.get? addr then + if size < 100 then + let content := String.fromUTF8? 
bytes |>.getD s!"" + IO.println s!"[Step 4] {addr} ({fmtBytes size}): {content}" + else + IO.println s!"[Step 4] {addr} ({fmtBytes size})" + + IO.println "" + -- Note: We expect this to fail until Lean generates metadata + return (false, some s!"Serialized environments differ (Lean: {fmtBytes leanEnvBytes.size}, Rust: {fmtBytes rustEnvBytes.size})") + else + -- Report mismatches + IO.println s!"[Step 3] Found {result.mismatchedConstants.size} mismatches!" + IO.println "" + + -- Show first mismatch in detail + if let some first := result.mismatchedConstants[0]? then + IO.println s!"[Mismatch] First discrepancy: {first.name}" + IO.println s!" Lean address: {first.leanAddr}" + IO.println s!" Rust address: {first.rustAddr}" + IO.println "" + + IO.println s!" Lean bytes ({fmtBytes first.leanBytes.size}):" + IO.println s!"{hexDump first.leanBytes 128}" + IO.println "" + + IO.println s!" Rust bytes ({fmtBytes first.rustBytes.size}):" + IO.println s!"{hexDump first.rustBytes 128}" + IO.println "" + + if let some diffPos := findFirstDiff first.leanBytes first.rustBytes then + IO.println s!" First difference at byte {diffPos}:" + let leanByte := if diffPos < first.leanBytes.size then s!"0x{String.ofList <| Nat.toDigits 16 (first.leanBytes.get! diffPos).toNat}" else "EOF" + let rustByte := if diffPos < first.rustBytes.size then s!"0x{String.ofList <| Nat.toDigits 16 (first.rustBytes.get! diffPos).toNat}" else "EOF" + IO.println s!" Lean: {leanByte}" + IO.println s!" Rust: {rustByte}" + IO.println "" + + if !result.missingInRust.isEmpty then + IO.println s!"[Missing] {result.missingInRust.size} constants in Lean but not in Rust" + for name in result.missingInRust.toList.take 5 do + IO.println s!" - {name}" + if result.missingInRust.size > 5 then + IO.println s!" ... 
and {result.missingInRust.size - 5} more" + IO.println "" + + if !result.missingInLean.isEmpty then + IO.println s!"[Missing] {result.missingInLean.size} constants in Rust but not in Lean" + for name in result.missingInLean.toList.take 5 do + IO.println s!" - {name}" + if result.missingInLean.size > 5 then + IO.println s!" ... and {result.missingInLean.size - 5} more" + IO.println "" + + return (false, some s!"Found {result.mismatchedConstants.size} mismatches") + ) .done + +/-! ## Test Suite -/ + +def compileSuiteIO : List TestSeq := [ + testCrossImpl, ] + +end Tests.Compile diff --git a/Tests/Ix/CondenseM.lean b/Tests/Ix/CondenseM.lean new file mode 100644 index 00000000..d54d85fe --- /dev/null +++ b/Tests/Ix/CondenseM.lean @@ -0,0 +1,383 @@ +/- + Tests for Ix.CondenseM module. + - Unit tests for SCC computation mirroring Rust test cases + - Cross-implementation tests comparing Lean vs Rust SCC results +-/ + +import Ix.GraphM +import Ix.CondenseM +import Ix.Environment +import Ix.Meta +import LSpec + +open LSpec Ix + +namespace Tests.Ix.CondenseM + +/-! 
## Helper functions -/ + +/-- Create a simple Ix.Name from a string -/ +def mkName (s : String) : Ix.Name := Ix.Name.mkStr Ix.Name.mkAnon s + +/-- Build a reference graph from adjacency list -/ +def buildGraph (edges : List (String × List String)) : Map Ix.Name (Set Ix.Name) := + edges.foldl (init := {}) fun acc (src, dsts) => + let srcName := mkName src + let dstSet := dsts.foldl (init := {}) fun s d => s.insert (mkName d) + acc.insert srcName dstSet + +/-- Extract SCC structure as sorted list of sorted lists (for deterministic comparison) -/ +def sccsToSorted (blocks : CondensedBlocks) : List (List String) := + let sccs := blocks.blocks.toList.map fun (_, members) => + (members.toList.map toString).mergeSort + sccs.mergeSort + +/-- Check if two SCC results are equivalent (same SCCs, ignoring order) -/ +def sccsEq (actual expected : List (List String)) : Bool := + actual.length == expected.length && + actual.all (expected.contains ·) + +/-! ## Test cases (mirroring Rust's src/ix/condense.rs tests) -/ + +/-- Test 1: Single node with no edges → one SCC containing just that node -/ +def testSingleNode : TestSeq := + let graph := buildGraph [("A", [])] + let sccs := Ix.CondenseM.run graph + let result := sccsToSorted sccs + test "single node: 1 SCC" (result.length == 1) ++ + test "single node: SCC contains A" (result == [["A"]]) + +/-- Test 2: Simple cycle A→B→A → one SCC containing both -/ +def testSimpleCycle : TestSeq := + let graph := buildGraph [("A", ["B"]), ("B", ["A"])] + let sccs := Ix.CondenseM.run graph + let result := sccsToSorted sccs + test "simple cycle: 1 SCC" (result.length == 1) ++ + test "simple cycle: SCC contains A,B" (result == [["A", "B"]]) + +/-- Test 3: Chain with no cycle A→B→C → three separate SCCs -/ +def testChainNoCycle : TestSeq := + let graph := buildGraph [("A", ["B"]), ("B", ["C"]), ("C", [])] + let sccs := Ix.CondenseM.run graph + let result := sccsToSorted sccs + test "chain: 3 SCCs" (result.length == 3) ++ + test "chain: each 
singleton" (sccsEq result [["A"], ["B"], ["C"]]) + +/-- Test 4: Two cycles connected: A↔B→C↔D → two SCCs -/ +def testTwoCyclesConnected : TestSeq := + let graph := buildGraph [ + ("A", ["B"]), + ("B", ["A", "C"]), + ("C", ["D"]), + ("D", ["C"]) + ] + let sccs := Ix.CondenseM.run graph + let result := sccsToSorted sccs + test "two cycles: 2 SCCs" (result.length == 2) ++ + test "two cycles: correct SCCs" (sccsEq result [["A", "B"], ["C", "D"]]) + +/-- Test 5: Complex graph with 8 nodes → 3 SCCs + Graph: A→B, B→{C,E,F}, C→{D,G}, D→{C,H}, E→{A,F}, F→G, G→F, H→{D,G} + Expected SCCs: {A,B,E}, {C,D,H}, {F,G} -/ +def testComplexGraph : TestSeq := + let graph := buildGraph [ + ("A", ["B"]), + ("B", ["C", "E", "F"]), + ("C", ["D", "G"]), + ("D", ["C", "H"]), + ("E", ["A", "F"]), + ("F", ["G"]), + ("G", ["F"]), + ("H", ["D", "G"]) + ] + let sccs := Ix.CondenseM.run graph + let result := sccsToSorted sccs + test "complex: 3 SCCs" (result.length == 3) ++ + test "complex: correct SCCs" (sccsEq result [["A", "B", "E"], ["C", "D", "H"], ["F", "G"]]) + +/-! 
## Additional edge case tests -/ + +/-- Empty graph -/ +def testEmptyGraph : TestSeq := + let graph : Map Ix.Name (Set Ix.Name) := {} + let sccs := Ix.CondenseM.run graph + let result := sccsToSorted sccs + test "empty: 0 SCCs" (result.length == 0) + +/-- Self-loop: A→A -/ +def testSelfLoop : TestSeq := + let graph := buildGraph [("A", ["A"])] + let sccs := Ix.CondenseM.run graph + let result := sccsToSorted sccs + test "self-loop: 1 SCC" (result.length == 1) ++ + test "self-loop: contains A" (result == [["A"]]) + +/-- Multiple disconnected components -/ +def testDisconnected : TestSeq := + let graph := buildGraph [ + ("A", ["B"]), + ("B", ["A"]), + ("C", ["D"]), + ("D", ["C"]), + ("E", []) + ] + let sccs := Ix.CondenseM.run graph + let result := sccsToSorted sccs + test "disconnected: 3 SCCs" (result.length == 3) ++ + test "disconnected: correct SCCs" (sccsEq result [["A", "B"], ["C", "D"], ["E"]]) + +/-- Linear chain -/ +def testLinearChain : TestSeq := + let graph := buildGraph [ + ("A", ["B"]), + ("B", ["C"]), + ("C", ["D"]), + ("D", ["E"]), + ("E", []) + ] + let sccs := Ix.CondenseM.run graph + let result := sccsToSorted sccs + test "linear: 5 SCCs" (result.length == 5) ++ + test "linear: all singletons" (result.all (·.length == 1)) + +/-- Large cycle -/ +def testLargeCycle : TestSeq := + let graph := buildGraph [ + ("A", ["B"]), + ("B", ["C"]), + ("C", ["D"]), + ("D", ["E"]), + ("E", ["A"]) + ] + let sccs := Ix.CondenseM.run graph + let result := sccsToSorted sccs + test "large cycle: 1 SCC" (result.length == 1) ++ + test "large cycle: contains all 5" (result == [["A", "B", "C", "D", "E"]]) + +/-! ## Test lowLinks correctness -/ + +def testLowLinksSimpleCycle : TestSeq := + let graph := buildGraph [("A", ["B"]), ("B", ["A"])] + let sccs := Ix.CondenseM.run graph + -- All nodes in the same SCC should have the same lowLink + let aLow := sccs.lowLinks.get? (mkName "A") + let bLow := sccs.lowLinks.get? 
(mkName "B") + test "lowLinks: A has lowLink" aLow.isSome ++ + test "lowLinks: B has lowLink" bLow.isSome ++ + test "lowLinks: A and B same root" (aLow == bLow) + +def testLowLinksChain : TestSeq := + let graph := buildGraph [("A", ["B"]), ("B", ["C"]), ("C", [])] + let sccs := Ix.CondenseM.run graph + -- Each node should be its own root in a chain + let aLow := sccs.lowLinks.get? (mkName "A") + let bLow := sccs.lowLinks.get? (mkName "B") + let cLow := sccs.lowLinks.get? (mkName "C") + test "chain lowLinks: A is own root" (aLow == some (mkName "A")) ++ + test "chain lowLinks: B is own root" (bLow == some (mkName "B")) ++ + test "chain lowLinks: C is own root" (cLow == some (mkName "C")) + +/-! ## Test blockRefs correctness -/ + +def testBlockRefsChain : TestSeq := + let graph := buildGraph [("A", ["B"]), ("B", ["C"]), ("C", [])] + let sccs := Ix.CondenseM.run graph + -- A's block should reference B's block + let aBlockRefs := sccs.blockRefs.get? (mkName "A") + let bBlockRefs := sccs.blockRefs.get? (mkName "B") + let cBlockRefs := sccs.blockRefs.get? (mkName "C") + test "blockRefs: A refs B" (aBlockRefs.map (·.contains (mkName "B")) |>.getD false) ++ + test "blockRefs: B refs C" (bBlockRefs.map (·.contains (mkName "C")) |>.getD false) ++ + test "blockRefs: C refs nothing" (cBlockRefs.map (·.isEmpty) |>.getD true) + +def testBlockRefsTwoCycles : TestSeq := + let graph := buildGraph [ + ("A", ["B"]), + ("B", ["A", "C"]), + ("C", ["D"]), + ("D", ["C"]) + ] + let sccs := Ix.CondenseM.run graph + -- The {A,B} SCC should reference the {C,D} SCC + -- Find the root of A's SCC + let aRoot := sccs.lowLinks.get? (mkName "A") + match aRoot with + | some root => + let blockRefs := sccs.blockRefs.get? root + -- Should contain C (which is in the other SCC) + test "blockRefs: AB SCC refs C" (blockRefs.map (·.contains (mkName "C")) |>.getD false) + | none => test "blockRefs: found A's root" false + +/-! 
## Full Test Suite (unit tests) -/ + +def suite : List TestSeq := [ + group "basic SCCs" ( + testSingleNode ++ + testSimpleCycle ++ + testChainNoCycle ++ + testTwoCyclesConnected ++ + testComplexGraph + ), + group "edge cases" ( + testEmptyGraph ++ + testSelfLoop ++ + testDisconnected ++ + testLinearChain ++ + testLargeCycle + ), + group "lowLinks" ( + testLowLinksSimpleCycle ++ + testLowLinksChain + ), + group "blockRefs" ( + testBlockRefsChain ++ + testBlockRefsTwoCycles + ) +] + +/-! ## Cross-Implementation Tests (Lean vs Rust) -/ + +/-- Canonicalize environment in Rust (fast). Returns Ix.RawEnvironment. -/ +@[extern "rs_canonicalize_env_to_ix"] +opaque rsCanonicalizeEnvToIxRaw : + @& List (Lean.Name × Lean.ConstantInfo) → IO Ix.RawEnvironment + +/-- Compute SCCs in Rust. + Returns RustCondensedBlocks (defined in Ix.CondenseM) -/ +@[extern "rs_compute_sccs"] +opaque rsComputeSccs : @& List (Lean.Name × Lean.ConstantInfo) → + IO Ix.RustCondensedBlocks + +/-- Convert Rust lowLinks array to HashMap. -/ +def rustLowLinksToMap (arr : Array (Ix.Name × Ix.Name)) + : Std.HashMap Ix.Name Ix.Name := Id.run do + let mut m : Std.HashMap Ix.Name Ix.Name := {} + for (name, root) in arr do + m := m.insert name root + return m + +/-- Convert Rust's blocks array to a HashMap. -/ +def rustBlocksToMap (arr : Array (Ix.Name × Array Ix.Name)) + : Std.HashMap Ix.Name (Std.HashSet Ix.Name) := Id.run do + let mut m : Std.HashMap Ix.Name (Std.HashSet Ix.Name) := {} + for (name, members) in arr do + let memberSet := members.foldl (init := {}) fun s n => s.insert n + m := m.insert name memberSet + return m + +/-- Convert Lean's CondensedBlocks.blocks to a HashMap. 
-/ +def leanBlocksToHashMap (m : Ix.Map Ix.Name (Ix.Set Ix.Name)) + : Std.HashMap Ix.Name (Std.HashSet Ix.Name) := Id.run do + let mut result : Std.HashMap Ix.Name (Std.HashSet Ix.Name) := {} + for (name, members) in m do + let memberSet : Std.HashSet Ix.Name := members.fold (init := {}) fun s n => s.insert n + result := result.insert name memberSet + return result + +/-- Compare SCC results efficiently by checking block counts and sizes. + Returns (matches, totalChecked, mismatches). -/ +def compareSCCResults + (leanBlocks rustBlocks : Std.HashMap Ix.Name (Std.HashSet Ix.Name)) + (rustLowLinks : Std.HashMap Ix.Name Ix.Name) + : Bool × Nat × Array String := Id.run do + let mut mismatches : Array String := #[] + let mut checked : Nat := 0 + + -- Check that block counts match + if leanBlocks.size != rustBlocks.size then + mismatches := mismatches.push s!"Block count mismatch: Lean={leanBlocks.size}, Rust={rustBlocks.size}" + return (false, 0, mismatches) + + -- For each Lean block, find corresponding Rust block and compare sizes + for (_leanRoot, leanMembers) in leanBlocks do + checked := checked + 1 + -- Find any member's Rust root + let someMember := leanMembers.fold (init := none) fun acc n => + if acc.isNone then some n else acc + match someMember with + | none => continue + | some member => + match rustLowLinks.get? member with + | none => + if mismatches.size < 5 then + mismatches := mismatches.push s!"Member {member} not in Rust lowLinks" + | some rustRoot => + match rustBlocks.get? 
rustRoot with + | none => + if mismatches.size < 5 then + mismatches := mismatches.push s!"Rust root {rustRoot} not in blocks" + | some rustMembers => + if leanMembers.size != rustMembers.size then + if mismatches.size < 5 then + mismatches := mismatches.push s!"Size mismatch for SCC containing {member}: Lean={leanMembers.size}, Rust={rustMembers.size}" + + return (mismatches.isEmpty, checked, mismatches) + +/-- Cross-implementation test: compare Lean and Rust SCC computation -/ +def testSccComparison : TestSeq := + .individualIO "SCC Computation: Lean vs Rust" (do + let env ← get_env! + let numConsts := env.constants.toList.length + + IO.println s!"[Test] SCC Computation Comparison Test" + IO.println s!"[Test] Environment has {numConsts} constants" + IO.println "" + + -- Step 0: Canonicalize environment using Rust FFI (fast) + IO.println s!"[Test] Step 0: Canonicalizing environment via Rust FFI..." + let canonStart ← IO.monoMsNow + let rawEnv ← rsCanonicalizeEnvToIxRaw env.constants.toList + let ixEnv := rawEnv.toEnvironment + let canonTime := (← IO.monoMsNow) - canonStart + IO.println s!"[Test] Canonicalized {ixEnv.consts.size} constants in {canonTime}ms" + IO.println "" + + -- Step 1: Compute SCCs in Rust + IO.println s!"[Test] Step 1: Computing SCCs in Rust..." + let rustStart ← IO.monoMsNow + let rustSccs ← rsComputeSccs env.constants.toList + let rustTime := (← IO.monoMsNow) - rustStart + IO.println s!"[Test] Rust: {rustSccs.blocks.size} SCCs, {rustSccs.lowLinks.size} lowLinks in {rustTime}ms" + + -- Step 2: Compute SCCs in Lean (using pre-canonicalized environment) + IO.println s!"[Test] Step 2: Computing SCCs in Lean..." 
+ let leanStart ← IO.monoMsNow + let leanRefMap := Ix.GraphM.envParallel ixEnv + let sccs := Ix.CondenseM.run leanRefMap + IO.print s!"[Test] Lean: {sccs.blocks.size} SCCs, {sccs.lowLinks.size} lowLinks " + let leanTime := (← IO.monoMsNow) - leanStart + IO.println s!"in {leanTime}ms" + IO.println "" + + -- Step 3: Compare SCC content + IO.println s!"[Test] Step 3: Comparing SCC content..." + let rustBlocksMap := rustBlocksToMap rustSccs.blocks + let leanBlocksMap := leanBlocksToHashMap sccs.blocks + let rustLowLinksMap := rustLowLinksToMap rustSccs.lowLinks + + let (sccsMatch, checkedCount, mismatches) := + compareSCCResults leanBlocksMap rustBlocksMap rustLowLinksMap + + for msg in mismatches do + IO.println s!"[Test] {msg}" + + IO.println "" + IO.println s!"[Test] Summary:" + IO.println s!"[Test] Canon time: {canonTime}ms" + IO.println s!"[Test] Rust SCC time: {rustTime}ms" + IO.println s!"[Test] Lean SCC time: {leanTime}ms" + IO.println s!"[Test] Checked {checkedCount} SCCs" + IO.println s!"[Test] SCCs match: {sccsMatch}" + + if !sccsMatch then + return (false, some s!"SCCs do not match: {mismatches.size} mismatches") + + return (true, none) + ) .done + +/-- Cross-implementation test suite (expensive, run with --ignored) -/ +def suiteIO : List TestSeq := [ + testSccComparison +] + +end Tests.Ix.CondenseM diff --git a/Tests/Ix/Decompile.lean b/Tests/Ix/Decompile.lean new file mode 100644 index 00000000..2d659f34 --- /dev/null +++ b/Tests/Ix/Decompile.lean @@ -0,0 +1,128 @@ +/- + Decompilation tests. + Runs the Rust compilation pipeline, then decompiles back to Ix constants + and compares via content hashes. 
+-/ + +import Ix.Ixon +import Ix.Environment +import Ix.Address +import Ix.Common +import Ix.Meta +import Ix.CompileM +import Ix.DecompileM +import Lean +import LSpec +import Tests.Ix.Fixtures + +open LSpec + +namespace Tests.Decompile + +/-- Decompile roundtrip test: Rust compile → parallel decompile → hash comparison -/ +def testDecompile : TestSeq := + .individualIO "Decompilation Roundtrip" (do + let leanEnv ← get_env! + let totalConsts := leanEnv.constants.toList.length + + IO.println s!"[Test] Decompilation Roundtrip Test" + IO.println s!"[Test] Environment has {totalConsts} constants" + IO.println "" + + -- Step 1: Run Rust compilation pipeline + IO.println s!"[Step 1] Running Rust compilation pipeline..." + let rustStart ← IO.monoMsNow + let phases ← Ix.CompileM.rsCompilePhases leanEnv + let rustTime := (← IO.monoMsNow) - rustStart + IO.println s!"[Step 1] Rust: {phases.compileEnv.constCount} compiled in {rustTime}ms" + IO.println s!"[Step 1] names={phases.compileEnv.names.size}, named={phases.compileEnv.named.size}, consts={phases.compileEnv.consts.size}, blobs={phases.compileEnv.blobs.size}" + IO.println "" + + -- Step 2: Parallel decompile to Ix types + IO.println s!"[Step 2] Decompiling (parallel) to Ix types..." + let (decompiled, decompErrors) ← Ix.DecompileM.decompileAllParallelIO phases.compileEnv + IO.println "" + + -- Report errors + if !decompErrors.isEmpty then + IO.println s!"[Errors] First 20 errors:" + for (name, err) in decompErrors.toList.take 20 do + IO.println s!" 
{name}: {err}" + IO.println "" + + -- Count by constant type + let mut nDefn := (0 : Nat); let mut nAxiom := (0 : Nat) + let mut nInduct := (0 : Nat); let mut nCtor := (0 : Nat) + let mut nRec := (0 : Nat); let mut nQuot := (0 : Nat) + let mut nOpaque := (0 : Nat); let mut nThm := (0 : Nat) + for (_, info) in decompiled do + match info with + | .defnInfo _ => nDefn := nDefn + 1 + | .axiomInfo _ => nAxiom := nAxiom + 1 + | .inductInfo _ => nInduct := nInduct + 1 + | .ctorInfo _ => nCtor := nCtor + 1 + | .recInfo _ => nRec := nRec + 1 + | .quotInfo _ => nQuot := nQuot + 1 + | .opaqueInfo _ => nOpaque := nOpaque + 1 + | .thmInfo _ => nThm := nThm + 1 + IO.println s!"[Types] defn={nDefn}, thm={nThm}, opaque={nOpaque}, axiom={nAxiom}, induct={nInduct}, ctor={nCtor}, rec={nRec}, quot={nQuot}" + IO.println "" + + -- Step 3: Hash-based comparison against original Ix.Environment + let ixEnv := phases.rawEnv + IO.println s!"[Step 3] Original Ix.Environment has {ixEnv.consts.size} constants" + + IO.println s!"[Compare] Hash-comparing {decompiled.size} decompiled constants..." + let compareStart ← IO.monoMsNow + + -- Sequential hash comparison (cheap: just address equality on 32-byte hashes) + let mut nMatch := (0 : Nat); let mut nMismatch := (0 : Nat); let mut nMissing := (0 : Nat) + let mut firstMismatches : Array (Ix.Name × String) := #[] + for (name, decompInfo) in decompiled do + match ixEnv.consts.get? 
name with + | some origInfo => + let decompTyHash := decompInfo.getCnst.type.getHash + let origTyHash := origInfo.getCnst.type.getHash + if decompTyHash != origTyHash then + nMismatch := nMismatch + 1 + if firstMismatches.size < 10 then + firstMismatches := firstMismatches.push (name, s!"type hash mismatch") + else + let valMismatch := match decompInfo, origInfo with + | .defnInfo dv, .defnInfo ov => dv.value.getHash != ov.value.getHash + | .thmInfo dv, .thmInfo ov => dv.value.getHash != ov.value.getHash + | .opaqueInfo dv, .opaqueInfo ov => dv.value.getHash != ov.value.getHash + | _, _ => false + if valMismatch then + nMismatch := nMismatch + 1 + if firstMismatches.size < 10 then + firstMismatches := firstMismatches.push (name, s!"value hash mismatch") + else + nMatch := nMatch + 1 + | none => + nMissing := nMissing + 1 + if firstMismatches.size < 10 then + firstMismatches := firstMismatches.push (name, "not in original") + + let compareTime := (← IO.monoMsNow) - compareStart + IO.println s!"[Compare] Matched: {nMatch}, Mismatched: {nMismatch}, Missing: {nMissing} ({compareTime}ms)" + if !firstMismatches.isEmpty then + IO.println s!"[Compare] First mismatches:" + for (name, diff) in firstMismatches do + IO.println s!" {name}: {diff}" + IO.println "" + + let success := decompErrors.size == 0 && nMismatch == 0 && nMissing == 0 + if success then + return (true, none) + else + return (false, some s!"{decompErrors.size} decompilation errors") + ) .done + +/-! ## Test Suite -/ + +def decompileSuiteIO : List TestSeq := [ + testDecompile, +] + +end Tests.Decompile diff --git a/Tests/Ix/GraphM.lean b/Tests/Ix/GraphM.lean new file mode 100644 index 00000000..29458ca7 --- /dev/null +++ b/Tests/Ix/GraphM.lean @@ -0,0 +1,469 @@ +/- + Tests for Ix.GraphM module. 
+ - Unit tests for reference extraction from expressions and constants + - Cross-implementation tests comparing Lean vs Rust graph construction +-/ + +import Ix.GraphM +import Ix.Environment +import Ix.Meta +import LSpec + +open LSpec Ix + +namespace Tests.Ix.GraphM + +/-! ## Helper functions -/ + +/-- Create a simple Ix.Name from a string -/ +def mkName (s : String) : Ix.Name := Ix.Name.mkStr Ix.Name.mkAnon s + +/-- Create an Ix constant expression -/ +def mkConstExpr (s : String) : Ix.Expr := Ix.Expr.mkConst (mkName s) #[] + +/-- Simple type expression (Nat) -/ +def natType : Ix.Expr := mkConstExpr "Nat" + +/-- Create a simple ConstantVal -/ +def mkConstVal (name : String) (type : Ix.Expr) : Ix.ConstantVal := + { name := mkName name, levelParams := #[], type := type } + +/-- Convert Set to sorted list for deterministic comparison -/ +def setToSortedList (s : Set Ix.Name) : List String := + (s.toList.map toString).mergeSort + +/-! ## Test: graphExpr reference extraction -/ + +def testGraphExprConst : TestSeq := + let env : Ix.Environment := { consts := {} } + let expr := mkConstExpr "Foo" + let (refs, _) := Ix.GraphM.run env .init (Ix.graphExpr expr) + test "const extracts name" (refs.contains (mkName "Foo")) + +def testGraphExprBvar : TestSeq := + let env : Ix.Environment := { consts := {} } + let expr := Ix.Expr.mkBVar 0 + let (refs, _) := Ix.GraphM.run env .init (Ix.graphExpr expr) + test "bvar has no refs" (refs.isEmpty) + +def testGraphExprSort : TestSeq := + let env : Ix.Environment := { consts := {} } + let expr := Ix.Expr.mkSort Ix.Level.mkZero + let (refs, _) := Ix.GraphM.run env .init (Ix.graphExpr expr) + test "sort has no refs" (refs.isEmpty) + +def testGraphExprApp : TestSeq := + let env : Ix.Environment := { consts := {} } + let f := mkConstExpr "f" + let a := mkConstExpr "a" + let expr := Ix.Expr.mkApp f a + let (refs, _) := Ix.GraphM.run env .init (Ix.graphExpr expr) + test "app collects f" (refs.contains (mkName "f")) ++ + test "app collects a" 
(refs.contains (mkName "a")) ++ + test "app has 2 refs" (refs.size == 2) + +def testGraphExprLam : TestSeq := + let env : Ix.Environment := { consts := {} } + let ty := mkConstExpr "T" + let body := mkConstExpr "B" + let expr := Ix.Expr.mkLam (mkName "x") ty body .default + let (refs, _) := Ix.GraphM.run env .init (Ix.graphExpr expr) + test "lam collects type" (refs.contains (mkName "T")) ++ + test "lam collects body" (refs.contains (mkName "B")) ++ + test "lam has 2 refs" (refs.size == 2) + +def testGraphExprForallE : TestSeq := + let env : Ix.Environment := { consts := {} } + let ty := mkConstExpr "T" + let body := mkConstExpr "B" + let expr := Ix.Expr.mkForallE (mkName "x") ty body .default + let (refs, _) := Ix.GraphM.run env .init (Ix.graphExpr expr) + test "forallE collects type" (refs.contains (mkName "T")) ++ + test "forallE collects body" (refs.contains (mkName "B")) ++ + test "forallE has 2 refs" (refs.size == 2) + +def testGraphExprLetE : TestSeq := + let env : Ix.Environment := { consts := {} } + let ty := mkConstExpr "T" + let val := mkConstExpr "V" + let body := mkConstExpr "B" + let expr := Ix.Expr.mkLetE (mkName "x") ty val body false + let (refs, _) := Ix.GraphM.run env .init (Ix.graphExpr expr) + test "letE collects type" (refs.contains (mkName "T")) ++ + test "letE collects val" (refs.contains (mkName "V")) ++ + test "letE collects body" (refs.contains (mkName "B")) ++ + test "letE has 3 refs" (refs.size == 3) + +def testGraphExprProj : TestSeq := + let env : Ix.Environment := { consts := {} } + let struct := mkConstExpr "S" + let expr := Ix.Expr.mkProj (mkName "MyType") 0 struct + let (refs, _) := Ix.GraphM.run env .init (Ix.graphExpr expr) + test "proj collects typeName" (refs.contains (mkName "MyType")) ++ + test "proj collects struct" (refs.contains (mkName "S")) ++ + test "proj has 2 refs" (refs.size == 2) + +def testGraphExprLit : TestSeq := + let env : Ix.Environment := { consts := {} } + let expr := Ix.Expr.mkLit (.natVal 42) + let (refs, 
_) := Ix.GraphM.run env .init (Ix.graphExpr expr) + test "lit has no refs" (refs.isEmpty) + +/-! ## Test: graphConst for each ConstantInfo variant -/ + +def testGraphConstAxiom : TestSeq := + let env : Ix.Environment := { consts := {} } + let ty := mkConstExpr "T" + let ax := Ix.ConstantInfo.axiomInfo { + cnst := mkConstVal "myAxiom" ty, + isUnsafe := false + } + let (refs, _) := Ix.GraphM.run env .init (Ix.graphConst ax) + test "axiom refs type" (refs.contains (mkName "T")) ++ + test "axiom has 1 ref" (refs.size == 1) + +def testGraphConstDefn : TestSeq := + let env : Ix.Environment := { consts := {} } + let ty := mkConstExpr "T" + let val := mkConstExpr "V" + let defn := Ix.ConstantInfo.defnInfo { + cnst := mkConstVal "myDef" ty, + value := val, + hints := .opaque, + safety := .safe, + all := #[] + } + let (refs, _) := Ix.GraphM.run env .init (Ix.graphConst defn) + test "defn refs type" (refs.contains (mkName "T")) ++ + test "defn refs value" (refs.contains (mkName "V")) ++ + test "defn has 2 refs" (refs.size == 2) + +def testGraphConstThm : TestSeq := + let env : Ix.Environment := { consts := {} } + let ty := mkConstExpr "T" + let val := mkConstExpr "V" + let thm := Ix.ConstantInfo.thmInfo { + cnst := mkConstVal "myThm" ty, + value := val, + all := #[] + } + let (refs, _) := Ix.GraphM.run env .init (Ix.graphConst thm) + test "thm refs type" (refs.contains (mkName "T")) ++ + test "thm refs value" (refs.contains (mkName "V")) ++ + test "thm has 2 refs" (refs.size == 2) + +def testGraphConstOpaque : TestSeq := + let env : Ix.Environment := { consts := {} } + let ty := mkConstExpr "T" + let val := mkConstExpr "V" + let opq := Ix.ConstantInfo.opaqueInfo { + cnst := mkConstVal "myOpaque" ty, + value := val, + isUnsafe := false, + all := #[] + } + let (refs, _) := Ix.GraphM.run env .init (Ix.graphConst opq) + test "opaque refs type" (refs.contains (mkName "T")) ++ + test "opaque refs value" (refs.contains (mkName "V")) ++ + test "opaque has 2 refs" (refs.size == 2) + 
+def testGraphConstQuot : TestSeq := + let env : Ix.Environment := { consts := {} } + let ty := mkConstExpr "T" + let quot := Ix.ConstantInfo.quotInfo { + cnst := mkConstVal "myQuot" ty, + kind := .type + } + let (refs, _) := Ix.GraphM.run env .init (Ix.graphConst quot) + test "quot refs type" (refs.contains (mkName "T")) ++ + test "quot has 1 ref" (refs.size == 1) + +def testGraphConstInduct : TestSeq := + let env : Ix.Environment := { consts := {} } + let ty := mkConstExpr "T" + let induct := Ix.ConstantInfo.inductInfo { + cnst := mkConstVal "MyInductive" ty, + numParams := 0, + numIndices := 0, + all := #[mkName "MyInductive"], + ctors := #[mkName "MyInductive.mk"], + numNested := 0, + isRec := false, + isUnsafe := false, + isReflexive := false + } + let (refs, _) := Ix.GraphM.run env .init (Ix.graphConst induct) + test "induct refs type" (refs.contains (mkName "T")) ++ + test "induct refs ctor name" (refs.contains (mkName "MyInductive.mk")) ++ + test "induct has 2 refs" (refs.size == 2) + +def testGraphConstCtor : TestSeq := + let env : Ix.Environment := { consts := {} } + let ty := mkConstExpr "T" + let ctor := Ix.ConstantInfo.ctorInfo { + cnst := mkConstVal "MyType.mk" ty, + induct := mkName "MyType", + cidx := 0, + numParams := 0, + numFields := 0, + isUnsafe := false + } + let (refs, _) := Ix.GraphM.run env .init (Ix.graphConst ctor) + test "ctor refs type" (refs.contains (mkName "T")) ++ + test "ctor refs induct" (refs.contains (mkName "MyType")) ++ + test "ctor has 2 refs" (refs.size == 2) + +def testGraphConstRec : TestSeq := + let env : Ix.Environment := { consts := {} } + let ty := mkConstExpr "T" + let rhs := mkConstExpr "R" + let recConst := Ix.ConstantInfo.recInfo { + cnst := mkConstVal "MyType.rec" ty, + all := #[mkName "MyType"], + numParams := 0, + numIndices := 0, + numMotives := 1, + numMinors := 1, + rules := #[{ ctor := mkName "MyType.mk", nfields := 0, rhs := rhs }], + k := false, + isUnsafe := false + } + let (refs, _) := Ix.GraphM.run env 
.init (Ix.graphConst recConst) + test "rec refs type" (refs.contains (mkName "T")) ++ + test "rec refs ctor name" (refs.contains (mkName "MyType.mk")) ++ + test "rec refs rhs" (refs.contains (mkName "R")) ++ + test "rec has 3 refs" (refs.size == 3) + +/-! ## Test: GraphM.env builds complete graph -/ + +def testGraphMEnv : TestSeq := + -- Create a small synthetic environment + let a := mkName "A" + let b := mkName "B" + let c := mkName "C" + + -- A: type refs B + let aConst := Ix.ConstantInfo.axiomInfo { + cnst := { name := a, levelParams := #[], type := mkConstExpr "B" }, + isUnsafe := false + } + -- B: type refs C + let bConst := Ix.ConstantInfo.axiomInfo { + cnst := { name := b, levelParams := #[], type := mkConstExpr "C" }, + isUnsafe := false + } + -- C: type refs nothing (just a sort) + let cConst := Ix.ConstantInfo.axiomInfo { + cnst := { name := c, levelParams := #[], type := Ix.Expr.mkSort Ix.Level.mkZero }, + isUnsafe := false + } + + let env : Ix.Environment := { + consts := ({} : Std.HashMap _ _).insert a aConst |>.insert b bConst |>.insert c cConst + } + + let graph := Ix.GraphM.env env + test "graph has 3 entries" (graph.size == 3) ++ + test "A refs B" ((graph.get? a).map (·.contains b) |>.getD false) ++ + test "B refs C" ((graph.get? b).map (·.contains c) |>.getD false) ++ + test "C refs empty" ((graph.get? c).map (·.isEmpty) |>.getD false) + +/-! ## Test: envParallel vs envSerial equivalence -/ + +def testEnvParallelVsSerial : TestSeq := Id.run do + -- Create a synthetic environment + let names := #["A", "B", "C", "D", "E"].map mkName + let mut consts : Std.HashMap Ix.Name Ix.ConstantInfo := {} + + -- Create chain: A->B->C->D->E->() + for i in [:names.size] do + let name := names[i]! + let ty := if i + 1 < names.size + then Ix.Expr.mkConst names[i+1]! 
#[] + else Ix.Expr.mkSort Ix.Level.mkZero + let c := Ix.ConstantInfo.axiomInfo { + cnst := { name := name, levelParams := #[], type := ty }, + isUnsafe := false + } + consts := consts.insert name c + + let env : Ix.Environment := { consts := consts } + + let graphParallel := Ix.GraphM.envParallel env + let graphSerial := Ix.GraphM.envSerial env + + -- Compare sizes + let sizeMatch := graphParallel.size == graphSerial.size + + -- Compare all entries + let mut allMatch := true + for (name, parallelRefs) in graphParallel do + match graphSerial.get? name with + | none => allMatch := false + | some serialRefs => + if parallelRefs.size != serialRefs.size then + allMatch := false + else + for r in parallelRefs do + if !serialRefs.contains r then + allMatch := false + + return test "sizes match" sizeMatch ++ + test "all entries match" allMatch + +/-! ## Full Test Suite (unit tests) -/ + +def suite : List TestSeq := [ + group "graphExpr" ( + testGraphExprConst ++ + testGraphExprBvar ++ + testGraphExprSort ++ + testGraphExprApp ++ + testGraphExprLam ++ + testGraphExprForallE ++ + testGraphExprLetE ++ + testGraphExprProj ++ + testGraphExprLit + ), + group "graphConst" ( + testGraphConstAxiom ++ + testGraphConstDefn ++ + testGraphConstThm ++ + testGraphConstOpaque ++ + testGraphConstQuot ++ + testGraphConstInduct ++ + testGraphConstCtor ++ + testGraphConstRec + ), + group "GraphM.env" ( + testGraphMEnv ++ + testEnvParallelVsSerial + ) +] + +/-! ## Cross-Implementation Tests (Lean vs Rust) -/ + +/-- Canonicalize environment in Rust (fast). Returns Ix.RawEnvironment. -/ +@[extern "rs_canonicalize_env_to_ix"] +opaque rsCanonicalizeEnvToIxRaw : + @& List (Lean.Name × Lean.ConstantInfo) → IO Ix.RawEnvironment + +/-- Build reference graph in Rust. 
+ Returns Array (Ix.Name × Array Ix.Name) -/ +@[extern "rs_build_ref_graph"] +opaque rsBuildRefGraph : @& List (Lean.Name × Lean.ConstantInfo) → + IO (Array (Ix.Name × Array Ix.Name)) + +/-- Convert Rust's ref graph array to a HashMap for comparison. -/ +def rustRefGraphToMap (arr : Array (Ix.Name × Array Ix.Name)) + : Std.HashMap Ix.Name (Std.HashSet Ix.Name) := Id.run do + let mut m : Std.HashMap Ix.Name (Std.HashSet Ix.Name) := {} + for (name, refs) in arr do + let refSet := refs.foldl (init := {}) fun s n => s.insert n + m := m.insert name refSet + return m + +/-- Convert Lean's ref graph (Map) to a HashMap for comparison. -/ +def leanRefGraphToHashMap (m : Ix.Map Ix.Name (Ix.Set Ix.Name)) + : Std.HashMap Ix.Name (Std.HashSet Ix.Name) := Id.run do + let mut result : Std.HashMap Ix.Name (Std.HashSet Ix.Name) := {} + for (name, refs) in m do + let refSet : Std.HashSet Ix.Name := refs.fold (init := {}) fun s n => s.insert n + result := result.insert name refSet + return result + +/-- Compare two reference graphs for equality. Returns (isEqual, mismatches). -/ +def compareRefGraphs (lean rust : Std.HashMap Ix.Name (Std.HashSet Ix.Name)) + : Bool × Array String := Id.run do + let mut mismatches : Array String := #[] + + -- Check all entries in Lean's graph + for (name, leanRefs) in lean do + match rust.get? name with + | none => + if mismatches.size < 5 then + mismatches := mismatches.push s!"Missing in Rust: {name}" + | some rustRefs => + let leanSize := leanRefs.size + let rustSize := rustRefs.size + if leanSize != rustSize then + if mismatches.size < 5 then + mismatches := mismatches.push s!"Size mismatch for {name}: Lean={leanSize}, Rust={rustSize}" + else + for r in leanRefs do + if !rustRefs.contains r then + if mismatches.size < 5 then + mismatches := mismatches.push s!"{name}: ref {r} in Lean but not Rust" + break + + -- Check for extra entries in Rust + for (name, _) in rust do + if lean.get? 
name |>.isNone then + if mismatches.size < 5 then + mismatches := mismatches.push s!"Extra in Rust: {name}" + + return (mismatches.isEmpty, mismatches) + +/-- Cross-implementation test: compare Lean and Rust reference graph construction -/ +def testRefGraphComparison : TestSeq := + .individualIO "Reference Graph: Lean vs Rust" (do + let env ← get_env! + let numConsts := env.constants.toList.length + + IO.println s!"[Test] Reference Graph Comparison Test" + IO.println s!"[Test] Environment has {numConsts} constants" + IO.println "" + + -- Step 0: Canonicalize environment using Rust FFI (fast) + IO.println s!"[Test] Step 0: Canonicalizing environment via Rust FFI..." + let canonStart ← IO.monoMsNow + let rawEnv ← rsCanonicalizeEnvToIxRaw env.constants.toList + let ixEnv := rawEnv.toEnvironment + let canonTime := (← IO.monoMsNow) - canonStart + IO.println s!"[Test] Canonicalized {ixEnv.consts.size} constants in {canonTime}ms" + IO.println "" + + -- Step 1: Build reference graph in Rust + IO.println s!"[Test] Step 1: Building reference graph in Rust..." + let rustStart ← IO.monoMsNow + let rustRefArr ← rsBuildRefGraph env.constants.toList + let rustTime := (← IO.monoMsNow) - rustStart + IO.println s!"[Test] Rust: {rustRefArr.size} entries in {rustTime}ms" + + -- Step 2: Build reference graph in Lean (using pre-canonicalized environment) + IO.println s!"[Test] Step 2: Building reference graph in Lean..." + let leanStart ← IO.monoMsNow + let leanRefMap := Ix.GraphM.envParallel ixEnv + IO.println s!"[Test] Lean: {leanRefMap.size} entries " + let leanTime := (← IO.monoMsNow) - leanStart + IO.print s!"in {leanTime}ms" + IO.println "" + + -- Step 3: Compare results + IO.println s!"[Test] Step 3: Comparing results..." 
+ let rustMap := rustRefGraphToMap rustRefArr + let leanMap := leanRefGraphToHashMap leanRefMap + let (isEqual, mismatches) := compareRefGraphs leanMap rustMap + + for msg in mismatches do + IO.println s!"[Test] {msg}" + + IO.println "" + IO.println s!"[Test] Summary:" + IO.println s!"[Test] Canon time: {canonTime}ms" + IO.println s!"[Test] Rust graph time: {rustTime}ms" + IO.println s!"[Test] Lean graph time: {leanTime}ms" + IO.println s!"[Test] Match: {isEqual}" + + if !isEqual then + return (false, some s!"Reference graphs do not match: {mismatches.size} mismatches") + + return (true, none) + ) .done + +/-- Cross-implementation test suite (expensive, run with --ignored) -/ +def suiteIO : List TestSeq := [ + testRefGraphComparison +] + +end Tests.Ix.GraphM diff --git a/Tests/Ix/IR.lean b/Tests/Ix/IR.lean deleted file mode 100644 index e69de29b..00000000 diff --git a/Tests/Ix/Ixon.lean b/Tests/Ix/Ixon.lean index 16a22076..5b665fcc 100644 --- a/Tests/Ix/Ixon.lean +++ b/Tests/Ix/Ixon.lean @@ -1,173 +1,260 @@ -import Ix.Ixon -import Tests.Common - -open LSpec SlimCheck Gen - -def genAddress : Gen Address := - pure (Address.mk (Blake3.hash "foobar".toUTF8).val) - -def genNat : Gen Nat := USize.toNat <$> genUSize - -def genBool : Gen Bool := choose Bool .false true - --- aggressively reduce size parameter to avoid tree blow-up -def genList (n: Gen α) : Gen (List α) := - resize (fun s => if s > 8 then 8 else s / 2) $ listOf n - -def genDefKind : Gen Ix.DefKind := - elements #[.definition, .opaque, .theorem] - -def genDefinitionSafety : Gen Lean.DefinitionSafety := - elements #[.unsafe, .safe, .partial] - -def genDefinition : Gen Ixon.Definition := - .mk <$> genDefKind <*> genDefinitionSafety <*> genNat <*> genAddress <*> genAddress - -def genAxiom : Gen Ixon.Axiom := - .mk <$> genBool <*> genNat <*> genAddress - -def genQuotKind : Gen Lean.QuotKind := - elements #[.type, .ctor, .lift, .ind] - -def genQuotient : Gen Ixon.Quotient := - .mk <$> genQuotKind <*> genNat <*> 
genAddress - -def genConstructorProj : Gen Ixon.ConstructorProj := - .mk <$> genNat <*> genNat <*> genAddress - -def genRecursorProj : Gen Ixon.RecursorProj := - .mk <$> genNat <*> genAddress - -def genInductiveProj : Gen Ixon.InductiveProj := - .mk <$> genNat <*> genAddress - -def genDefinitionProj : Gen Ixon.DefinitionProj := - .mk <$> genNat <*> genAddress - -def genRecursorRule : Gen Ixon.RecursorRule := - .mk <$> genNat <*> genAddress - -def genRecursor : Gen Ixon.Recursor := - .mk <$> genBool <*> genBool <*> genNat <*> genNat <*> genNat <*> genNat <*> genNat - <*> genAddress <*> genList genRecursorRule - -def genConstructor : Gen Ixon.Constructor := - .mk <$> genBool <*> genNat <*> genNat <*> genNat <*> genNat <*> genAddress - -def genInductive : Gen Ixon.Inductive := - .mk <$> genBool <*> genBool <*> genBool <*> genNat <*> genNat <*> genNat <*> genNat - <*> genAddress <*> genList genConstructor - --<*> genList genRecursor +/- + Pure Lean serialization tests for Ixon types. + Generators have been moved to Tests/Gen/Ixon.lean. 
+-/ -def genEvalClaim : Gen Ixon.EvalClaim := - .mk <$> genAddress <*> genAddress <*> genAddress <*> genAddress - -def genCheckClaim : Gen Ixon.CheckClaim := - .mk <$> genAddress <*> genAddress <*> genAddress - -def genClaim : Gen Ixon.Claim := - frequency [(10, .evals <$> genEvalClaim), (10, .checks <$> genCheckClaim)] - -def genProof : Gen Ixon.Proof := - .mk <$> genClaim <*> pure "foobar".toUTF8 - -def genComm : Gen Ixon.Comm := - .mk <$> genAddress <*> genAddress - -def genMetaAddress : Gen MetaAddress := - .mk <$> genAddress <*> genAddress - -def genEnv : Gen Ixon.Env := - .mk <$> genList genMetaAddress - -def genBinderInfo : Gen Lean.BinderInfo := - elements #[.default, .implicit, .strictImplicit, .instImplicit] - -def genReducibilityHints : Gen Lean.ReducibilityHints := - frequency [ - (10, pure .opaque), - (10, pure .abbrev), - (10, .regular <$> genUInt32), - ] - -def genDataValue : Gen Ixon.DataValue := - frequency [ - (10, .ofString <$> genAddress), - (10, .ofBool <$> genBool), - (10, .ofName <$> genAddress), - (10, .ofNat <$> genAddress), - (10, .ofInt <$> genAddress), - (10, .ofSyntax <$> genAddress), +import Ix.Ixon +import Ix.Sharing +import Tests.Gen.Ixon +import Tests.FFI.Ixon + +open LSpec SlimCheck Gen Ixon +open Tests.FFI.Ixon (rsEqUnivSerialization rsEqExprSerialization rsEqConstantSerialization rsEqEnvSerialization) + +/-! 
+## Roundtrip Tests for New Format Types +-/ + +def univSerde (u : Univ) : Bool := + let bytes := serUniv u + match desUniv bytes with + | .ok u' => u == u' + | .error _ => false + +def exprSerde (e : Expr) : Bool := + let bytes := serExpr e + match desExpr bytes with + | .ok e' => e == e' + | .error _ => false + +def constantSerde (c : Constant) : Bool := + let bytes := serConstant c + match desConstant bytes with + | .ok c' => c == c' + | .error _ => false + +def commSerde (c : Comm) : Bool := + let bytes := serComm c + match desComm bytes with + | .ok c' => c == c' + | .error _ => false + +def envSerde (raw : RawEnv) : Bool := + let env := raw.toEnv + let bytes1 := serEnv env + match desEnv bytes1 with + | .ok env' => + let bytes2 := serEnv env' + bytes1 == bytes2 -- Byte-level equality after roundtrip + | .error _ => false + +/-! +## Unit Tests for New Format Types +-/ + +def univUnits : TestSeq := + let cases : List Univ := [ + .zero, + .var 0, + .var 42, + .succ .zero, + .succ (.succ .zero), + .succ (.succ (.succ .zero)), -- Test telescope compression + .max .zero (.var 0), + .imax (.var 1) .zero, + .max (.succ .zero) (.succ (.succ .zero)), ] - -def genMetadatum : Gen Ixon.Metadatum := - frequency [ - (10, .info <$> genBinderInfo), - (10, .link <$> genAddress), - (10, .hints <$> genReducibilityHints), - (10, .links <$> genList genAddress), - --(10, .rules <$> genList genAddress), - (10, .kvmap <$> genList (Prod.mk <$> genAddress <*> genDataValue)), + cases.foldl (init := .done) fun acc u => + acc ++ test s!"Univ roundtrip: {repr u}" (univSerde u) + +def exprUnits : TestSeq := + let cases : List Expr := [ + .sort 0, + .var 0, + .var 42, + .ref 0 #[], + .ref 1 #[0, 1, 2], + .recur 0 #[], + .recur 2 #[1], + .str 5, + .nat 10, + .share 0, + .app (.var 0) (.var 1), + .app (.app (.var 0) (.var 1)) (.var 2), -- Nested apps (telescope) + .lam (.sort 0) (.var 0), + .lam (.sort 0) (.lam (.sort 1) (.var 0)), -- Nested lams (telescope) + .all (.sort 0) (.var 0), + .all 
(.sort 0) (.all (.sort 1) (.var 0)), -- Nested alls (telescope) + .letE true (.sort 0) (.var 0) (.var 1), + .letE false (.sort 0) (.var 0) (.var 1), + .prj 0 1 (.var 0), ] - -def genMetadata : Gen Ixon.Metadata := - .mk <$> genList genMetadatum - - -partial def genIxon : Gen Ixon.Ixon := - frequency [ - (10, pure .nanon), - (10, .nstr <$> genAddress <*> genAddress), - (10, .nnum <$> genAddress <*> genAddress), - (10, pure .uzero), - (10, .usucc <$> genAddress), - (10, .umax <$> genAddress <*> genAddress), - (10, .uimax <$> genAddress <*> genAddress), - (10, .uvar <$> genNat), - (10, .evar <$> genNat), - (10, .eref <$> genAddress <*> genList genAddress), - (10, .erec <$> genNat <*> genList genAddress), - (10, .eprj <$> genAddress <*> genNat <*> genAddress), - (10, .esort <$> genAddress), - (10, .estr <$> genAddress), - (10, .enat <$> genAddress), - (10, .eapp <$> genAddress <*> genAddress), - (10, .elam <$> genAddress <*> genAddress), - (10, .eall <$> genAddress <*> genAddress), - (10, .elet <$> genBool <*> genAddress <*> genAddress <*> genAddress), - (10, (.blob ∘ .mk ∘ .mk) <$> genList genUInt8), - (10, .defn <$> genDefinition), - (10, .axio <$> genAxiom), - (10, .quot <$> genQuotient), - (10, .cprj <$> genConstructorProj), - (10, .iprj <$> genInductiveProj), - (10, .dprj <$> genDefinitionProj), - (10, .muts <$> genList (.indc <$> genInductive)), - (10, .muts <$> genList (.defn <$> genDefinition)), - (10, .muts <$> genList (.recr <$> genRecursor)), - (10, .prof <$> genProof), - (10, .eval <$> genEvalClaim), - (10, .chck <$> genCheckClaim), - (10, .comm <$> genComm), - (10, .envn <$> genEnv), - (10, .meta <$> genMetadata), - ] - -instance : Shrinkable Ixon.Ixon where - shrink _ := [] - -instance : SampleableExt Ixon.Ixon := SampleableExt.mkSelfContained genIxon - -@[extern "rs_eq_lean_rust_serialization"] -private opaque eqLeanRustSerialization : @& Ixon.Ixon → @& ByteArray → Bool - -def ixonSerde (ixon : Ixon.Ixon) : Bool := - let bytes := Ixon.ser ixon - if 
!eqLeanRustSerialization ixon bytes then false else - match Ixon.de bytes with - | .ok ixon' => ixon == ixon' - | .error _ => false - -def Tests.Ixon.suite := [ - check "Ixon serde roundtrips" (∀ ixon : Ixon.Ixon, ixonSerde ixon) + cases.foldl (init := .done) fun acc e => + acc ++ test s!"Expr roundtrip: {repr e}" (exprSerde e) + +def constantUnits : TestSeq := + let defn := Definition.mk .defn .safe 0 (.sort 0) (.var 0) + let c := Constant.mk + (.defn defn) + #[.var 0, .sort 1] -- sharing + #[⟨(Blake3.hash "ref".toUTF8).val⟩] -- refs + #[.zero, .succ .zero] -- univs + test "Constant roundtrip" (constantSerde c) + +def commUnits : TestSeq := + let addr1 : Address := ⟨(Blake3.hash "secret".toUTF8).val⟩ + let addr2 : Address := ⟨(Blake3.hash "payload".toUTF8).val⟩ + let c := Comm.mk addr1 addr2 + test "Comm roundtrip" (commSerde c) + +/-! +## Sharing Analysis Tests +-/ + +def sharingTest1 : Bool := + let e1 := Expr.app (.var 0) (.var 1) + let (rewritten1, sharing1) := Ix.Sharing.applySharing #[e1] + sharing1.isEmpty && rewritten1[0]! == e1 + +def sharingTest2 : Bool := + let ty := Expr.sort 0 + let e2 := Expr.app (.lam ty (.var 0)) (.lam ty (.var 1)) + let (_, sharing2) := Ix.Sharing.applySharing #[e2] + sharing2.size == 1 + +def sharingTest3 : Bool := + let var0 := Expr.var 0 + let e3a := Expr.app var0 var0 + let e3b := Expr.app var0 (.var 1) + let e3c := Expr.app var0 (.var 2) + let (_, sharing3) := Ix.Sharing.applySharing #[e3a, e3b, e3c] + sharing3.size >= 1 + +def sharingTest4 : Bool := + let e4 := Expr.lam (.sort 0) (.app (.var 0) (.var 0)) + let (rewritten4, _) := Ix.Sharing.applySharing #[e4] + let serialized := serExpr rewritten4[0]! + match desExpr serialized with + | .ok e => e == rewritten4[0]! 
+ | .error _ => false + +def sharingUnits : TestSeq := + test "no sharing for unique subterms" sharingTest1 + ++ test "shares repeated sort 0" sharingTest2 + ++ test "analyzes multiple expressions" sharingTest3 + ++ test "roundtrip after sharing" sharingTest4 + +/-! ## Env Unit Tests -/ + +def envSerdeUnit (env : Env) : Bool := + let bytes1 := serEnv env + match desEnv bytes1 with + | .ok env' => + let bytes2 := serEnv env' + bytes1 == bytes2 + | .error _ => false + +def envUnitTests : TestSeq := + -- Test 1: Empty env + let emptyEnv : Env := {} + -- Test 2: Env with only a blob + let blobAddr := Address.blake3 (ByteArray.mk #[1, 2, 3]) + let envWithBlob : Env := { blobs := ({} : Std.HashMap _ _).insert blobAddr (ByteArray.mk #[4, 5, 6]) } + -- Test 3: Env with a simple name (no named entry) + let testName := Ix.Name.mkStr Ix.Name.mkAnon "test" + let testNameAddr := testName.getHash + let envWithName : Env := { names := ({} : Std.HashMap _ _).insert testNameAddr testName } + -- Test 4: Env with named entry and empty metadata + let constAddr := Address.blake3 (ByteArray.mk #[7, 8, 9]) + let envWithNamed : Env := Id.run do + let mut env : Env := {} + env := { env with names := RawEnv.addNameComponents env.names testName } + env := env.registerName testName { addr := constAddr, constMeta := .empty } + return env + -- Test 5: Env with nested name and named entry + let nestedName := Ix.Name.mkStr (Ix.Name.mkNat testName 42) "bar" + let envWithNestedName : Env := Id.run do + let mut env : Env := {} + env := { env with names := RawEnv.addNameComponents env.names nestedName } + env := env.registerName nestedName { addr := constAddr, constMeta := .empty } + return env + -- Test 6: Env with blob and comm + let secretAddr := Address.blake3 (ByteArray.mk #[10, 11, 12]) + let payloadAddr := Address.blake3 (ByteArray.mk #[13, 14, 15]) + let commAddr := Address.blake3 (ByteArray.mk #[16, 17, 18]) + let envWithBlobAndComm : Env := { + blobs := ({} : Std.HashMap _ _).insert 
blobAddr (ByteArray.mk #[4, 5, 6]), + comms := ({} : Std.HashMap _ _).insert commAddr (Comm.mk secretAddr payloadAddr) + } + test "Empty env roundtrip" (envSerdeUnit emptyEnv) ++ + test "Env with blob roundtrip" (envSerdeUnit envWithBlob) ++ + test "Env with name roundtrip" (envSerdeUnit envWithName) ++ + test "Env with named (empty meta) roundtrip" (envSerdeUnit envWithNamed) ++ + test "Env with nested name roundtrip" (envSerdeUnit envWithNestedName) ++ + test "Env with blob+comm roundtrip" (envSerdeUnit envWithBlobAndComm) + +/-! ## Cross-implementation serialization comparison tests -/ + +def univSerializationMatches (u : Univ) : Bool := + rsEqUnivSerialization u (serUniv u) + +def exprSerializationMatches (e : Expr) : Bool := + rsEqExprSerialization e (serExpr e) + +def constantSerializationMatches (c : Constant) : Bool := + rsEqConstantSerialization c (serConstant c) + +def envSerializationMatches (raw : RawEnv) : Bool := + let env := raw.toEnv + rsEqEnvSerialization raw (serEnv env) + +/-- Unit tests for Lean==Rust serialization comparison -/ +def envSerializationUnitTests : TestSeq := + -- Test 1: Empty env + let emptyRaw : RawEnv := { consts := #[], named := #[], blobs := #[], comms := #[] } + -- Test 2: Env with one blob + let blobAddr := Address.blake3 (ByteArray.mk #[1, 2, 3]) + let blobRaw : RawEnv := { + consts := #[], named := #[], + blobs := #[{ addr := blobAddr, bytes := ByteArray.mk #[4, 5, 6] }], + comms := #[] + } + -- Test 3: Env with one comm + let commAddr := Address.blake3 (ByteArray.mk #[7, 8, 9]) + let secretAddr := Address.blake3 (ByteArray.mk #[10, 11, 12]) + let payloadAddr := Address.blake3 (ByteArray.mk #[13, 14, 15]) + let commRaw : RawEnv := { + consts := #[], named := #[], + blobs := #[], + comms := #[{ addr := commAddr, comm := Comm.mk secretAddr payloadAddr }] + } + -- Test 4: Env with blob + comm + let blobCommRaw : RawEnv := { + consts := #[], named := #[], + blobs := #[{ addr := blobAddr, bytes := ByteArray.mk #[4, 5, 6] }], + 
comms := #[{ addr := commAddr, comm := Comm.mk secretAddr payloadAddr }] + } + test "Empty env Lean==Rust" (envSerializationMatches emptyRaw) ++ + test "Blob env Lean==Rust" (envSerializationMatches blobRaw) ++ + test "Comm env Lean==Rust" (envSerializationMatches commRaw) ++ + test "Blob+Comm env Lean==Rust" (envSerializationMatches blobCommRaw) + +/-! ## Test Suite (property-based) -/ + +def Tests.Ixon.suite : List TestSeq := [ + -- Env unit tests (for debugging serialization) + envUnitTests, + -- Env serialization comparison unit tests + envSerializationUnitTests, + -- Pure Lean serde roundtrips + checkIO "Univ serde roundtrips" (∀ u : Univ, univSerde u), + checkIO "Expr serde roundtrips" (∀ e : Expr, exprSerde e), + checkIO "Constant serde roundtrips" (∀ c : Constant, constantSerde c), + checkIO "Comm serde roundtrips" (∀ c : Comm, commSerde c), + checkIO "Env serde roundtrips" (∀ raw : RawEnv, envSerde raw), + -- Cross-implementation serialization comparison (Lean == Rust) + checkIO "Univ serialization Lean==Rust" (∀ u : Univ, univSerializationMatches u), + checkIO "Expr serialization Lean==Rust" (∀ e : Expr, exprSerializationMatches e), + checkIO "Constant serialization Lean==Rust" (∀ c : Constant, constantSerializationMatches c), + checkIO "Env serialization Lean==Rust" (∀ raw : RawEnv, envSerializationMatches raw), ] diff --git a/Tests/Ix/RustDecompile.lean b/Tests/Ix/RustDecompile.lean new file mode 100644 index 00000000..89bdd5d2 --- /dev/null +++ b/Tests/Ix/RustDecompile.lean @@ -0,0 +1,126 @@ +/- + Rust decompilation tests. + Tests the Rust FFI endpoint for decompilation by compiling with Rust, + decompiling with Rust, and comparing against the original environment. 
+-/ + +import Ix.Ixon +import Ix.Environment +import Ix.Address +import Ix.Common +import Ix.Meta +import Ix.CompileM +import Ix.DecompileM +import Lean +import LSpec +import Tests.Ix.Fixtures + +open LSpec + +namespace Tests.RustDecompile + +/-- Test Rust decompilation: compile → rsDecompileEnv → hash comparison -/ +def testRustDecompile : TestSeq := + .individualIO "Rust Decompilation Roundtrip" (do + let leanEnv ← get_env! + let totalConsts := leanEnv.constants.toList.length + + IO.println s!"[Test] Rust Decompilation Roundtrip Test" + IO.println s!"[Test] Environment has {totalConsts} constants" + IO.println "" + + -- Step 1: Run Rust compilation pipeline + IO.println s!"[Step 1] Running Rust compilation pipeline..." + let rustStart ← IO.monoMsNow + let phases ← Ix.CompileM.rsCompilePhases leanEnv + let rustTime := (← IO.monoMsNow) - rustStart + IO.println s!"[Step 1] Rust: {phases.compileEnv.constCount} compiled in {rustTime}ms" + IO.println s!"[Step 1] names={phases.compileEnv.names.size}, named={phases.compileEnv.named.size}, consts={phases.compileEnv.consts.size}, blobs={phases.compileEnv.blobs.size}" + IO.println "" + + -- Step 2: Decompile with Rust + IO.println s!"[Step 2] Decompiling with Rust (rsDecompileEnv)..." 
+ let decompStart ← IO.monoMsNow + let decompiled ← match Ix.DecompileM.rsDecompileEnv phases.compileEnv with + | .ok env => pure env + | .error e => do + IO.println s!"[Step 2] FAILED: {toString e}" + return (false, some (toString e)) + let decompTime := (← IO.monoMsNow) - decompStart + IO.println s!"[Step 2] {decompiled.size} constants decompiled in {decompTime}ms" + IO.println "" + + -- Count by constant type + let mut nDefn := (0 : Nat); let mut nAxiom := (0 : Nat) + let mut nInduct := (0 : Nat); let mut nCtor := (0 : Nat) + let mut nRec := (0 : Nat); let mut nQuot := (0 : Nat) + let mut nOpaque := (0 : Nat); let mut nThm := (0 : Nat) + for (_, info) in decompiled do + match info with + | .defnInfo _ => nDefn := nDefn + 1 + | .axiomInfo _ => nAxiom := nAxiom + 1 + | .inductInfo _ => nInduct := nInduct + 1 + | .ctorInfo _ => nCtor := nCtor + 1 + | .recInfo _ => nRec := nRec + 1 + | .quotInfo _ => nQuot := nQuot + 1 + | .opaqueInfo _ => nOpaque := nOpaque + 1 + | .thmInfo _ => nThm := nThm + 1 + IO.println s!"[Types] defn={nDefn}, thm={nThm}, opaque={nOpaque}, axiom={nAxiom}, induct={nInduct}, ctor={nCtor}, rec={nRec}, quot={nQuot}" + IO.println "" + + -- Step 3: Hash-based comparison against original Ix.Environment + let ixEnv := phases.rawEnv + IO.println s!"[Step 3] Original Ix.Environment has {ixEnv.consts.size} constants" + IO.println s!"[Compare] Hash-comparing {decompiled.size} decompiled constants..." + let compareStart ← IO.monoMsNow + + let mut nMatch := (0 : Nat); let mut nMismatch := (0 : Nat); let mut nMissing := (0 : Nat) + let mut firstMismatches : Array (Ix.Name × String) := #[] + for (name, decompInfo) in decompiled do + match ixEnv.consts.get? 
name with + | some origInfo => + let decompTyHash := decompInfo.getCnst.type.getHash + let origTyHash := origInfo.getCnst.type.getHash + if decompTyHash != origTyHash then + nMismatch := nMismatch + 1 + if firstMismatches.size < 10 then + firstMismatches := firstMismatches.push (name, s!"type hash mismatch") + else + let valMismatch := match decompInfo, origInfo with + | .defnInfo dv, .defnInfo ov => dv.value.getHash != ov.value.getHash + | .thmInfo dv, .thmInfo ov => dv.value.getHash != ov.value.getHash + | .opaqueInfo dv, .opaqueInfo ov => dv.value.getHash != ov.value.getHash + | _, _ => false + if valMismatch then + nMismatch := nMismatch + 1 + if firstMismatches.size < 10 then + firstMismatches := firstMismatches.push (name, s!"value hash mismatch") + else + nMatch := nMatch + 1 + | none => + nMissing := nMissing + 1 + if firstMismatches.size < 10 then + firstMismatches := firstMismatches.push (name, "not in original") + + let compareTime := (← IO.monoMsNow) - compareStart + IO.println s!"[Compare] Matched: {nMatch}, Mismatched: {nMismatch}, Missing: {nMissing} ({compareTime}ms)" + if !firstMismatches.isEmpty then + IO.println s!"[Compare] First mismatches:" + for (name, diff) in firstMismatches do + IO.println s!" {name}: {diff}" + IO.println "" + + let success := nMismatch == 0 && nMissing == 0 + if success then + return (true, none) + else + return (false, some s!"{nMismatch} mismatches, {nMissing} missing") + ) .done + +/-! ## Test Suite -/ + +def rustDecompileSuiteIO : List TestSeq := [ + testRustDecompile, +] + +end Tests.RustDecompile diff --git a/Tests/Ix/RustSerialize.lean b/Tests/Ix/RustSerialize.lean new file mode 100644 index 00000000..dd5e4684 --- /dev/null +++ b/Tests/Ix/RustSerialize.lean @@ -0,0 +1,99 @@ +/- + Rust serialization/deserialization tests. + Tests the Rust FFI endpoints for Ixon.Env serialization and deserialization + by roundtripping through Rust and comparing with the Lean serializer. 
+-/ + +import Ix.Ixon +import Ix.Common +import Ix.Meta +import Ix.CompileM +import Lean +import LSpec +import Tests.Ix.Fixtures + +open LSpec + +namespace Tests.RustSerialize + +/-- Test Rust serde roundtrip: compile → rsSerEnv → rsDesEnv → Lean serEnv → byte compare -/ +def testRustSerdeRoundtrip : TestSeq := + .individualIO "Rust Serialize/Deserialize Roundtrip" (do + let leanEnv ← get_env! + let totalConsts := leanEnv.constants.toList.length + + IO.println s!"[Test] Rust Serialize/Deserialize Roundtrip Test" + IO.println s!"[Test] Environment has {totalConsts} constants" + IO.println "" + + -- Step 1: Compile with Rust to get an Ixon.Env + IO.println s!"[Step 1] Running Rust compilation pipeline..." + let compileStart ← IO.monoMsNow + let ixonEnv ← Ix.CompileM.rsCompileEnv leanEnv + let compileTime := (← IO.monoMsNow) - compileStart + IO.println s!"[Step 1] Compiled: {ixonEnv.constCount} constants in {compileTime}ms" + IO.println "" + + -- Step 2: Canonical Lean serialization (deterministic, sorted by key) + IO.println s!"[Step 2] Serializing with Lean (serEnv)..." + let leanSerStart ← IO.monoMsNow + let leanBytes := Ixon.serEnv ixonEnv + let leanSerTime := (← IO.monoMsNow) - leanSerStart + IO.println s!"[Step 2] {leanBytes.size} bytes in {leanSerTime}ms" + IO.println "" + + -- Step 3: Serialize with Rust + IO.println s!"[Step 3] Serializing with Rust (rsSerEnv)..." + let rustSerStart ← IO.monoMsNow + let rustBytes := Ixon.rsSerEnv ixonEnv + let rustSerTime := (← IO.monoMsNow) - rustSerStart + IO.println s!"[Step 3] {rustBytes.size} bytes in {rustSerTime}ms" + IO.println "" + + -- Step 4: Deserialize Rust bytes with Rust + IO.println s!"[Step 4] Deserializing Rust bytes with Rust (rsDesEnv)..." 
+ let rustDesStart ← IO.monoMsNow + let roundtrippedFromRust ← match Ixon.rsDesEnv rustBytes with + | .ok env => pure env + | .error e => do + IO.println s!"[Step 4] FAILED: {e}" + return (false, some e) + let rustDesTime := (← IO.monoMsNow) - rustDesStart + IO.println s!"[Step 4] {roundtrippedFromRust.constCount} constants in {rustDesTime}ms" + IO.println "" + + -- Step 5: Re-serialize the roundtripped env with Lean (deterministic) + IO.println s!"[Step 5] Re-serializing roundtripped env with Lean..." + let reserStart ← IO.monoMsNow + let roundtrippedBytes := Ixon.serEnv roundtrippedFromRust + let reserTime := (← IO.monoMsNow) - reserStart + IO.println s!"[Step 5] {roundtrippedBytes.size} bytes in {reserTime}ms" + IO.println "" + + -- Step 6: Byte-exact comparison + IO.println s!"[Step 6] Comparing Lean serialization vs roundtripped..." + if leanBytes == roundtrippedBytes then + IO.println s!"[Step 6] Byte-exact match! ({leanBytes.size} bytes) ✓" + IO.println "" + return (true, none) + else + IO.println s!"[Step 6] MISMATCH: {leanBytes.size} bytes vs {roundtrippedBytes.size} bytes" + -- Find first diff + let minLen := min leanBytes.size roundtrippedBytes.size + let mut firstDiff := minLen + for i in [:minLen] do + if leanBytes.get! i != roundtrippedBytes.get! i then + firstDiff := i + break + IO.println s!"[Step 6] First difference at byte {firstDiff}" + IO.println "" + return (false, some s!"Bytes differ at offset {firstDiff} (original {leanBytes.size} vs roundtripped {roundtrippedBytes.size})") + ) .done + +/-! ## Test Suite -/ + +def rustSerializeSuiteIO : List TestSeq := [ + testRustSerdeRoundtrip, +] + +end Tests.RustSerialize diff --git a/Tests/Ix/Sharing.lean b/Tests/Ix/Sharing.lean new file mode 100644 index 00000000..3ae1c9c5 --- /dev/null +++ b/Tests/Ix/Sharing.lean @@ -0,0 +1,674 @@ +/- + Unit tests for Sharing module - verifies hash-consing compatibility with Rust. 
+ + The hashing algorithm must produce identical results to Rust's `hash_node` function + for cross-implementation compatibility. +-/ + +import Ix.Sharing +import Ix.Ixon +import Ix.CompileM +import Ix.CanonM +import Ix.Meta +import Ix.Environment +import LSpec + +open LSpec Ix.Sharing Ixon Ix.CompileM Ix + +namespace Tests.Sharing + +/-! ## uint64ToBytes tests -/ + +/-- Verify uint64ToBytes produces exactly 8 bytes. -/ +def testUint64ToBytesLength : TestSeq := + group "uint64ToBytes length" <| + test "0" ((uint64ToBytes 0).size == 8) ++ + test "1" ((uint64ToBytes 1).size == 8) ++ + test "255" ((uint64ToBytes 255).size == 8) ++ + test "256" ((uint64ToBytes 256).size == 8) ++ + test "maxUInt64" ((uint64ToBytes UInt64.MAX).size == 8) + +/-- Verify uint64ToBytes produces correct little-endian encoding. -/ +def testUint64ToBytesEncoding : TestSeq := + group "uint64ToBytes encoding" <| + -- 0 should be [0,0,0,0,0,0,0,0] + test "0 bytes" (uint64ToBytes 0 == ⟨#[0,0,0,0,0,0,0,0]⟩) ++ + -- 1 should be [1,0,0,0,0,0,0,0] + test "1 bytes" (uint64ToBytes 1 == ⟨#[1,0,0,0,0,0,0,0]⟩) ++ + -- 256 should be [0,1,0,0,0,0,0,0] + test "256 bytes" (uint64ToBytes 256 == ⟨#[0,1,0,0,0,0,0,0]⟩) ++ + -- 0x0102030405060708 should be [8,7,6,5,4,3,2,1] + test "0x0102030405060708 bytes" + (uint64ToBytes 0x0102030405060708 == ⟨#[0x08,0x07,0x06,0x05,0x04,0x03,0x02,0x01]⟩) + +/-! ## Hash consistency tests -/ + +/-- Verify that the same expression always produces the same hash. -/ +def testHashConsistency : TestSeq := + let e1 := Expr.var 0 + let e2 := Expr.var 0 + let h1 := computeExprHash e1 + let h2 := computeExprHash e2 + test "same expr same hash" (h1 == h2) + +/-- Verify that different expressions produce different hashes. -/ +def testHashDifferent : TestSeq := + let e1 := Expr.var 0 + let e2 := Expr.var 1 + let h1 := computeExprHash e1 + let h2 := computeExprHash e2 + test "different expr different hash" (h1 != h2) + +/-! 
## Hash buffer construction tests -/ + +/-- Test that Sort expression hash buffer is: [FLAG_SORT, u64_le_bytes...] -/ +def testSortHashBuffer : TestSeq := + -- Sort(5) should hash buffer: [0x00, 5, 0, 0, 0, 0, 0, 0, 0] + let expected := ByteArray.empty + |>.push Expr.FLAG_SORT + |>.append (uint64ToBytes 5) + test "sort buffer size" (expected.size == 9) ++ + test "sort buffer content" (expected.data[0]! == 0x00 && expected.data[1]! == 5) + +/-- Test that Var expression hash buffer is: [FLAG_VAR, u64_le_bytes...] -/ +def testVarHashBuffer : TestSeq := + -- Var(3) should hash buffer: [0x01, 3, 0, 0, 0, 0, 0, 0, 0] + let expected := ByteArray.empty + |>.push Expr.FLAG_VAR + |>.append (uint64ToBytes 3) + test "var buffer size" (expected.size == 9) ++ + test "var buffer content" (expected.data[0]! == 0x01 && expected.data[1]! == 3) + +/-- Test that App expression includes child hashes (64 bytes for 2 children). -/ +def testAppHashBuffer : TestSeq := + let fun_ := Expr.var 0 + let arg := Expr.var 1 + let funHash := computeExprHash fun_ + let argHash := computeExprHash arg + -- App buffer: [0x07, funHash(32 bytes), argHash(32 bytes)] + let expected := ByteArray.empty + |>.push Expr.FLAG_APP + |>.append funHash.hash + |>.append argHash.hash + test "app buffer size" (expected.size == 65) ++ + test "app buffer flag" (expected.data[0]! == 0x07) + +/-! ## Sharing analysis tests -/ + +/-- Test that applySharing returns empty sharing vec for unique subterms. -/ +def testNoSharing : TestSeq := + let exprs := #[Expr.var 0, Expr.var 1, Expr.var 2] + let (rewritten, sharingVec) := applySharing exprs + test "no sharing needed" (sharingVec.isEmpty) ++ + test "expressions unchanged" (rewritten == exprs) + +/-- Test that applySharing detects shared subterms. 
-/ +def testWithSharing : TestSeq := + -- Create expressions with shared subterm: (var 42) appears twice + let shared := Expr.var 42 + let e1 := Expr.app shared (Expr.var 1) + let e2 := Expr.app shared (Expr.var 2) + let e3 := Expr.app shared (Expr.var 3) + let exprs := #[e1, e2, e3] + let (_rewritten, _sharingVec) := applySharing exprs + -- var 42 should be shared since it appears 3 times + -- But it's a small term, so sharing might not be profitable + -- Just verify the function runs without error + test "sharing analysis completes" (_rewritten.size == exprs.size) + +/-- Test that sharing vector is in topological order (leaves first). -/ +def testSharingTopoOrder : TestSeq := + -- Create: App(Lam(T, B), A) where T appears in multiple places + let t := Expr.var 0 -- type + let b := Expr.var 1 -- body + let a := Expr.var 2 -- arg + let lam := Expr.lam t b + let app := Expr.app lam a + -- Use the same type in another expression + let e2 := Expr.app (Expr.lam t (Expr.var 3)) a + let exprs := #[app, e2] + let (_, sharingVec) := applySharing exprs + -- Verify no forward references in sharing vector + -- (each Share(idx) in sharingVec[i] must have idx < i) + let valid := sharingVec.foldl (init := (true, 0)) fun (ok, i) e => + let thisOk := checkNoForwardRefs e i + (ok && thisOk, i + 1) + test "no forward references" valid.1 +where + checkNoForwardRefs (e : Ixon.Expr) (maxIdx : Nat) : Bool := + match e with + | .share idx => idx.toNat < maxIdx + | .app f a => checkNoForwardRefs f maxIdx && checkNoForwardRefs a maxIdx + | .lam t b => checkNoForwardRefs t maxIdx && checkNoForwardRefs b maxIdx + | .all t b => checkNoForwardRefs t maxIdx && checkNoForwardRefs b maxIdx + | .letE _ t v b => + checkNoForwardRefs t maxIdx && checkNoForwardRefs v maxIdx && checkNoForwardRefs b maxIdx + | .prj _ _ v => checkNoForwardRefs v maxIdx + | _ => true + +/-! ## Known hash value tests (for cross-impl verification) -/ + +/-- Compute expected hash for Var(0) and verify it's deterministic. 
+ The actual hash value should match Rust's hash_node for the same input. -/ +def testKnownHashVar0 : TestSeq := + let e := Expr.var 0 + let h := computeExprHash e + -- Hash should be blake3([0x01, 0, 0, 0, 0, 0, 0, 0, 0]) + let buf := ByteArray.empty.push 0x01 |>.append (uint64ToBytes 0) + let expected := Address.blake3 buf + test "var 0 hash matches expected" (h == expected) + +/-- Compute expected hash for Sort(0) and verify. -/ +def testKnownHashSort0 : TestSeq := + let e := Expr.sort 0 + let h := computeExprHash e + -- Hash should be blake3([0x00, 0, 0, 0, 0, 0, 0, 0, 0]) + let buf := ByteArray.empty.push 0x00 |>.append (uint64ToBytes 0) + let expected := Address.blake3 buf + test "sort 0 hash matches expected" (h == expected) + +/-! ## Cross-implementation tests (Lean vs Rust) -/ + +/-- FFI: Run Rust's sharing analysis and return the count of shared items. -/ +@[extern "rs_analyze_sharing_count"] +opaque rsAnalyzeSharingCount : @& Array Ixon.Expr → UInt64 + +/-- FFI: Compare Lean's sharing analysis with Rust's on the same input. + Returns packed u64: + - bits 0-31: 1 if sharing vectors match, 0 otherwise + - bits 32-47: Lean sharing count + - bits 48-63: Rust sharing count -/ +@[extern "rs_compare_sharing_analysis"] +opaque rsCompareSharingAnalysis : @& Array Ixon.Expr → @& Array Ixon.Expr → @& Array Ixon.Expr → UInt64 + +/-- FFI: Debug sharing analysis - print Rust's view of the input expressions. -/ +@[extern "rs_debug_sharing_analysis"] +opaque rsDebugSharingAnalysis : @& Array Ixon.Expr → Unit + +/-- Opaque type representing a compiled environment from Rust. + This is an external object managed by the Rust FFI layer. -/ +opaque RustCompiledEnv : Type + +/-- FFI: Get the buffer length needed for pre-sharing expressions. -/ +@[extern "rs_get_pre_sharing_exprs_len"] +opaque rsGetPreSharingExprsLen : @& RustCompiledEnv → @& Lean.Name → UInt64 + +/-- FFI: Get the pre-sharing root expressions for a constant as serialized bytes. 
+ Returns the number of expressions. Output buffer format: + [n_exprs:u64, len1:u64, expr1_bytes..., len2:u64, expr2_bytes..., ...] -/ +@[extern "rs_get_pre_sharing_exprs"] +opaque rsGetPreSharingExprs : @& RustCompiledEnv → @& Lean.Name → @& ByteArray → IO UInt64 + +/-- FFI: Look up a constant's compiled address (32-byte blake3 hash). + Returns true if found, copies address to out_addr ByteArray. -/ +@[extern "rs_lookup_const_addr"] +opaque rsLookupConstAddr : @& RustCompiledEnv → @& Lean.Name → @& ByteArray → IO Bool + +/-- FFI: Get the total number of compiled constants. -/ +@[extern "rs_get_compiled_const_count"] +opaque rsGetCompiledConstCount : @& RustCompiledEnv → UInt64 + +/-- Unpack the comparison result from rsCompareSharingAnalysis -/ +def unpackSharingComparison (packed : UInt64) : Bool × UInt64 × UInt64 := + let isMatch := (packed &&& 0xFFFFFFFF) == 1 + let leanCount := (packed >>> 32) &&& 0xFFFF + let rustCount := (packed >>> 48) &&& 0xFFFF + (isMatch, leanCount, rustCount) + +/-! ## Recursor sharing test -/ + +/-- Test sharing analysis on a recursor-like structure. + + A typical 4-constructor recursor type looks like: + ∀ (motive : T → Sort u), + (minor1 : motive C1) → (minor2 : motive C2) → (minor3 : motive C3) → (minor4 : motive C4) → + ∀ (t : T), motive t + + Where T = ref 0 #[] appears 6 times (in motive type, each minor, and final target). + With usage=6, size=2 (tag4(0) + tag0(0)), profitability is: + (6-1)*2 = 10 > 6*1 = 6 ✓ PROFITABLE + + This test verifies Lean correctly identifies and shares such repeated refs. 
+-/ +def testRecursorSharing : TestSeq := Id.run do + -- Test: Expression with ref appearing 4 times should be shared + -- For profitability: (n-1)*size > n*ref_size + -- With n=4, size=2: (4-1)*2 = 6 > 4*1 = 4 ✓ PROFITABLE + let t := Expr.ref 0 #[] -- size=2 (tag4(0) + tag0(0)) + let sortU := Expr.sort 1 + + -- Use t four times: in nested all expressions + let e1 := Expr.all t sortU -- use 1 + let e2 := Expr.all t e1 -- use 2 + let e3 := Expr.all t e2 -- use 3 + let e4 := Expr.all t e3 -- use 4 + + let result := analyzeBlock #[e4] + let effectiveSizes := computeEffectiveSizes result.infoMap result.topoOrder + let sharedHashes := decideSharing result.infoMap result.topoOrder + + let refHash := computeExprHash t + let refInfo := result.infoMap.get? refHash + let refUsage := refInfo.map (·.usageCount) |>.getD 0 + let refEffSize := effectiveSizes.getD refHash 0 + let refGross : _root_.Int := (refUsage - 1 : _root_.Int) * refEffSize + let refPotential : _root_.Int := refGross - refUsage + + let refShared := sharedHashes.any (· == refHash) + + let (_, sharingVec) := buildSharingVec #[e4] sharedHashes result.infoMap result.ptrToHash + + return group "recursor sharing" ( + test "ref found with usage >= 4" (refUsage >= 4) ++ + test "ref potential > 0" (refPotential > 0) ++ + test "ref is shared" refShared ++ + test "sharing vector non-empty" (sharingVec.size >= 1) + ) + +/-- Test with a realistic recursor-like structure. 
+ This mimics what we see in the actual test output: + - Type: ∀ (motive : T → Sort u), motive C1 → motive C2 → motive C3 → ∀ (t : T), motive t + - Rules: var X, var Y, var Z (just minor arguments) +-/ +def testRealisticRecursor : TestSeq := Id.run do + -- The inductive type T = ref 0 + let tRef := Expr.ref 0 #[] + let sortU := Expr.sort 1 + let _sort0 := Expr.sort 0 + + -- Constructor refs + let c1 := Expr.ref 1 #[] + let c2 := Expr.ref 2 #[] + let c3 := Expr.ref 3 #[] + + -- Build the type: ∀ (motive : T → Sort u), motive C1 → motive C2 → motive C3 → ∀ (t : T), motive t + -- From inside out: + let _motiveVar := Expr.var 0 -- motive when in scope + let tVar := Expr.var 0 -- t when innermost + + -- ∀ (t : T), motive t (motive is var 1 here due to binder) + let targetBody := Expr.app (Expr.var 1) tVar + let target := Expr.all tRef targetBody + + -- motive C3 → target (motive is var 3 here) + let minor3 := Expr.app (Expr.var 3) c3 + let withMinor3 := Expr.all minor3 target + + -- motive C2 → ... (motive is var 2 here) + let minor2 := Expr.app (Expr.var 2) c2 + let withMinor2 := Expr.all minor2 withMinor3 + + -- motive C1 → ... (motive is var 1 here) + let minor1 := Expr.app (Expr.var 1) c1 + let withMinor1 := Expr.all minor1 withMinor2 + + -- ∀ (motive : T → Sort u), ... 
+ let motiveType := Expr.all tRef sortU + let fullType := Expr.all motiveType withMinor1 + + -- Rules are just bound variables (minor arguments) + let rule1 := Expr.var 1 + let rule2 := Expr.var 2 + let rule3 := Expr.var 3 + + -- Analyze all expressions together (like Rust does) + let allExprs := #[fullType, rule1, rule2, rule3] + let result := analyzeBlock allExprs + let _effectiveSizes := computeEffectiveSizes result.infoMap result.topoOrder + let sharedHashes := decideSharing result.infoMap result.topoOrder + + -- Check each ref + let tRefHash := computeExprHash tRef + let c1Hash := computeExprHash c1 + let c2Hash := computeExprHash c2 + let c3Hash := computeExprHash c3 + + let tRefUsage := result.infoMap.get? tRefHash |>.map (·.usageCount) |>.getD 0 + let _c1Usage := result.infoMap.get? c1Hash |>.map (·.usageCount) |>.getD 0 + let _c2Usage := result.infoMap.get? c2Hash |>.map (·.usageCount) |>.getD 0 + let _c3Usage := result.infoMap.get? c3Hash |>.map (·.usageCount) |>.getD 0 + + -- Build sharing vector + let (_rewritten, _sharingVec) := buildSharingVec allExprs sharedHashes result.infoMap result.ptrToHash + + -- ref 0 (type) appears 2 times: in motiveType and target + -- With usage=2, size=2: potential = 1*2 - 2 = 0, NOT shared + + return group "realistic recursor" ( + test "type ref usage = 2" (tRefUsage == 2) ++ + test "analysis completes" true + ) + +/-- Test that content hash collision correctly increments usage. + If we create the same expression multiple times (different Lean pointers), + the sharing analysis should still count them as the same subterm. 
-/ +def testContentHashCollision : TestSeq := Id.run do + -- Create "ref 0 #[]" three times - different Lean objects, same content + let r1 := Expr.ref 0 #[] + let r2 := Expr.ref 0 #[] + let r3 := Expr.ref 0 #[] + + -- Wrap each in a different expression + let e1 := Expr.all r1 (Expr.sort 0) + let e2 := Expr.all r2 (Expr.sort 1) + let e3 := Expr.all r3 (Expr.sort 2) + + let result := analyzeBlock #[e1, e2, e3] + + let refHash := computeExprHash (Expr.ref 0 #[]) + let refUsage := result.infoMap.get? refHash |>.map (·.usageCount) |>.getD 0 + + return group "content hash collision" ( + test "ref usage counted via hash collision" (refUsage == (3 : Nat)) + ) + +/-! ## Cross-implementation sharing tests -/ + +/-- Test that Lean and Rust produce identical sharing decisions for a simple case. + Creates a set of expressions with known sharing opportunities and compares + the sharing vectors produced by both implementations. -/ +def testCrossImplSharingSimple : TestSeq := Id.run do + -- Create expressions with 4 usages of ref 0 #[] (should be shared) + let t := Expr.ref 0 #[] + let sortU := Expr.sort 1 + let e1 := Expr.all t sortU + let e2 := Expr.all t e1 + let e3 := Expr.all t e2 + let e4 := Expr.all t e3 + let exprs := #[e4] + + -- Run Lean's sharing analysis + let result := analyzeBlock exprs + let sharedHashes := decideSharing result.infoMap result.topoOrder + let (rewritten, sharingVec) := buildSharingVec exprs sharedHashes result.infoMap result.ptrToHash + + -- Run Rust's sharing analysis via FFI + let rustSharingCount := rsAnalyzeSharingCount exprs + let (isMatch, _leanCount, _rustCount) := unpackSharingComparison (rsCompareSharingAnalysis exprs sharingVec rewritten) + + return group "cross-impl simple" ( + test "sharing counts match" (sharingVec.size.toUInt64 == rustSharingCount) ++ + test "sharing vectors match" isMatch + ) + +/-- Test cross-implementation sharing for the content hash collision case. 
+ This verifies both implementations correctly count usages via content hashing. -/ +def testCrossImplContentHash : TestSeq := Id.run do + -- Create "ref 0 #[]" three times with different wrappers + let r1 := Expr.ref 0 #[] + let r2 := Expr.ref 0 #[] + let r3 := Expr.ref 0 #[] + let e1 := Expr.all r1 (Expr.sort 0) + let e2 := Expr.all r2 (Expr.sort 1) + let e3 := Expr.all r3 (Expr.sort 2) + let exprs := #[e1, e2, e3] + + -- Run Lean's sharing analysis + let result := analyzeBlock exprs + let sharedHashes := decideSharing result.infoMap result.topoOrder + let (rewritten, sharingVec) := buildSharingVec exprs sharedHashes result.infoMap result.ptrToHash + + -- Run Rust's sharing analysis via FFI + let rustSharingCount := rsAnalyzeSharingCount exprs + let (isMatch, _leanCount, _rustCount) := unpackSharingComparison (rsCompareSharingAnalysis exprs sharingVec rewritten) + + return group "cross-impl content hash" ( + test "sharing counts match" (sharingVec.size.toUInt64 == rustSharingCount) ++ + test "sharing vectors match" isMatch + ) + +/-- Test cross-implementation sharing for a realistic recursor structure. + This mimics the actual failing case we see in the full test. 
-/ +def testCrossImplRecursor : TestSeq := Id.run do + -- Build a 3-constructor recursor type + -- Type: ∀ (motive : T → Sort u), minor1 → minor2 → minor3 → ∀ (t : T), motive t + let tRef := Expr.ref 0 #[] -- The inductive type + let c1 := Expr.ref 1 #[] -- Constructor 1 + let c2 := Expr.ref 2 #[] -- Constructor 2 + let c3 := Expr.ref 3 #[] -- Constructor 3 + let sortU := Expr.sort 1 + + -- Build from inside out + let targetBody := Expr.app (Expr.var 1) (Expr.var 0) -- motive t + let target := Expr.all tRef targetBody -- ∀ (t : T), motive t + + let minor3 := Expr.app (Expr.var 3) c3 + let withMinor3 := Expr.all minor3 target + + let minor2 := Expr.app (Expr.var 2) c2 + let withMinor2 := Expr.all minor2 withMinor3 + + let minor1 := Expr.app (Expr.var 1) c1 + let withMinor1 := Expr.all minor1 withMinor2 + + let motiveType := Expr.all tRef sortU + let fullType := Expr.all motiveType withMinor1 + + -- Rules (just return minor arguments) + let rule1 := Expr.var 1 + let rule2 := Expr.var 2 + let rule3 := Expr.var 3 + + let exprs := #[fullType, rule1, rule2, rule3] + + -- Debug: print Rust's view + let _ := rsDebugSharingAnalysis exprs + + -- Run Lean's sharing analysis + let result := analyzeBlock exprs + let sharedHashes := decideSharing result.infoMap result.topoOrder + let (rewritten, sharingVec) := buildSharingVec exprs sharedHashes result.infoMap result.ptrToHash + + -- Run Rust's sharing analysis via FFI + let rustSharingCount := rsAnalyzeSharingCount exprs + let (isMatch, _leanCount, _rustCount) := unpackSharingComparison (rsCompareSharingAnalysis exprs sharingVec rewritten) + + return group "cross-impl recursor" ( + test "sharing counts match" (sharingVec.size.toUInt64 == rustSharingCount) ++ + test "sharing vectors match" isMatch + ) + +/-! ## forall_imp and flip sharing tests -/ + +/-- Test forall_imp sharing using the exact Ixon.Expr structure from compile output. 
+ + From the compile test, forall_imp has this structure (pre-sharing): + Type: all (sort 0) (all (all (var 0) (sort 1)) (all (all (var 1) (sort 1)) + (all (all (var 2) (all (app (var 2) (var 0)) (app (var 2) (var 1)))) + (all (all (var 3) (app (var 3) (var 0))) (all (var 4) (app (var 3) (var 0))))))) + + Value: similar structure with lam instead of all + + Key difference: Lean shares `app (var 2) (var 1)` (8 entries), Rust doesn't (7 entries) +-/ +def testForallImpReal : TestSeq := Id.run do + -- Build the forall_imp type structure based on compile output + -- ∀ (α : Sort u), ∀ (p : α → Prop), ∀ (q : α → Prop), + -- ∀ (h : ∀ a, p a → q a), (∀ a, p a) → (∀ a, q a) + -- + -- IMPORTANT: Create fresh objects for each occurrence to simulate real compilation + -- where the same content might have different Lean pointers + + -- Build type with fresh objects + let typ := Expr.all (Expr.sort 0) -- ∀ α : Sort u + (Expr.all (Expr.all (Expr.var 0) (Expr.sort 1)) -- ∀ p : α → Prop (fresh) + (Expr.all (Expr.all (Expr.var 1) (Expr.sort 1)) -- ∀ q : α → Prop (fresh!) + (Expr.all -- ∀ h : ∀ a, p a → q a + (Expr.all (Expr.var 2) + (Expr.all (Expr.app (Expr.var 2) (Expr.var 0)) -- p a → q a + (Expr.app (Expr.var 2) (Expr.var 1)))) + (Expr.all -- (∀ a, p a) → (∀ a, q a) + (Expr.all (Expr.var 3) (Expr.app (Expr.var 3) (Expr.var 0))) -- ∀ a, p a + (Expr.all (Expr.var 4) (Expr.app (Expr.var 3) (Expr.var 0))))))) -- ∀ a, q a + + -- Build value with fresh objects + let value := Expr.lam (Expr.sort 0) -- λ α + (Expr.lam (Expr.all (Expr.var 0) (Expr.sort 1)) -- λ p : α → Prop (fresh) + (Expr.lam (Expr.all (Expr.var 1) (Expr.sort 1)) -- λ q : α → Prop (fresh!) 
+ (Expr.lam -- λ h : ∀ a, p a → q a + (Expr.all (Expr.var 2) + (Expr.all (Expr.app (Expr.var 2) (Expr.var 0)) -- p a → q a (fresh) + (Expr.app (Expr.var 2) (Expr.var 1)))) -- (fresh) + (Expr.lam -- λ h' : ∀ a, p a + (Expr.all (Expr.var 3) (Expr.app (Expr.var 3) (Expr.var 0))) -- (fresh) + (Expr.lam (Expr.var 4) -- λ a : α + (Expr.app + (Expr.app (Expr.var 2) (Expr.var 0)) -- h a + (Expr.app (Expr.var 1) (Expr.var 0)))))))) -- h' a + + let exprs := #[typ, value] + + -- Run Lean sharing analysis + let result := analyzeBlock exprs + let sharedHashes := decideSharing result.infoMap result.topoOrder + let (rewritten, sharingVec) := buildSharingVec exprs sharedHashes result.infoMap result.ptrToHash + + -- Run Rust sharing analysis + let rustCount := rsAnalyzeSharingCount exprs + let (isMatch, _leanCnt, _rustCnt) := unpackSharingComparison (rsCompareSharingAnalysis exprs sharingVec rewritten) + + return group "forall_imp real" ( + test "sharing counts match" (sharingVec.size.toUInt64 == rustCount) ++ + test "sharing vectors match" isMatch + ) + +/-- Test forall_imp sharing difference. + + forall_imp has type: + ∀ (α : Sort u), ∀ (p q : α → Prop), (∀ a, p a → q a) → (∀ a, p a) → (∀ a, q a) + + Key expressions: + - `α → Prop` (appears twice for p and q binder types) + - `app (var N) (var M)` patterns like `p a`, `q a` + + Lean shares both `app (var 2) (var 1)` and `app (var 2) (var 0)` → 8 entries + Rust only shares `app (var 2) (var 0)` → 7 entries + + This tests the profitability calculation difference. 
+-/ +def testForallImpSharing : TestSeq := Id.run do + -- Simplified forall_imp structure focusing on the key pattern + -- The pattern that differs: `all (app var2 var0) (app var2 var1)` + -- appears in the type as `p a → q a` + + -- Create expressions where `app (var 2) (var 1)` and `app (var 2) (var 0)` both appear twice + let app21 := Expr.app (Expr.var 2) (Expr.var 1) + let app20 := Expr.app (Expr.var 2) (Expr.var 0) + + -- Use each twice + let e1 := Expr.all app21 app20 -- p a → q a pattern + let e2 := Expr.all app20 app21 -- q a → p a (reversed) + let e3 := Expr.all (Expr.var 3) e1 -- ∀ a, p a → q a + let e4 := Expr.all (Expr.var 3) e2 -- ∀ a, q a → p a + + let exprs := #[e3, e4] + + -- Analyze with Lean + let result := analyzeBlock exprs + let sharedHashes := decideSharing result.infoMap result.topoOrder + let (rewritten, sharingVec) := buildSharingVec exprs sharedHashes result.infoMap result.ptrToHash + + -- Check usage and profitability of app21 and app20 + let app21Hash := computeExprHash app21 + let app20Hash := computeExprHash app20 + + let app21Info := result.infoMap.get? app21Hash + let app20Info := result.infoMap.get? 
app20Hash + + let app21Usage := app21Info.map (·.usageCount) |>.getD 0 + let app20Usage := app20Info.map (·.usageCount) |>.getD 0 + + -- Run Rust's sharing analysis via FFI + let rustSharingCount := rsAnalyzeSharingCount exprs + let (isMatch, _leanCount, _rustCount) := unpackSharingComparison (rsCompareSharingAnalysis exprs sharingVec rewritten) + + return group "forall_imp sharing" ( + test "app20 used twice" (app20Usage >= 2) ++ + test "app21 used twice" (app21Usage >= 2) ++ + test "sharing counts match" (sharingVec.size.toUInt64 == rustSharingCount) ++ + test "sharing vectors match" isMatch + ) + +def testFlipSharing : TestSeq := Id.run do + -- Recreate the `flip` function structure + -- Type: ∀ (A : Sort 0), ∀ (B : Sort 1), ∀ (C : Sort 2), (A → B → C) → B → A → C + -- In Ixon.Expr terms: + -- all (sort 0) (all (sort 1) (all (sort 2) (all (all (var 2) (all (var 4) (var 3)))))) + -- where = all (var 2) (all (var 2) (var 2)) + + -- The key subexpression that appears multiple times + let innerAll := Expr.all (Expr.var 2) (Expr.var 2) -- (var 2) → (var 2) + let funcType := Expr.all (Expr.var 2) innerAll -- (var 2) → (var 2) → (var 2) + + -- Build the full type + let resultType := Expr.all (Expr.var 2) (Expr.all (Expr.var 4) (Expr.var 3)) -- B → A → C + let withFunc := Expr.all funcType resultType -- (A → B → C) → B → A → C + let withC := Expr.all (Expr.sort 2) withFunc + let withB := Expr.all (Expr.sort 1) withC + let typ := Expr.all (Expr.sort 0) withB + + -- Value: λ A B C f b a => f a b + -- In Ixon.Expr terms: lam ... lam (app (app (var 2) (var 0)) (var 1)) + let body := Expr.app (Expr.app (Expr.var 2) (Expr.var 0)) (Expr.var 1) -- f a b + let lamA := Expr.lam (Expr.var 4) body + let lamB := Expr.lam (Expr.var 2) lamA + let lamFunc := Expr.lam funcType lamB -- Note: funcType appears again here! 
+ let lamC := Expr.lam (Expr.sort 2) lamFunc + let lamBSort := Expr.lam (Expr.sort 1) lamC + let value := Expr.lam (Expr.sort 0) lamBSort + + let exprs := #[typ, value] + + -- Analyze with Lean + let result := analyzeBlock exprs + let sharedHashes := decideSharing result.infoMap result.topoOrder + let (rewritten, sharingVec) := buildSharingVec exprs sharedHashes result.infoMap result.ptrToHash + + -- Check what got shared + let innerAllHash := computeExprHash innerAll + let funcTypeHash := computeExprHash funcType + + let innerAllInfo := result.infoMap.get? innerAllHash + let funcTypeInfo := result.infoMap.get? funcTypeHash + + let innerAllUsage := innerAllInfo.map (·.usageCount) |>.getD 0 + let funcTypeUsage := funcTypeInfo.map (·.usageCount) |>.getD 0 + + -- Run Rust's sharing analysis via FFI + let rustSharingCount := rsAnalyzeSharingCount exprs + let (isMatch, _leanCount, _rustCount) := unpackSharingComparison (rsCompareSharingAnalysis exprs sharingVec rewritten) + + return group "flip sharing" ( + test "funcType used twice" (funcTypeUsage >= 2) ++ + test "innerAll used multiple times" (innerAllUsage >= 2) ++ + test "sharing counts match" (sharingVec.size.toUInt64 == rustSharingCount) ++ + test "sharing vectors match" isMatch + ) + +/-! 
## Suite -/ + +def suite : List TestSeq := [ + testForallImpReal, + testForallImpSharing, + testUint64ToBytesLength, + testUint64ToBytesEncoding, + testHashConsistency, + testHashDifferent, + testSortHashBuffer, + testVarHashBuffer, + testAppHashBuffer, + testNoSharing, + testWithSharing, + testSharingTopoOrder, + testKnownHashVar0, + testKnownHashSort0, + testRecursorSharing, + testRealisticRecursor, + testContentHashCollision, + testCrossImplSharingSimple, + testCrossImplContentHash, + testCrossImplRecursor, + testFlipSharing, +] + +end Tests.Sharing diff --git a/Tests/IxVM.lean b/Tests/IxVM.lean index 82017c6b..71c6084b 100644 --- a/Tests/IxVM.lean +++ b/Tests/IxVM.lean @@ -2,7 +2,6 @@ import Tests.Common import Ix.IxVM import Ix.Aiur.Simple import Ix.Aiur.Compile -import Ix.Ixon import Blake3 def mkBlake3HashTestCase (size : Nat) : AiurTestCase := @@ -13,14 +12,6 @@ def mkBlake3HashTestCase (size : Nat) : AiurTestCase := let buffer := ⟨input, .ofList [(#[0], ⟨0, size⟩)]⟩ -- key is fixed as #[0] ⟨`blake3_test, #[], output, buffer, buffer⟩ -def mkIxonSerdeTestCase (ixon : Ixon.Ixon) : AiurTestCase := - let bytes := Ixon.ser ixon - let size := bytes.size - let ⟨⟨hash⟩, _⟩ := Blake3.hash bytes - let hashG := hash.map .ofUInt8 - let buffer := ⟨bytes.data.map .ofUInt8, .ofList [(hashG, ⟨0, size⟩)]⟩ - ⟨`ixon_blake3_test, hashG, #[], buffer, buffer⟩ - def ixTestCases : List AiurTestCase := [ .noIO `relaxed_u64_succ #[0, 0, 0, 0, 0, 0, 0, 0] #[1, 0, 0, 0, 0, 0, 0, 0], .noIO `relaxed_u64_succ #[255, 0, 0, 0, 0, 0, 0, 0] #[0, 1, 0, 0, 0, 0, 0, 0], @@ -47,28 +38,6 @@ def ixTestCases : List AiurTestCase := [ mkBlake3HashTestCase 3104, mkBlake3HashTestCase 3136, mkBlake3HashTestCase 3168, - mkIxonSerdeTestCase .nanon, - mkIxonSerdeTestCase (.nstr default default), - mkIxonSerdeTestCase (.nnum default default), - mkIxonSerdeTestCase .uzero, - mkIxonSerdeTestCase (.usucc default), - mkIxonSerdeTestCase (.umax default default), - mkIxonSerdeTestCase (.uimax default default), - 
mkIxonSerdeTestCase (.uvar 42), - mkIxonSerdeTestCase (.evar 42), - mkIxonSerdeTestCase (.esort default), - mkIxonSerdeTestCase (.estr default), - mkIxonSerdeTestCase (.enat default), - mkIxonSerdeTestCase (.eapp default default), - mkIxonSerdeTestCase (.elam default default), - mkIxonSerdeTestCase (.eall default default), - mkIxonSerdeTestCase (.elet false default default default), - mkIxonSerdeTestCase (.elet true default default default), - mkIxonSerdeTestCase (.eval ⟨default, default, default, default⟩), - mkIxonSerdeTestCase (.chck ⟨default, default, default⟩), - mkIxonSerdeTestCase (.comm ⟨default, default⟩), - mkIxonSerdeTestCase (.blob ⟨#[0, 1, 2, 3]⟩), - mkIxonSerdeTestCase (.blob ⟨#[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]⟩), ] def Tests.IxVM.suite := [ diff --git a/Tests/IxVMTest.lean b/Tests/IxVMTest.lean new file mode 100644 index 00000000..3578f99a --- /dev/null +++ b/Tests/IxVMTest.lean @@ -0,0 +1,8 @@ +import Tests.IxVM + +def testSuite : Std.HashMap String (List LSpec.TestSeq) := .ofList [ + ("ixvm", Tests.IxVM.suite), +] + +def main (args : List String) : IO UInt32 := do + LSpec.lspecIO testSuite args diff --git a/Tests/Main.lean b/Tests/Main.lean index 9601e1b9..e25300a8 100644 --- a/Tests/Main.lean +++ b/Tests/Main.lean @@ -1,32 +1,79 @@ -import Tests.Aiur -import Tests.FFIConsistency import Tests.ByteArray -import Tests.Ix import Tests.Ix.Ixon +import Tests.Ix.Claim +import Tests.Ix.Commit import Tests.Ix.Compile -import Tests.IxVM +import Tests.Ix.Decompile +import Tests.Ix.RustSerialize +import Tests.Ix.RustDecompile +import Tests.Ix.Sharing +import Tests.Ix.CanonM +import Tests.Ix.GraphM +import Tests.Ix.CondenseM +import Tests.FFI import Tests.Keccak import Tests.Cli +import Tests.ShardMap +import Ix.Common +import Ix.Meta @[extern "rs_tmp_decode_const_map"] opaque tmpDecodeConstMap : @& List (Lean.Name × Lean.ConstantInfo) → USize -def main (args: List String) : IO UInt32 := do +/-- Primary test suites - run by default -/ +def primarySuites : 
Std.HashMap String (List LSpec.TestSeq) := .ofList [ + ("ffi", Tests.FFI.suite), + ("byte-array", Tests.ByteArray.suite), + ("ixon", Tests.Ixon.suite), + ("claim", Tests.Claim.suite), + ("commit", Tests.Commit.suite), + ("canon", [Tests.CanonM.suite]), + ("keccak", Tests.Keccak.suite), + ("sharing", Tests.Sharing.suite), + ("graph-unit", Tests.Ix.GraphM.suite), + ("condense-unit", Tests.Ix.CondenseM.suite), +] + +/-- Ignored test suites - expensive, run only when explicitly requested. These require significant RAM -/ +def ignoredSuites : Std.HashMap String (List LSpec.TestSeq) := .ofList [ + ("shard-map", Tests.ShardMap.suite), + ("rust-canon-roundtrip", Tests.CanonM.rustSuiteIO), + ("serial-canon-roundtrip", Tests.CanonM.serialSuiteIO), + ("parallel-canon-roundtrip", Tests.CanonM.parallelSuiteIO), + ("graph-cross", Tests.Ix.GraphM.suiteIO), + ("condense-cross", Tests.Ix.CondenseM.suiteIO), + ("compile", Tests.Compile.compileSuiteIO), + ("decompile", Tests.Decompile.decompileSuiteIO), + ("rust-serialize", Tests.RustSerialize.rustSerializeSuiteIO), + ("rust-decompile", Tests.RustDecompile.rustDecompileSuiteIO), + ("commit-io", Tests.Commit.suiteIO), +] + +def main (args : List String) : IO UInt32 := do + -- Special case: rust-compile diagnostic if args.contains "rust-compile" then let env ← get_env! - println! 
tmpDecodeConstMap env.constants.toList + IO.println s!"Loaded environment with {env.constants.toList.length} constants" + let result := tmpDecodeConstMap env.constants.toList + IO.println s!"Rust compiled: {result}" return 0 - else if args.contains "compile" then LSpec.lspecEachIO Tests.Ix.Compile.suiteIO id - else if args.contains "cli" then - Tests.Cli.suite + + -- Special case: cli tests have their own runner + if args.contains "cli" then + return ← Tests.Cli.suite + + let runIgnored := args.contains "--ignored" + let filterArgs := args.filter (· != "--ignored") + + -- Check if any filterArg matches an ignored suite + let ignoredRequested := filterArgs.any (ignoredSuites.contains ·) + + -- Run primary tests + let primaryResult ← LSpec.lspecIO primarySuites filterArgs + if primaryResult != 0 then return primaryResult + + -- Run ignored tests if --ignored flag or specific ignored suite requested + if runIgnored || ignoredRequested then + LSpec.lspecIO ignoredSuites filterArgs else - LSpec.lspecIO (.ofList [ - ("aiur", Tests.Aiur.suite), - ("ffi-consistency", Tests.FFIConsistency.suite), - ("byte-array", Tests.ByteArray.suite), - ("ix", Tests.Ix.suite), - --("ixon-units", Tests.Ixon.units), - ("ixon", Tests.Ixon.suite), - ("ixvm", Tests.IxVM.suite), - ("keccak", Tests.Keccak.suite), - ]) args + return 0 diff --git a/Tests/ShardMap.lean b/Tests/ShardMap.lean new file mode 100644 index 00000000..c7ed382a --- /dev/null +++ b/Tests/ShardMap.lean @@ -0,0 +1,430 @@ +import Ix.ShardMap +import LSpec + +open LSpec Ix + +namespace Tests.ShardMap + +def testInsertAndGet : TestSeq := + .individualIO "basic insert and get" (do + let map ← ShardMap.new (α := String) (β := Nat) (shardBits := 2) + map.insert "hello" 42 + let v ← map.get? "hello" + pure (v == some 42, none)) .done + +def testGetNonExistent : TestSeq := + .individualIO "get non-existent returns none" (do + let map ← ShardMap.new (α := String) (β := Nat) (shardBits := 2) + let v ← map.get? 
"nonexistent" + pure (v == none, none)) .done + +def testMultipleInserts : TestSeq := + .individualIO "multiple inserts" (do + let map ← ShardMap.new (α := String) (β := Nat) (shardBits := 2) + map.insert "foo" 1 + map.insert "bar" 2 + map.insert "baz" 3 + let v1 ← map.get? "foo" + let v2 ← map.get? "bar" + let v3 ← map.get? "baz" + pure (v1 == some 1 && v2 == some 2 && v3 == some 3, none)) .done + +def testOverwrite : TestSeq := + .individualIO "overwrite existing key" (do + let map ← ShardMap.new (α := String) (β := Nat) (shardBits := 2) + map.insert "key" 1 + map.insert "key" 2 + let v ← map.get? "key" + pure (v == some 2, none)) .done + +def testSize : TestSeq := + .individualIO "size" (do + let map ← ShardMap.new (α := String) (β := Nat) (shardBits := 2) + map.insert "a" 1 + map.insert "b" 2 + map.insert "c" 3 + let sz ← map.size + pure (sz == 3, none)) .done + +def testContains : TestSeq := + .individualIO "contains" (do + let map ← ShardMap.new (α := String) (β := Nat) (shardBits := 2) + map.insert "exists" 1 + let c1 ← map.contains "exists" + let c2 ← map.contains "missing" + pure (c1 && !c2, none)) .done + +def testRemove : TestSeq := + .individualIO "remove" (do + let map ← ShardMap.new (α := String) (β := Nat) (shardBits := 2) + map.insert "key" 42 + let removed ← map.remove "key" + let after ← map.get? "key" + pure (removed == some 42 && after == none, none)) .done + +def testModify : TestSeq := + .individualIO "modify" (do + let map ← ShardMap.new (α := String) (β := Nat) (shardBits := 2) + map.insert "key" 10 + let modified ← map.modify "key" (· + 5) + let v ← map.get? 
"key" + pure (modified && v == some 15, none)) .done + +def testGetOrInsertExisting : TestSeq := + .individualIO "getOrInsert existing" (do + let map ← ShardMap.new (α := String) (β := Nat) (shardBits := 2) + map.insert "key" 42 + let v ← map.getOrInsert "key" (fun () => pure 999) + pure (v == 42, none)) .done + +def testGetOrInsertNew : TestSeq := + .individualIO "getOrInsert new" (do + let map ← ShardMap.new (α := String) (β := Nat) (shardBits := 2) + let v ← map.getOrInsert "key" (fun () => pure 999) + let check ← map.get? "key" + pure (v == 999 && check == some 999, none)) .done + +def testClear : TestSeq := + .individualIO "clear" (do + let map ← ShardMap.new (α := String) (β := Nat) (shardBits := 2) + map.insert "a" 1 + map.insert "b" 2 + map.clear + let sz ← map.size + pure (sz == 0, none)) .done + +def testToList : TestSeq := + .individualIO "toList" (do + let map ← ShardMap.new (α := Nat) (β := String) (shardBits := 1) + map.insert 1 "one" + map.insert 2 "two" + let list ← map.toList + pure (list.length == 2, none)) .done + +-- Concurrent tests + +/-- Test concurrent reads don't block each other with SharedMutex -/ +def testConcurrentReads : TestSeq := + .individualIO "concurrent reads" (do + let map ← ShardMap.new (α := Nat) (β := Nat) (shardBits := 2) + -- Insert many values + for i in [:1000] do + map.insert i (i * 2) + -- Spawn many concurrent readers + let numReaders := 32 + let mut tasks : Array (Task (Except IO.Error Bool)) := #[] + for _ in [:numReaders] do + let task ← IO.asTask do + let mut allOk := true + for i in [:1000] do + let v ← map.get? 
i + if v != some (i * 2) then allOk := false + pure allOk + tasks := tasks.push task + -- Wait for all readers + let mut allPassed := true + for task in tasks do + match task.get with + | .ok ok => if !ok then allPassed := false + | .error _ => allPassed := false + pure (allPassed, none)) .done + +/-- Test concurrent writes to different keys -/ +def testConcurrentWritesDifferentKeys : TestSeq := + .individualIO "concurrent writes different keys" (do + let map ← ShardMap.new (α := Nat) (β := Nat) (shardBits := 4) + let numWriters := 16 + let keysPerWriter := 100 + let mut tasks : Array (Task (Except IO.Error Unit)) := #[] + for w in [:numWriters] do + let task ← IO.asTask do + for k in [:keysPerWriter] do + let key := w * keysPerWriter + k + map.insert key (key * 3) + tasks := tasks.push task + -- Wait for all writers and check for errors + for task in tasks do + let _ ← IO.ofExcept task.get + -- Verify all values + let mut allCorrect := true + for i in [:(numWriters * keysPerWriter)] do + let v ← map.get? 
i + if v != some (i * 3) then allCorrect := false + pure (allCorrect, none)) .done + +/-- Test concurrent getOrInsert with same keys (race condition test) -/ +def testConcurrentGetOrInsert : TestSeq := + .individualIO "concurrent getOrInsert consistency" (do + let map ← ShardMap.new (α := Nat) (β := Nat) (shardBits := 2) + let numWorkers := 32 + let numKeys := 100 + -- All workers try to getOrInsert the same keys + let mut tasks : Array (Task (Except IO.Error (Array Nat))) := #[] + for w in [:numWorkers] do + let task ← IO.asTask do + let mut results : Array Nat := #[] + for k in [:numKeys] do + let v ← map.getOrInsert k (fun () => pure (w * 1000 + k)) + results := results.push v + pure results + tasks := tasks.push task + -- Wait for all workers and collect results + let mut allResults : Array (Array Nat) := #[] + for task in tasks do + match task.get with + | .ok results => allResults := allResults.push results + | .error _ => pure () + -- For each key, all workers should get the same value (whoever inserted first wins) + let mut consistent := true + for k in [:numKeys] do + let firstVal := allResults[0]![k]! + for results in allResults do + if results[k]! 
!= firstVal then consistent := false + pure (consistent, none)) .done + +/-- Test getOrInsertLazy works correctly -/ +def testGetOrInsertLazy : TestSeq := + .individualIO "getOrInsertLazy" (do + let map ← ShardMap.new (α := String) (β := Nat) (shardBits := 2) + let v1 ← map.getOrInsertLazy "key" (fun () => 42) + let v2 ← map.getOrInsertLazy "key" (fun () => 999) -- Should not be called + pure (v1 == 42 && v2 == 42, none)) .done + +/-- Test getOrInsertIO works correctly -/ +def testGetOrInsertIO : TestSeq := + .individualIO "getOrInsertIO" (do + let map ← ShardMap.new (α := String) (β := Nat) (shardBits := 2) + let counter ← IO.mkRef 0 + let v1 ← map.getOrInsertIO "key" (fun () => do counter.modify (· + 1); pure 42) + let v2 ← map.getOrInsertIO "key" (fun () => do counter.modify (· + 1); pure 999) + let calls ← counter.get + pure (v1 == 42 && v2 == 42 && calls == 1, none)) .done + +/-! ## Tests for try operations -/ + +/-- Test tryGet? returns value when unlocked -/ +def testTryGetUnlocked : TestSeq := + .individualIO "tryGet? unlocked" (do + let map ← ShardMap.new (α := String) (β := Nat) (shardBits := 2) + map.insert "key" 42 + let v ← map.tryGet? "key" + pure (v == some (some 42), none)) .done + +/-- Test tryGet? returns none for non-existent key -/ +def testTryGetNonExistent : TestSeq := + .individualIO "tryGet? non-existent" (do + let map ← ShardMap.new (α := String) (β := Nat) (shardBits := 2) + let v ← map.tryGet? "missing" + pure (v == some none, none)) .done + +/-- Test tryInsert succeeds when unlocked -/ +def testTryInsert : TestSeq := + .individualIO "tryInsert" (do + let map ← ShardMap.new (α := String) (β := Nat) (shardBits := 2) + let ok ← map.tryInsert "key" 42 + let v ← map.get? 
"key" + pure (ok && v == some 42, none)) .done + +/-- Test tryGetOrInsertLazy works correctly -/ +def testTryGetOrInsertLazy : TestSeq := + .individualIO "tryGetOrInsertLazy" (do + let map ← ShardMap.new (α := String) (β := Nat) (shardBits := 2) + let v1 ← map.tryGetOrInsertLazy "key" (fun () => 42) + let v2 ← map.tryGetOrInsertLazy "key" (fun () => 999) -- Should return existing + pure (v1 == some 42 && v2 == some 42, none)) .done + +/-! ## Tests for insertMany -/ + +/-- Test insertMany inserts all items correctly -/ +def testInsertMany : TestSeq := + .individualIO "insertMany" (do + let map ← ShardMap.new (α := Nat) (β := String) (shardBits := 2) + let items := #[(1, "one"), (2, "two"), (3, "three"), (4, "four"), (5, "five")] + map.insertMany items + let v1 ← map.get? 1 + let v2 ← map.get? 2 + let v3 ← map.get? 3 + let v4 ← map.get? 4 + let v5 ← map.get? 5 + let sz ← map.size + pure (v1 == some "one" && v2 == some "two" && v3 == some "three" + && v4 == some "four" && v5 == some "five" && sz == 5, none)) .done + +/-- Test insertMany with empty array -/ +def testInsertManyEmpty : TestSeq := + .individualIO "insertMany empty" (do + let map ← ShardMap.new (α := Nat) (β := Nat) (shardBits := 2) + map.insertMany #[] + let sz ← map.size + pure (sz == 0, none)) .done + +/-- Test insertMany overwrites existing keys -/ +def testInsertManyOverwrite : TestSeq := + .individualIO "insertMany overwrite" (do + let map ← ShardMap.new (α := Nat) (β := Nat) (shardBits := 2) + map.insert 1 100 + map.insertMany #[(1, 200), (2, 300)] + let v1 ← map.get? 1 + let v2 ← map.get? 
2 + pure (v1 == some 200 && v2 == some 300, none)) .done + +/-- Test concurrent insertMany operations -/ +def testConcurrentInsertMany : TestSeq := + .individualIO "concurrent insertMany" (do + let map ← ShardMap.new (α := Nat) (β := Nat) (shardBits := 4) + let numWorkers := 8 + let itemsPerWorker := 100 + let mut tasks : Array (Task (Except IO.Error Unit)) := #[] + for w in [:numWorkers] do + let task ← IO.asTask do + let items : Array (Nat × Nat) := Array.range itemsPerWorker |>.map fun i => + let key := w * itemsPerWorker + i + (key, key * 2) + map.insertMany items + tasks := tasks.push task + -- Wait for all workers + for task in tasks do + let _ ← IO.ofExcept task.get + -- Verify all values + let mut allCorrect := true + for i in [:(numWorkers * itemsPerWorker)] do + let v ← map.get? i + if v != some (i * 2) then allCorrect := false + let sz ← map.size + pure (allCorrect && sz == numWorkers * itemsPerWorker, none)) .done + +/-! ## Tests for modifyGet -/ + +/-- Test modifyGet returns result and updates value -/ +def testModifyGet : TestSeq := + .individualIO "modifyGet" (do + let map ← ShardMap.new (α := String) (β := Nat) (shardBits := 2) + map.insert "counter" 10 + let result ← map.modifyGet "counter" fun v => (v, v + 1) + let newVal ← map.get? "counter" + pure (result == some 10 && newVal == some 11, none)) .done + +/-- Test modifyGet returns none for non-existent key -/ +def testModifyGetNonExistent : TestSeq := + .individualIO "modifyGet non-existent" (do + let map ← ShardMap.new (α := String) (β := Nat) (shardBits := 2) + let result ← map.modifyGet "missing" fun v => (v, v + 1) + pure (result == none, none)) .done + +/-- Test modifyGet can return different type than value -/ +def testModifyGetDifferentType : TestSeq := + .individualIO "modifyGet different type" (do + let map ← ShardMap.new (α := String) (β := Nat) (shardBits := 2) + map.insert "key" 42 + let result ← map.modifyGet "key" fun v => (s!"was {v}", v * 2) + let newVal ← map.get? 
"key" + pure (result == some "was 42" && newVal == some 84, none)) .done + +/-- Test concurrent modifyGet operations -/ +def testConcurrentModifyGet : TestSeq := + .individualIO "concurrent modifyGet" (do + let map ← ShardMap.new (α := Nat) (β := Nat) (shardBits := 2) + -- Initialize counters + for i in [:10] do + map.insert i 0 + -- Many workers increment counters + let numWorkers := 32 + let incrementsPerWorker := 100 + let mut tasks : Array (Task (Except IO.Error Unit)) := #[] + for _ in [:numWorkers] do + let task ← IO.asTask do + for _ in [:incrementsPerWorker] do + for i in [:10] do + let _ ← map.modifyGet i fun v => ((), v + 1) + tasks := tasks.push task + -- Wait for all workers + for task in tasks do + let _ ← IO.ofExcept task.get + -- Each counter should have been incremented numWorkers * incrementsPerWorker times + let expected := numWorkers * incrementsPerWorker + let mut allCorrect := true + for i in [:10] do + let v ← map.get? i + if v != some expected then allCorrect := false + pure (allCorrect, none)) .done + +/-! ## Tests for newWithCapacity -/ + +/-- Test newWithCapacity creates a working map -/ +def testNewWithCapacity : TestSeq := + .individualIO "newWithCapacity" (do + let map ← ShardMap.newWithCapacity (α := Nat) (β := Nat) + (shardBits := 4) (capacityPerShard := 1000) + -- Insert many values + for i in [:500] do + map.insert i (i * 2) + -- Verify values + let mut allOk := true + for i in [:500] do + let v ← map.get? i + if v != some (i * 2) then allOk := false + let sz ← map.size + pure (allOk && sz == 500, none)) .done + +/-- Test concurrent tryGet? operations -/ +def testConcurrentTryGet : TestSeq := + .individualIO "concurrent tryGet?" (do + let map ← ShardMap.new (α := Nat) (β := Nat) (shardBits := 2) -- 4 shards + for i in [:100] do + map.insert i i + -- Many readers trying tryGet? 
+ let numReaders := 32 + let successCount ← IO.mkRef 0 + let mut tasks : Array (Task (Except IO.Error Unit)) := #[] + for _ in [:numReaders] do + let task ← IO.asTask do + for i in [:100] do + match ← map.tryGet? i with + | some _ => successCount.modify (· + 1) + | none => pure () -- Shard was locked (rare with reads) + tasks := tasks.push task + for task in tasks do + match task.get with + | .ok () => pure () + | .error _ => pure () + let successes ← successCount.get + -- Most should succeed (reads don't block each other with SharedMutex) + -- With 32 readers × 100 keys = 3200 operations, expect at least 100 to succeed + -- (conservative threshold to avoid flakiness) + pure (successes > 100, none)) .done + +def suite : List TestSeq := [ + testInsertAndGet, + testGetNonExistent, + testMultipleInserts, + testOverwrite, + testSize, + testContains, + testRemove, + testModify, + testGetOrInsertExisting, + testGetOrInsertNew, + testClear, + testToList, + testConcurrentReads, + testConcurrentWritesDifferentKeys, + testConcurrentGetOrInsert, + testGetOrInsertLazy, + testGetOrInsertIO, + testTryGetUnlocked, + testTryGetNonExistent, + testTryInsert, + testTryGetOrInsertLazy, + testNewWithCapacity, + testConcurrentTryGet, + testInsertMany, + testInsertManyEmpty, + testInsertManyOverwrite, + testConcurrentInsertMany, + testModifyGet, + testModifyGetNonExistent, + testModifyGetDifferentType, + testConcurrentModifyGet, +] + +end Tests.ShardMap diff --git a/c/ixon_ffi.c b/c/ixon_ffi.c new file mode 100644 index 00000000..4823b87a --- /dev/null +++ b/c/ixon_ffi.c @@ -0,0 +1,243 @@ +#include "lean/lean.h" +#include + +// Lean's internal mpz allocation - takes ownership of the mpz_t value +// (declared in Lean's runtime but not exposed in public headers) +extern lean_object * lean_alloc_mpz(mpz_t v); +#include "common.h" +#include "rust.h" + +// External class for RustCompiledEnv +static lean_external_class *g_rust_compiled_env_class = NULL; + +static lean_external_class 
*get_rust_compiled_env_class() { + if (g_rust_compiled_env_class == NULL) { + g_rust_compiled_env_class = lean_register_external_class( + &rs_free_rust_env, + &noop_foreach + ); + } + return g_rust_compiled_env_class; +} + +// FFI wrapper: Test round-trip (just pass through, returns scalar) +extern uint64_t c_rs_test_ffi_roundtrip(b_lean_obj_arg name) { + return rs_test_ffi_roundtrip(name); +} + +// FFI wrapper: Compile environment with Rust +// Returns: IO RustCompiledEnv (external object) +extern lean_obj_res c_rs_compile_env_rust_first(b_lean_obj_arg env_consts, lean_obj_arg world) { + void *rust_env = rs_compile_env_rust_first(env_consts); + if (rust_env == NULL) { + // Return IO error + lean_object *err = lean_mk_string("Rust compilation failed"); + lean_object *io_err = lean_io_result_mk_error(lean_mk_io_user_error(err)); + return io_err; + } + lean_object *external = lean_alloc_external(get_rust_compiled_env_class(), rust_env); + return lean_io_result_mk_ok(external); +} + +// FFI wrapper: Free RustCompiledEnv +// Returns: IO Unit +extern lean_obj_res c_rs_free_rust_env(lean_obj_arg rust_env_obj, lean_obj_arg world) { + // The external object will be freed by Lean's GC when it's no longer referenced + // We don't need to do anything here since we registered a finalizer + lean_dec(rust_env_obj); + return lean_io_result_mk_ok(lean_box(0)); +} + +// FFI wrapper: Get block count +extern uint64_t c_rs_get_rust_env_block_count(b_lean_obj_arg rust_env_obj) { + void *rust_env = lean_get_external_data(rust_env_obj); + return rs_get_rust_env_block_count(rust_env); +} + +// FFI wrapper: Compare a single block +extern uint64_t c_rs_compare_block( + b_lean_obj_arg rust_env_obj, + b_lean_obj_arg name, + b_lean_obj_arg lean_bytes +) { + void *rust_env = lean_get_external_data(rust_env_obj); + return rs_compare_block(rust_env, name, lean_bytes); +} + +// FFI wrapper: Get Rust block bytes as ByteArray +// Returns: IO ByteArray +extern lean_obj_res c_rs_get_block_bytes( + 
b_lean_obj_arg rust_env_obj, + b_lean_obj_arg name, + lean_obj_arg world +) { + void *rust_env = lean_get_external_data(rust_env_obj); + + // Get the length first + uint64_t len = rs_get_block_bytes_len(rust_env, name); + + // Allocate ByteArray + lean_object *byte_array = lean_alloc_sarray(1, len, len); + + // Copy bytes into it + if (len > 0) { + rs_copy_block_bytes(rust_env, name, byte_array); + } + + return lean_io_result_mk_ok(byte_array); +} + +// FFI wrapper: Get Rust sharing vector length +extern uint64_t c_rs_get_block_sharing_len( + b_lean_obj_arg rust_env_obj, + b_lean_obj_arg name +) { + void *rust_env = lean_get_external_data(rust_env_obj); + return rs_get_block_sharing_len(rust_env, name); +} + +// FFI wrapper: Get pre-sharing expressions buffer length +extern uint64_t c_rs_get_pre_sharing_exprs_len( + b_lean_obj_arg rust_env_obj, + b_lean_obj_arg name +) { + void *rust_env = lean_get_external_data(rust_env_obj); + return rs_get_pre_sharing_exprs_len(rust_env, name); +} + +// FFI wrapper: Get pre-sharing expressions +// Returns: IO UInt64 (number of expressions) +extern lean_obj_res c_rs_get_pre_sharing_exprs( + b_lean_obj_arg rust_env_obj, + b_lean_obj_arg name, + lean_obj_arg out_buf, + lean_obj_arg world +) { + void *rust_env = lean_get_external_data(rust_env_obj); + uint64_t n_exprs = rs_get_pre_sharing_exprs(rust_env, name, out_buf); + return lean_io_result_mk_ok(lean_box_uint64(n_exprs)); +} + +// FFI wrapper: Look up a constant's compiled address +// Returns: IO Bool (true if found) +extern lean_obj_res c_rs_lookup_const_addr( + b_lean_obj_arg rust_env_obj, + b_lean_obj_arg name, + lean_obj_arg out_addr, + lean_obj_arg world +) { + void *rust_env = lean_get_external_data(rust_env_obj); + uint64_t found = rs_lookup_const_addr(rust_env, name, out_addr); + return lean_io_result_mk_ok(lean_box(found != 0)); +} + +// FFI wrapper: Get compiled constant count +extern uint64_t c_rs_get_compiled_const_count(b_lean_obj_arg rust_env_obj) { + void 
*rust_env = lean_get_external_data(rust_env_obj); + return rs_get_compiled_const_count(rust_env); +} + +// ============================================================================= +// Lean C API wrappers for Rust to call +// These wrap Lean's allocation functions so they can be linked from Rust +// ============================================================================= + +lean_object *c_lean_alloc_ctor(unsigned tag, unsigned num_objs, unsigned scalar_sz) { + return lean_alloc_ctor(tag, num_objs, scalar_sz); +} + +void c_lean_ctor_set(lean_object *o, unsigned i, lean_object *v) { + lean_ctor_set(o, i, v); +} + +lean_object *c_lean_ctor_get(lean_object *o, unsigned i) { + return lean_ctor_get(o, i); +} + +unsigned c_lean_obj_tag(lean_object *o) { + return lean_obj_tag(o); +} + +void c_lean_ctor_set_uint8(lean_object *o, unsigned offset, uint8_t v) { + lean_ctor_set_uint8(o, offset, v); +} + +void c_lean_ctor_set_uint64(lean_object *o, unsigned offset, uint64_t v) { + lean_ctor_set_uint64(o, offset, v); +} + +lean_object *c_lean_mk_string(char const *s) { + return lean_mk_string(s); +} + +lean_object *c_lean_alloc_sarray(unsigned elem_size, size_t size, size_t capacity) { + return lean_alloc_sarray(elem_size, size, capacity); +} + +uint8_t *c_lean_sarray_cptr(lean_object *o) { + return lean_sarray_cptr(o); +} + +lean_object *c_lean_alloc_array(size_t size, size_t capacity) { + return lean_alloc_array(size, capacity); +} + +void c_lean_array_set_core(lean_object *o, size_t i, lean_object *v) { + lean_array_set_core(o, i, v); +} + +lean_object *c_lean_array_get_core(lean_object *o, size_t i) { + return lean_array_get_core(o, i); +} + +void c_lean_inc(lean_object *o) { + lean_inc(o); +} + +void c_lean_inc_n(lean_object *o, size_t n) { + lean_inc_n(o, n); +} + +lean_object *c_lean_io_result_mk_ok(lean_object *v) { + return lean_io_result_mk_ok(v); +} + +lean_object *c_lean_io_result_mk_error(lean_object *err) { + return lean_io_result_mk_error(err); +} + 
+lean_object *c_lean_mk_io_user_error(lean_object *msg) { + return lean_mk_io_user_error(msg); +} + +lean_object *c_lean_uint64_to_nat(uint64_t n) { + return lean_uint64_to_nat(n); +} + +// Create a big Nat from limbs (little-endian u64 array) +// This uses GMP's mpz_import and Lean's lean_alloc_mpz +lean_object *c_lean_nat_from_limbs(size_t num_limbs, uint64_t const *limbs) { + if (num_limbs == 0) { + return lean_box(0); + } + if (num_limbs == 1 && limbs[0] <= LEAN_MAX_SMALL_NAT) { + return lean_box(limbs[0]); + } + if (num_limbs == 1) { + return lean_uint64_to_nat(limbs[0]); + } + + // For multi-limb values, use GMP + mpz_t value; + mpz_init(value); + // Import limbs: little-endian order, native endian within limbs + // order = -1 (least significant limb first) + // size = 8 bytes per limb + // endian = 0 (native) + // nails = 0 (full limbs) + mpz_import(value, num_limbs, -1, sizeof(uint64_t), 0, 0, limbs); + + lean_object *result = lean_alloc_mpz(value); + // lean_alloc_mpz takes ownership, so we don't clear + return result; +} diff --git a/c/rust.h b/c/rust.h index 7b5324fb..03060475 100644 --- a/c/rust.h +++ b/c/rust.h @@ -63,3 +63,129 @@ void *rs_keccak256_hasher_init(void); void rs_keccak256_hasher_free(void*); void *rs_keccak256_hasher_update(void*, void*); void *rs_keccak256_hasher_finalize(void*, void*); + +/* --- Ixon FFI (incremental block comparison) --- */ + +// Test FFI round-trip +uint64_t rs_test_ffi_roundtrip(b_lean_obj_arg name); + +// Compile environment with Rust, returns opaque RustCompiledEnv* +void *rs_compile_env_rust_first(b_lean_obj_arg env_consts); + +// Free a RustCompiledEnv +void rs_free_rust_env(void *rust_env); + +// Get block count from RustCompiledEnv +uint64_t rs_get_rust_env_block_count(void const *rust_env); + +// Compare a single block, returns packed result +uint64_t rs_compare_block(void const *rust_env, b_lean_obj_arg name, b_lean_obj_arg lean_bytes); + +// Get the length of Rust's compiled bytes for a block +uint64_t 
rs_get_block_bytes_len(void const *rust_env, b_lean_obj_arg name); + +// Copy Rust's compiled bytes into a pre-allocated ByteArray +void rs_copy_block_bytes(void const *rust_env, b_lean_obj_arg name, lean_obj_arg dest); + +// Get Rust's sharing vector length for a block +uint64_t rs_get_block_sharing_len(void const *rust_env, b_lean_obj_arg name); + +// Compare block with typed result (returns BlockCompareDetail) +lean_obj_res rs_compare_block_v2(void const *rust_env, b_lean_obj_arg name, b_lean_obj_arg lean_bytes, uint64_t lean_sharing_len); + +// Get the buffer length needed for pre-sharing expressions +uint64_t rs_get_pre_sharing_exprs_len(void const *rust_env, b_lean_obj_arg name); + +// Get pre-sharing root expressions for a constant +uint64_t rs_get_pre_sharing_exprs(void const *rust_env, b_lean_obj_arg name, lean_obj_arg out_buf); + +// Look up a constant's compiled address (32-byte blake3 hash) +// Returns 1 on success, 0 if name not found +uint64_t rs_lookup_const_addr(void const *rust_env, b_lean_obj_arg name, lean_obj_arg out_addr); + +// Get the total number of compiled constants +uint64_t rs_get_compiled_const_count(void const *rust_env); + +/* --- Utility FFI --- */ + +// Read first 8 bytes of ByteArray as little-endian UInt64 (for Address.Hashable) +uint64_t rs_bytearray_to_u64_le(b_lean_obj_arg ba); + +/* --- Ix Canonicalization FFI --- */ + +// Canonicalize environment and return Ix.Environment +// Takes: List (Lean.Name × Lean.ConstantInfo) +// Returns: IO Ix.Environment +lean_obj_res rs_canonicalize_env_to_ix(b_lean_obj_arg env_consts); + +/* --- Round-trip FFI for testing Lean object construction --- */ + +// Round-trip basic types: Lean -> Rust -> Lean +lean_object *rs_roundtrip_nat(b_lean_obj_arg nat); +lean_object *rs_roundtrip_string(b_lean_obj_arg str); +lean_object *rs_roundtrip_list_nat(b_lean_obj_arg list); +lean_object *rs_roundtrip_array_nat(b_lean_obj_arg arr); +lean_object *rs_roundtrip_bytearray(b_lean_obj_arg ba); + +// Round-trip 
Ix types: Lean -> Rust -> Lean +lean_object *rs_roundtrip_ix_address(b_lean_obj_arg addr); +lean_object *rs_roundtrip_ix_name(b_lean_obj_arg name); +lean_object *rs_roundtrip_ix_level(b_lean_obj_arg level); +lean_object *rs_roundtrip_ix_expr(b_lean_obj_arg expr); +lean_object *rs_roundtrip_ix_int(b_lean_obj_arg int_val); +lean_object *rs_roundtrip_ix_substring(b_lean_obj_arg sub); +lean_object *rs_roundtrip_ix_source_info(b_lean_obj_arg si); +lean_object *rs_roundtrip_ix_syntax_preresolved(b_lean_obj_arg sp); +lean_object *rs_roundtrip_ix_syntax(b_lean_obj_arg syn); +lean_object *rs_roundtrip_ix_data_value(b_lean_obj_arg dv); +lean_object *rs_roundtrip_bool(b_lean_obj_arg b); +lean_object *rs_roundtrip_ix_constant_info(b_lean_obj_arg info); +lean_object *rs_roundtrip_ix_environment(b_lean_obj_arg env); +lean_object *rs_roundtrip_ix_raw_environment(b_lean_obj_arg raw_env); + +// Round-trip BlockCompareResult and BlockCompareDetail +lean_object *rs_roundtrip_block_compare_result(b_lean_obj_arg ptr); +lean_object *rs_roundtrip_block_compare_detail(b_lean_obj_arg ptr); + +/* --- RawCompiledEnv FFI --- */ + +// Compile environment and return RawCompiledEnv +// Takes: List (Lean.Name × Lean.ConstantInfo) +// Returns: IO RawCompiledEnv +lean_obj_res rs_compile_env_to_raw(b_lean_obj_arg env_consts); + +// Complete compilation pipeline - returns RustCompilationResult +// (rawEnv, condensed, compiled) +lean_obj_res rs_compile_env_full(b_lean_obj_arg env_consts); + +// Compile environment to Ixon RawEnv (structured Lean objects) +// Takes: List (Lean.Name × Lean.ConstantInfo) +// Returns: IO RawEnv +lean_obj_res rs_compile_env_to_ixon(b_lean_obj_arg env_consts); + +// Round-trip RawEnv for FFI testing +lean_object *rs_roundtrip_raw_env(b_lean_obj_arg raw_env); + +// Round-trip RustCondensedBlocks for FFI testing +lean_object *rs_roundtrip_rust_condensed_blocks(b_lean_obj_arg condensed); + +// Round-trip RustCompilePhases for FFI testing +lean_object 
*rs_roundtrip_rust_compile_phases(b_lean_obj_arg phases); + +// Combined compilation phases - returns RustCompilePhases +// (rawEnv, condensed, compileEnv) +// Takes: List (Lean.Name × Lean.ConstantInfo) +// Returns: IO RustCompilePhases +lean_obj_res rs_compile_phases(b_lean_obj_arg env_consts); + +/* --- Graph/SCC FFI --- */ + +// Build reference graph in Rust (returns Ix.Name-based graph) +// Takes: List (Lean.Name × Lean.ConstantInfo) +// Returns: IO (Array (Ix.Name × Array Ix.Name)) +lean_obj_res rs_build_ref_graph(b_lean_obj_arg env_consts); + +// Compute SCCs in Rust (returns Ix.Name-based CondensedBlocks) +// Takes: List (Lean.Name × Lean.ConstantInfo) +// Returns: IO RustCondensedBlocks +lean_obj_res rs_compute_sccs(b_lean_obj_arg env_consts); diff --git a/docs/Ixon.md b/docs/Ixon.md index 78522c8f..655f06d8 100644 --- a/docs/Ixon.md +++ b/docs/Ixon.md @@ -1,564 +1,1309 @@ # Ixon: Ix Object Notation -Ixon is a self-describing binary serialization format for the Ix platform. - -The format has three primary components: - -1. **Universes** correspond to type-system hierarchy levels in Ix's Lean - frontend, although structured slightly differently. -2. **Expressions** which are anonymized dependently-typed lambda calculus terms, - corresponding to expressions in the Lean Frontend. Ixon expressions are - alpha-invariant, meaning `fun (x : A, y: B) => x` and `fun (a : A, b : B) => a` map - to the same ``λ :`3 :`4 =>`0`` Ixon expression (where `A` and `B` in this example are referenced using local DeBruijn indexes) -3. 
**Constants** are top-level content-addressed global declarations such as - typed definitions or inductive datatypes - -## Ixon.Univ - -Ixon Universes are defined as follows - -```lean4 -inductive Univ where - -- tag: 0x0, syntax: 1, concrete type or sort level values - | const : UInt64 -> Univ - -- tag: 0x1, syntax: `1, level variables bound by a top-level constant - | var : UInt64 -> Univ - -- tag: 0x2, syntax: (add `1 `2), the sum of two universes - | add : UInt64 -> Univ -> Univ - -- tag: 0x3, syntax: (max x y), the maximum of two universes - | max : Univ -> Univ -> Univ - -- tag: 0x4, syntax: (imax x y), the impredicative maximum of two universes - | imax : Univ -> Univ -> Univ -``` - -This is structured slightly differently from Ix universes or Lean Levels: - -```lean4 -namespace Ix.IR - inductive Univ - | zero - | succ : Univ → Univ - | max : Univ → Univ → Univ - | imax : Univ → Univ → Univ - | var : Lean.Name → Nat → Univ -end Ix.IR -``` - -The Ixon converts the latter into a form more amenable for serialization by -collecting the unary zero/succ level representation into either simple -`Univ.const` values or possibly complex `Univ.add` values. +Ixon is a content-addressed, alpha-invariant binary serialization format for Lean kernel types. It is designed for the Ix platform's cryptographic verification and zero-knowledge proof systems. -### Serialization +## Design Goals + +1. **Alpha-invariance**: Structurally identical terms have identical serializations, regardless of variable names. The expression `fun (x : Nat) => x` and `fun (y : Nat) => y` serialize to the same bytes. + +2. **Content-addressing**: Every constant is identified by the blake3 hash of its serialized content. This enables deduplication and cryptographic verification. + +3. **Compact storage**: Variable-length encoding, telescope compression, and expression sharing minimize serialized size. + +4. 
**Metadata separation**: Names, binder info, and other source information are stored separately from the alpha-invariant core, enabling roundtrip compilation while preserving deterministic hashing. + +5. **ZK-compatibility**: Cryptographic commitments allow proving knowledge of constants without revealing their content. + +## Key Concepts + +### Alpha-Invariance + +Ixon achieves alpha-invariance through: +- **De Bruijn indices** for bound variables: `Var(0)` refers to the innermost binder +- **De Bruijn indices** for universe parameters: `Univ::Var(0)` is the first universe parameter +- **Content addresses** for constant references: constants are referenced by their hash, not their name + +### Content-Addressing + +Every `Constant` in Ixon is serialized and hashed with blake3. The resulting 256-bit hash is its `Address`. Two constants with identical structure have identical addresses, enabling: +- Automatic deduplication +- Cryptographic verification of equality +- Merkle-tree style proofs + +### Metadata Separation + +The Ixon format separates: +- **Alpha-invariant data** (`Constant`): The mathematical content, hashed for addressing +- **Metadata** (`ConstantMeta`, `ExprMeta`): Names, binder info, reducibility hints—stored separately -Universes are serialized in the following way: +This separation means cosmetic changes (renaming variables) don't change the constant's address. -First, each constructor is assigned a tag value between 0x00 and 0x04. 
This tag -value only requires 3 bits of space, so instead of using an entire byte, we -left-shift the Universe tag into the upper 3 bits of a tag-byte: +## Document Overview + +| Section | Contents | +|---------|----------| +| [Tag Encoding](#tag-encoding-schemes) | Variable-length integer encoding | +| [Universes](#universes) | Type-level hierarchy | +| [Expressions](#expressions) | Lambda calculus terms | +| [Constants](#constants) | Top-level declarations | +| [Sharing](#sharing-system) | Expression deduplication | +| [Metadata](#metadata) | Names and source info | +| [Environment](#environment) | Storage and serialization | +| [Proofs and Claims](#proofs-and-claims) | ZK claims and proofs | +| [Commitments](#cryptographic-commitments) | Commitment scheme | +| [Compilation](#compilation-lean--ixon) | Lean to Ixon conversion | +| [Decompilation](#decompilation-ixon--lean) | Ixon to Lean conversion | +| [Worked Examples](#comprehensive-worked-example) | End-to-end walkthroughs | + +--- + +## Tag Encoding Schemes + +Ixon uses three variable-length encoding schemes for compact representation. + +### Tag4 (4-bit flag) + +Used for expressions, constants, and environment/proof structures. Header byte format: ``` -0bTTTL_SSSS +[flag:4][large:1][size:3] ``` -where the `T` bits hold the tag value. +- **flag** (4 bits): Discriminates type (see table below) +- **large** (1 bit): If 0, size is in the low 3 bits. 
If 1, (size+1) bytes follow with the actual value +- **size** (3 bits): Small values 0-7, or byte count for large values + +**Complete Tag4 flag allocation:** + +| Flag | Category | Type | Size field meaning | +|------|----------|------|-------------------| +| 0x0 | Expr | Sort | Universe index | +| 0x1 | Expr | Var | De Bruijn index | +| 0x2 | Expr | Ref | Univ argument count | +| 0x3 | Expr | Rec | Univ argument count | +| 0x4 | Expr | Prj | Field index | +| 0x5 | Expr | Str | Refs table index | +| 0x6 | Expr | Nat | Refs table index | +| 0x7 | Expr | App | Application count (telescoped) | +| 0x8 | Expr | Lam | Binder count (telescoped) | +| 0x9 | Expr | All | Binder count (telescoped) | +| 0xA | Expr | Let | 0=dep, 1=non_dep | +| 0xB | Expr | Share | Share vector index | +| 0xC | Constant | Muts | Entry count | +| 0xD | Constant | Non-Muts | Variant (0-7) | +| 0xE | Env/Proof | Env/Claim/Proof/Comm | Variant (0-7) | +| 0xF | - | Reserved | - | + +```rust +pub struct Tag4 { + pub flag: u8, // 0-15 + pub size: u64, // Variable-length payload +} +``` -The `L` bit is called the `large-flag` and the `SSSS` bits are called the -"small-size" field, and can store various information depending on the Universe -variant defined by the tag value. +**Examples:** -For the `Univ.const` constructor, the large-flag and the small-size field are -used to hold in a single byte small values. For example, the following tag-byte +``` +Tag4 { flag: 0x1, size: 5 } +Header: 0b0001_0_101 = 0x15 +Total: 1 byte + +Tag4 { flag: 0x2, size: 256 } +Header: 0b0010_1_001 = 0x29 (large=1, 2 bytes follow) +Bytes: 0x00 0x01 (256 in little-endian) +Total: 3 bytes +``` + +### Tag2 (2-bit flag) + +Used for universes. Header byte format: ``` -0bTTTL_SSSS -0b0000_1111 +[flag:2][large:1][size:5] ``` -represents `Univ.const 15`. Larger values than 15 are represented with +- **flag** (2 bits): Discriminates universe type (0-3) +- **large** (1 bit): If 0, size is in the low 5 bits (0-31). 
If 1, (size+1) bytes follow +- **size** (5 bits): Small values 0-31, or byte count +```rust +pub struct Tag2 { + pub flag: u8, // 0-3 + pub size: u64, // Variable-length payload +} ``` -tag-byte , 1 large-size byte = small-size + 1 -0bTTTL_SSSS, LS0 -0b0001_0000, 0b1111_1111 -(Univ.const 255) -tag-byte , 2 large-size bytes = small-size + 1 -0bTTTL_SSSS, LS0 LS1 -0b0001_0001, 0b1111_1111, 0b1111_1111 -(Univ.const 65536) -... +**Examples:** + +``` +Tag2 { flag: 0, size: 15 } +Header: 0b00_0_01111 = 0x0F +Total: 1 byte + +Tag2 { flag: 3, size: 100 } +Header: 0b11_1_00000 = 0xE0 (large=1, 1 byte follows) +Bytes: 0x64 (100) +Total: 2 bytes ``` -If the large-flag is set, the small-size field is used to store the number of -bytes of an variable length large-size field (with an off-by-one optimization). - -This approach is used for `Univ.const` and `Univ.var`. For `Univ.max` and -`Univ.imax`, the large-flag and small size field are unused, and the -serialization of the parameters are directly concatenated. These sub-objects are -called the *body* fields. For example, the -serialization of `Univ.max (Univ.const 0) (Univ.const 15)` is: - -``` -tag-byte body1 body2 - (tag-byte) (tag-byte) -0bTTTL_SSSS, 0bTTTL_SSSS, 0bTTTL_SSSS -0b1000_0000, 0b0000_0000, 0b0000_1111 -``` - -The number of body fields is determined by the the tag value. - -Finally, Univ.add combines both a large-size field and a body field: - -``` -(Univ.add 15 (Univ.const 0)) -tag-byte body1 - (tag-byte) -0bTTTL_SSSS, 0bTTTL_SSSS -0b1000_1111, 0b0000_0000 - -(Univ.add 16 (Univ.const 0)) - -tag-byte large-size body1 -0bTTTL_SSSS, LS0, , 0bTTTL_SSSS -0b1001_0000, 0b0001_0000 0b0000_0000 -``` - -## Ixon.Expr - -Ixon expressions are defined as follows: - -```lean4 --- tag-byte: 0xTTTT_LXXX -inductive Expr where - -- tag: 0x0, syntax: ^1 - | vari (idx: UInt64) : Expr - -- tag: 0x1, syntax: {max (add 1 2) (var 1)} - | sort (univ: Univ) : Expr - -- tag: 0x2, syntax #dead_beef_cafe_babe.{u1, u2, ... 
} - | cnst (adr: Address) (lvls: List Univ) : Expr - -- tag: 0x3, syntax: #1.{u1, u2, u3} - | rec_ (idx: UInt64) (lvls: List Univ) : Expr - -- tag: 0x4, syntax: (f x y z) - | apps (func: Expr) (arg: Expr) (args: List Expr) : Expr - -- tag: 0x5, syntax: (λ A B C => body) - | lams (types: List Expr) (body: Expr) : Expr - -- tag: 0x6, syntax: (∀ A B C -> body) - | alls (types: List Expr) (body: Expr) : Expr - -- tag: 0x7, syntax: (let d : A in b) - | let_ (nonDep: Bool) (type: Expr) (defn: Expr) (body: Expr) : Expr - -- tag: 0x8, syntax: x.1 - | proj : UInt64 -> Expr -> Expr - -- tag: 0x9, syntax: "foobar" - | strl (lit: String) : Expr - -- tag: 0xA, syntax: 0xdead_beef - | natl (lit: Nat): Expr --- virtual expression: array: 0xB --- virtual expression: const: 0xC -``` - -This is largely similar to the Ix.Expr definition, which can be seen as a -content-addressable variation of Lean4 expressions once all metavariables have -been elaborated. - -```lean4 -namespace Ix.IR - inductive Expr - | var : Nat → List Univ → Expr - | sort : Univ → Expr - | const : Lean.Name → Address → Address → List Univ → Expr - | app : Expr → Expr → Expr - | lam : Expr → Expr → Expr - | pi : Expr → Expr → Expr - | letE : Bool -> Expr → Expr → Expr → Expr - | lit : Lean.Literal → Expr - | proj : Nat → Expr → Expr -end Ix.IR -``` - -The primary differences between these types are: - -1. Non-computationally relevante metadata like Lean.Name, or BinderInfo are - removed (TODO: update Ix.IR def once metadata is implemented) -2. Repeated lambda and forall binders are collected, so that e.g. `fun x y z => a` -can be represented with a single `Expr.lam`. -3. Repeated application of arguments are collected into telescopes, so that e.g. -`(f a b c)` can be expressed with a single `Expr.app` -4. String and number literals are lifted into the Expr inductive - -Expr has two reserved "virtual" constructors, which are used in order -to create the Ixon.constants, and will be explained in the next section. 
+### Tag0 (no flag) -### Serialization +Used for plain variable-length u64 values. Header byte format: -Expression serialization is structurally similar to that for Universes. -Expression tags range from 0x0 to 0xF (with 0xB, 0xC used for Const, and 0xD -through 0xF reserved for future use), so they require 4 bits, rather than 3 for -universes. Otherwise, expressions have the same tag-byte structure as universes, -with a large-flag and a small-size field: +``` +[large:1][size:7] +``` + +- **large** (1 bit): If 0, size is in the low 7 bits (0-127). If 1, (size+1) bytes follow +- **size** (7 bits): Small values 0-127, or byte count + +**Examples:** ``` -0xTTTT_LSSS +Tag0 { size: 42 } +Header: 0b0_0101010 = 0x2A +Total: 1 byte + +Tag0 { size: 1000 } +Header: 0b1_0000001 = 0x81 (large=1, 2 bytes follow) +Bytes: 0xE8 0x03 (1000 in little-endian) +Total: 3 bytes ``` -We will now work through serializations for each Expr constructor in detail: +--- + +## Universes -#### Expr.var +Universes represent type-level hierarchy in the dependent type system. +```rust +pub enum Univ { + Zero, // Type 0 / Prop + Succ(Arc), // Successor: Type (n+1) + Max(Arc, Arc), // Maximum of two universes + IMax(Arc, Arc), // Impredicative max (0 if second is 0) + Var(u64), // Universe parameter (de Bruijn index) +} ``` --- tag: 0x0, syntax: ^1 -| vari (idx: UInt64) : Expr + +### Serialization (Tag2) + +| Flag | Variant | Size field | Body | +|------|---------|------------|------| +| 0 | Zero/Succ | Succ count (0 = Zero) | None | +| 1 | Max | Unused | Two Univs | +| 2 | IMax | Unused | Two Univs | +| 3 | Var | Variable index | None | + +**Telescope compression**: Nested `Succ` constructors are collapsed. `Succ(Succ(Succ(Zero)))` serializes as a single Tag2 with flag=0 and size=3. 
+ +### Examples + ``` +Univ::Zero +Tag2 { flag: 0, size: 0 } = 0b00_0_00000 +Bytes: 0x00 + +Univ::Succ(Zero) // Type 1 +Tag2 { flag: 0, size: 1 } + base +Bytes: 0x01 0x00 -Variables are serialized similarly to Univ.var universe variables. The small or -large size field holds the index: +Univ::Succ(Succ(Succ(Zero))) // Type 3 +Tag2 { flag: 0, size: 3 } + base +Bytes: 0x03 0x00 +Univ::Var(0) // First universe parameter +Tag2 { flag: 3, size: 0 } = 0b11_0_00000 +Bytes: 0xC0 + +Univ::Var(1) // Second universe parameter +Tag2 { flag: 3, size: 1 } = 0b11_0_00001 +Bytes: 0xC1 + +Univ::Max(Zero, Var(1)) +Tag2 { flag: 1, size: 0 } + Zero + Var(1) +Bytes: 0x40 0x00 0xC1 +``` + +--- + +## Expressions + +Expressions are alpha-invariant lambda calculus terms with de Bruijn indices. + +```rust +pub enum Expr { + Sort(u64), // Type at universe level (index into univs table) + Var(u64), // De Bruijn variable index + Ref(u64, Vec), // Constant reference (refs index, univ indices) + Rec(u64, Vec), // Mutual recursion (ctx index, univ indices) + Prj(u64, u64, Arc), // Projection (type refs index, field, value) + Str(u64), // String literal (refs index to blob) + Nat(u64), // Natural literal (refs index to blob) + App(Arc, Arc), // Application + Lam(Arc, Arc), // Lambda (type, body) + All(Arc, Arc), // Forall/Pi (type, body) + Let(bool, Arc, Arc, Arc), // Let (non_dep, type, value, body) + Share(u64), // Reference to sharing vector +} ``` -0xTTTT_LSSS -(.var 0) -0x0000_0000 -(.var 7) -0x0000_0111 +### Key Design Choices + +1. **No names**: Binders have no names—they use de Bruijn indices. Names are stored in metadata. + +2. **No binder info**: Implicit/explicit info is stored in metadata. + +3. **Indirection tables**: `Ref`, `Str`, `Nat` store indices into the constant's `refs` table, not raw addresses. `Sort` stores an index into the `univs` table. -(.var 8) -0x0000_1000, 0x0000_1000 +4. 
**Share nodes**: Common subexpressions can be deduplicated via `Share(idx)` references to the constant's `sharing` vector. -(.var 256) -0x0000_1001, 0x0000_0000, 0x0000_0001 +### Serialization (Tag4) + +| Flag | Variant | Size field | Body | +|------|---------|------------|------| +| 0x0 | Sort | Universe index | None | +| 0x1 | Var | De Bruijn index | None | +| 0x2 | Ref | Univ count | Ref index (Tag0) + univ indices | +| 0x3 | Rec | Univ count | Rec index (Tag0) + univ indices | +| 0x4 | Prj | Field index | Type ref index (Tag0) + value Expr | +| 0x5 | Str | Refs index | None | +| 0x6 | Nat | Refs index | None | +| 0x7 | App | App count | Function + args (telescoped) | +| 0x8 | Lam | Binder count | Types + body (telescoped) | +| 0x9 | All | Binder count | Types + body (telescoped) | +| 0xA | Let | 0=dep, 1=non_dep | Type + value + body | +| 0xB | Share | Share index | None | + +### Telescope Compression + +Nested constructors of the same kind are collapsed: + +**Applications**: `App(App(App(f, a), b), c)` becomes: +``` +Tag4 { flag: 0x7, size: 3 } // 3 applications ++ f + a + b + c ``` -The index, when large, is stored in little-endian format. -#### Expr.sort +**Lambdas**: `Lam(t1, Lam(t2, Lam(t3, body)))` becomes: +``` +Tag4 { flag: 0x8, size: 3 } // 3 binders ++ t1 + t2 + t3 + body +``` + +**Foralls**: Same as lambdas with flag 0x9. 
+ +### Expression Examples ``` --- tag: 0x1, syntax: {max (add 1 2) (var 1)} -| sort (univ: Univ) : Expr +Expr::Var(0) // Innermost bound variable +Tag4 { flag: 0x1, size: 0 } +Bytes: 0x10 + +Expr::Sort(0) // First universe in univs table +Tag4 { flag: 0x0, size: 0 } +Bytes: 0x00 + +Expr::Ref(0, vec![0, 1]) // First constant with 2 univ args +Tag4 { flag: 0x2, size: 2 } ++ Tag0(0) // refs index ++ Tag0(0) // first univ index ++ Tag0(1) // second univ index +Bytes: 0x22 0x00 0x00 0x01 + +Expr::Lam(type_expr, Lam(type_expr2, body)) // 2-binder lambda +Tag4 { flag: 0x8, size: 2 } ++ type_expr + type_expr2 + body + +Expr::Share(5) // Reference to sharing[5] +Tag4 { flag: 0xB, size: 5 } +Bytes: 0xB5 ``` -Sorts are serialized identically to the universe serialization described above, -with a single byte prefix. The size fields are not used. +--- + +## Constants + +A `Constant` is the top-level unit of storage, containing an alpha-invariant declaration plus reference tables. + +```rust +pub struct Constant { + pub info: ConstantInfo, // The declaration payload + pub sharing: Vec>, // Shared subexpressions + pub refs: Vec
, // Referenced constant addresses + pub univs: Vec>, // Referenced universes +} +``` + +### Reference Tables + +Expressions don't store addresses or universes directly. Instead: + +- `Expr::Ref(idx, univ_indices)` → `constant.refs[idx]` is the address, `constant.univs[univ_indices[i]]` are the universe arguments +- `Expr::Sort(idx)` → `constant.univs[idx]` is the universe +- `Expr::Str(idx)` / `Expr::Nat(idx)` → `constant.refs[idx]` is an address into the blob store + +This indirection enables sharing and smaller serializations. + +### Serialization + +Constants use two Tag4 flags: +- **Flag 0xD**: Non-Muts constants. Size field (0-7) holds the variant. Always 1-byte tag. +- **Flag 0xC**: Muts constants. Size field holds the entry count. +**Non-Muts format:** ``` -(Expr.sort (Univ.var 0)) -0xTTTT_LSSS 0bTTTL_SSSS -0x0001_0000 0b0000_0000 +Tag4 { flag: 0xD, size: variant } // Always 1 byte (variant 0-7) ++ ConstantInfo payload ++ sharing vector (Tag0 length + expressions) ++ refs vector (Tag0 length + 32-byte addresses) ++ univs vector (Tag0 length + universes) ``` -#### Expr.cnst +**Muts format:** +``` +Tag4 { flag: 0xC, size: entry_count } ++ MutConst entries (no length prefix - count is in tag) ++ sharing vector ++ refs vector ++ univs vector +``` +### ConstantInfo Variants + +```rust +pub enum ConstantInfo { + Defn(Definition), // variant 0 + Recr(Recursor), // variant 1 + Axio(Axiom), // variant 2 + Quot(Quotient), // variant 3 + CPrj(ConstructorProj), // variant 4 + RPrj(RecursorProj), // variant 5 + IPrj(InductiveProj), // variant 6 + DPrj(DefinitionProj), // variant 7 + Muts(Vec), // uses FLAG_MUTS (0xC), not a variant +} ``` --- tag: 0x2, syntax #dead_beef_cafe_babe.{u1, u2, ... 
} -| cnst (adr: Address) (lvls: List Univ) : Expr + +| Variant | Type | Notes | +|---------|------|-------| +| 0 | Defn | Definition/Opaque/Theorem | +| 1 | Recr | Recursor | +| 2 | Axio | Axiom | +| 3 | Quot | Quotient | +| 4 | CPrj | Constructor projection | +| 5 | RPrj | Recursor projection | +| 6 | IPrj | Inductive projection | +| 7 | DPrj | Definition projection | +| - | Muts | Uses flag 0xC | + +#### Definition (variant 0) + +Covers definitions, theorems, and opaques. + +```rust +pub struct Definition { + pub kind: DefKind, // Definition | Opaque | Theorem + pub safety: DefinitionSafety, // Safe | Unsafe | Partial + pub lvls: u64, // Universe parameter count + pub typ: Arc, // Type expression + pub value: Arc, // Value expression +} ``` -The const reference serialization uses the size fields to store the number of -universe arguments, which follow the fixed 256-bit/32-byte Address serialization -as body fields: +**Serialization**: +``` +DefKind+Safety packed (1 byte): (kind << 2) | safety + - kind: 0=Definition, 1=Opaque, 2=Theorem + - safety: 0=Unsafe, 1=Safe, 2=Partial ++ lvls (Tag0) ++ typ (Expr) ++ value (Expr) +``` +#### Recursor (variant 1) + +Eliminator for inductive types. 
+
+```rust
+pub struct Recursor {
+    pub k: bool,                  // Supports K-like reduction (Prop-valued inductive)
+    pub is_unsafe: bool,
+    pub lvls: u64,                // Universe parameter count
+    pub params: u64,              // Number of parameters
+    pub indices: u64,             // Number of indices
+    pub motives: u64,             // Number of motives
+    pub minors: u64,              // Number of minor premises
+    pub typ: Arc<Expr>,           // Type expression
+    pub rules: Vec<RecursorRule>,
+}
+
+pub struct RecursorRule {
+    pub fields: u64,              // Field count for this constructor
+    pub rhs: Arc<Expr>,           // Right-hand side
+}
```
-(Expr.cnst [Univ.var 0,Univ.var 1, Univ.var 2])
-0xTTTT_LSSS 32 Address bytes body1 body2 body3
-0x0002_0011, ..., , 0b0000_0000, 0b0000_0001, 0b0000_0002
+
+**Serialization**:
```
+Packed bools (1 byte): bit 0 = k, bit 1 = is_unsafe
++ lvls (Tag0)
++ params (Tag0)
++ indices (Tag0)
++ motives (Tag0)
++ minors (Tag0)
++ typ (Expr)
++ rules.len (Tag0)
++ [RecursorRule]*
+```
+
+Each `RecursorRule` serializes as:
+```
+fields (Tag0)
++ rhs (Expr)
+```
+
+#### Axiom (variant 2)
-#### Expr.rec_
+```rust
+pub struct Axiom {
+    pub is_unsafe: bool,
+    pub lvls: u64,
+    pub typ: Arc<Expr>,
+}
+```
+**Serialization**:
```
--- tag: 0x3, syntax: #1.{u1, u2, u3}
-| rec_ (idx: UInt64) (lvls: List Univ) : Expr
+is_unsafe (1 byte: 0 or 1)
++ lvls (Tag0)
++ typ (Expr)
```
-Recursive references serialize like a combination of Expr.var and Expr.cnst. The
-size fields store the index:
+#### Quotient (variant 3)
+
+Quotient type primitives (there are exactly 4 in Lean: `Quot`, `Quot.mk`, `Quot.lift`, `Quot.ind`).
+
+```rust
+pub struct Quotient {
+    pub kind: QuotKind, // Type | Ctor | Lift | Ind
+    pub lvls: u64,
+    pub typ: Arc<Expr>,
+}
```
-(.rec 0 [.var 0, .var 1])
-0xTTTT_LSSS, body1, body2
-0x0011_0000, 0b0000_0000, 0b0000_0001
-(.rec 8 [.var 0, .var 1])
-0xTTTT_LSSS, L0, body1, body2
-0x0011_1000, 0b0000_1000, 0b0000_0000, 0b0000_0001
+**Serialization**:
```
+QuotKind (1 byte: 0=Type, 1=Ctor, 2=Lift, 3=Ind)
++ lvls (Tag0)
++ typ (Expr)
+```
+
+#### Projections (variants 4-7)
-#### Expr.apps
+Projections reference a mutual block and an index within it:
-Applications serialize by storing the number of extra arguments in the size
-field. There is a body field for the function and first argument, so total
-number of body fields is the number of extra arguments plus 2.
+```rust
+pub struct InductiveProj { pub idx: u64, pub block: Address }
+pub struct ConstructorProj { pub idx: u64, pub cidx: u64, pub block: Address }
+pub struct RecursorProj { pub idx: u64, pub block: Address }
+pub struct DefinitionProj { pub idx: u64, pub block: Address }
```
-(f x y z)
-(.app (.vari 0) (.vari 1) [.vari 2, .vari 3])
-0xTTTT_LSSS, body1, body2, body3, body4
-0x0100_0010, 0b0000_0000, 0b0000_0001, 0b0000_0010, 0b0000_0011
+
+When a constant is part of a mutual block, it's stored as a projection pointing to the shared `Muts` block. This avoids duplication.
+
+#### Mutual Block (flag 0xC)
+
+Muts uses its own flag (0xC) instead of a variant under flag 0xD. The size field contains the entry count, eliminating the need for a separate length prefix.
+
+Contains multiple related constants:
+
+```rust
+pub enum MutConst {
+    Defn(Definition), // tag 0
+    Indc(Inductive),  // tag 1
+    Recr(Recursor),   // tag 2
+}
```
-#### Expr.lams
+Each `MutConst` entry serializes as a 1-byte tag followed by the payload. The `sharing`, `refs`, and `univs` tables are shared across all members of the mutual block.
+
+#### Inductive (inside MutConst)
+
+An inductive type definition with its constructors.
+ +```rust +pub struct Inductive { + pub recr: bool, // Has recursive occurrences + pub refl: bool, // Is reflexive + pub is_unsafe: bool, + pub lvls: u64, // Universe parameter count + pub params: u64, // Number of parameters + pub indices: u64, // Number of indices + pub nested: u64, // Nested inductive depth + pub typ: Arc, // Type expression + pub ctors: Vec, +} +``` + +**Serialization**: +``` +Packed bools (1 byte): bit 0 = recr, bit 1 = refl, bit 2 = is_unsafe ++ lvls (Tag0) ++ params (Tag0) ++ indices (Tag0) ++ nested (Tag0) ++ typ (Expr) ++ ctors.len (Tag0) ++ [Constructor]* +``` + +#### Constructor (inside Inductive) + +A constructor within an inductive type. + +```rust +pub struct Constructor { + pub is_unsafe: bool, + pub lvls: u64, // Universe parameter count + pub cidx: u64, // Constructor index + pub params: u64, // Number of parameters + pub fields: u64, // Number of fields + pub typ: Arc, // Type expression +} +``` + +**Serialization**: +``` +is_unsafe (1 byte: 0 or 1) ++ lvls (Tag0) ++ cidx (Tag0) ++ params (Tag0) ++ fields (Tag0) ++ typ (Expr) +``` + +--- + +## Sharing System -Lambdas store the number of binders in the size fields, and then the binder -types in a corresponding number of body fields, plus an additional body field -for the function body. +The sharing system deduplicates common subexpressions within a constant. + +### How It Works + +1. **Merkle hashing**: Every subexpression is assigned a structural hash using blake3 +2. **Usage counting**: Count how many times each unique subexpression appears +3. **Profitability analysis**: Decide which subexpressions to share based on size savings +4. 
**Rewriting**: Replace selected subexpressions with `Share(idx)` references + +### Profitability Heuristic + +Sharing a subterm is profitable when: ``` -(λ :A :B :C => b) -(.lams [.vari 0, .vari 1, .vari 2] .vari 3]) -0xTTTT_LSSS, body1, body2, body3, body4 -0x0101_0011, 0b0000_0000, 0b0000_0001, 0b0000_0010, 0b0000_0011 +(N - 1) * term_size > N * share_ref_size ``` -#### Expr.alls +Where: +- `N` = number of occurrences +- `term_size` = serialized size of the subterm +- `share_ref_size` = size of `Share(idx)` tag (typically 1-2 bytes) + +### Sharing Vector + +The sharing vector is built incrementally: +- Each entry can only reference earlier entries (no forward references) +- Entries are sorted by profitability (most savings first) +- Root expressions are rewritten using all available share indices + +### Example -Foralls are identical to lambdas with a different tag: +Before sharing: ``` -(∀ :A :B :C => b) -(.alls [.vari 0, .vari 1, .vari 2] .vari 3]) -0xTTTT_LSSS, body1, body2, body3, body4 -0x0110_0011, 0b0000_0000, 0b0000_0001, 0b0000_0010, 0b0000_0011 +App( + Lam(Nat, Lam(Nat, App(add, Var(1), Var(0)))), + App( + Lam(Nat, Lam(Nat, App(add, Var(1), Var(0)))), // Duplicate! + zero + ) +) ``` -#### Expr.let_ +After sharing: +``` +sharing[0] = Lam(Nat, Lam(Nat, App(add, Var(1), Var(0)))) + +App( + Share(0), + App(Share(0), zero) +) +``` + +--- + +## Metadata + +Metadata stores non-structural information that's needed for roundtrip compilation but doesn't affect the constant's identity. -Let bindings do not use the size fields and have 3 body fields: +### ExprMeta Arena +Expression metadata is stored as an append-only arena of `ExprMetaData` nodes, built bottom-up during compilation. Each node has an arena index, and parent nodes reference children by index. + +```rust +/// Arena for expression metadata within a single constant. 
+pub struct ExprMeta { + pub nodes: Vec, +} + +pub enum ExprMetaData { + Leaf, // Var, Sort, Nat, Str (no metadata) + App { children: [u64; 2] }, // [fun_idx, arg_idx] + Binder { name: Address, info: BinderInfo, children: [u64; 2] }, // [type_idx, body_idx] + LetBinder { name: Address, children: [u64; 3] }, // [type_idx, value_idx, body_idx] + Ref { name: Address }, // Const/Rec reference name + Prj { struct_name: Address, child: u64 }, // Projection struct name + Mdata { mdata: Vec, child: u64 }, // Metadata wrapper +} ``` -(.let_ .vari 0, .vari 1, .vari 2) -0xTTTT_LSSS, body1, body2, body3 -0x0111_0000, 0b0000_0000, 0b0000_0001, 0b0000_0011 + +**ExprMetaData Serialization** (tags 0-9, with BinderInfo packed into Binder tags): + +| Tag | Variant | Payload | +|-----|---------|---------| +| 0 | Leaf | (none) | +| 1 | App | children: [u64, u64] | +| 2 | Binder (Default) | name_idx + children: [u64, u64] | +| 3 | Binder (Implicit) | name_idx + children: [u64, u64] | +| 4 | Binder (StrictImplicit) | name_idx + children: [u64, u64] | +| 5 | Binder (InstImplicit) | name_idx + children: [u64, u64] | +| 6 | LetBinder | name_idx + children: [u64, u64, u64] | +| 7 | Ref | name_idx | +| 8 | Prj | struct_name_idx + child: u64 | +| 9 | Mdata | kvmap_count + kvmaps + child: u64 | + +Packing BinderInfo into the Binder tag (tags 2-5) saves 1 byte per binder. Name addresses are serialized as indices into a `NameIndex` for compactness. + +### ConstantMeta + +Per-constant metadata. 
Each variant stores a name, universe parameter names, an `ExprMeta` arena, and root indices pointing into the arena: + +```rust +pub enum ConstantMeta { + Empty, // tag 255 + Def { name, lvls, hints, all, ctx, + arena, type_root, value_root }, // tag 0 + Axio { name, lvls, arena, type_root }, // tag 1 + Quot { name, lvls, arena, type_root }, // tag 2 + Indc { name, lvls, ctors, all, ctx, + arena, type_root }, // tag 3 + Ctor { name, lvls, induct, arena, type_root }, // tag 4 + Rec { name, lvls, rules, all, ctx, + arena, type_root, rule_roots }, // tag 5 +} ``` -#### Expr.proj +**ConstantMeta Serialization:** + +| Tag | Variant | Payload | +|-----|---------|---------| +| 0 | Def | name_idx, lvl_idxs, hints, all_idxs, ctx_idxs, arena, type_root, value_root | +| 1 | Axio | name_idx, lvl_idxs, arena, type_root | +| 2 | Quot | name_idx, lvl_idxs, arena, type_root | +| 3 | Indc | name_idx, lvl_idxs, ctor_idxs, all_idxs, ctx_idxs, arena, type_root | +| 4 | Ctor | name_idx, lvl_idxs, induct_idx, arena, type_root | +| 5 | Rec | name_idx, lvl_idxs, rule_idxs, all_idxs, ctx_idxs, arena, type_root, rule_roots | +| 255 | Empty | (none) | -Projections store their index in the size fields and have 1 body field: +### Indexed Serialization +Metadata uses indexed serialization for efficiency. A `NameIndex` maps addresses to sequential indices, reducing 32-byte addresses to 1-2 byte indices: + +```rust +pub type NameIndex = HashMap; +pub type NameReverseIndex = Vec
; ``` -(.proj 0 .vari 0) -0xTTTT_LSSS, body1 -0x1000_0000, 0x0000_0000 + +--- + +## Environment + +The `Env` structure stores all Ixon data using concurrent `DashMap`s. + +```rust +pub struct Env { + pub consts: DashMap, // Alpha-invariant constants + pub named: DashMap, // Name -> (address, metadata) + pub blobs: DashMap>, // Raw data (strings, nats) + pub names: DashMap, // Hash-consed Name components + pub comms: DashMap, // Cryptographic commitments + pub addr_to_name: DashMap, // Reverse index +} + +pub struct Named { + pub addr: Address, // Address of constant in consts + pub meta: ConstantMeta, // Metadata for this constant +} +``` + +### Storage Layers + +| Map | Key | Value | Purpose | +|-----|-----|-------|---------| +| `consts` | Content hash | Constant | Alpha-invariant data | +| `named` | Lean Name | Named | Name → address + metadata | +| `blobs` | Content hash | Bytes | String/nat literals | +| `names` | Name hash | Name | Hash-consed name components | +| `comms` | Commitment | Comm | ZK commitments | + +### Blob Storage + +Blobs store raw byte data for string and natural number literals. When an expression contains `Expr::Str(idx)` or `Expr::Nat(idx)`, the `refs[idx]` address points to a blob entry. + +**String encoding**: UTF-8 bytes directly. + +**Natural number encoding**: Little-endian bytes (minimum representation). + +```rust +// String "hello" -> 5 bytes: [0x68, 0x65, 0x6C, 0x6C, 0x6F] +// Nat 256 -> 2 bytes: [0x00, 0x01] +// Nat 0 -> 1 byte: [0x00] ``` -#### Expr.strl +Blobs are content-addressed: the blob's address is `blake3(bytes)`. + +### Name Hash-Consing -String literals store the length of the utf8 text in bytes in the size fields: +Lean names are hierarchical (e.g., `Nat.add` = `Str(Str(Anonymous, "Nat"), "add")`). Ixon hash-conses names so identical name components share storage. 
+```rust +pub enum NameData { + Anonymous, // Root/empty name + Str(Name, String), // Parent + string component + Num(Name, Nat), // Parent + numeric component (for hygiene) +} ``` -(.strl "foobar") -0xTTTT_LSSS, body -0x1001_0100, 0x66, 0x6f, 0x6f, 0x62, 0x61, 0x72 + +**Name serialization** (component form, for Env section 3): +``` +Tag (1 byte): 0 = Anonymous, 1 = Str, 2 = Num ++ (if Str/Num) parent_address (32 bytes) ++ (if Str) string_len (Tag0) + UTF-8 bytes ++ (if Num) nat_len (Tag0) + little-endian bytes ``` -#### Expr.natl +Names are topologically sorted in the environment so parents are serialized before children, enabling reconstruction during deserialization. -Number literals store the length of the natural number's byte representation according to -the following algorithm: +### Environment Serialization -```lean4 -def natToBytesLE (x: Nat) : Array UInt8 := - if x == 0 then Array.mkArray1 0 else List.toArray (go x x) - where - go : Nat -> Nat -> List UInt8 - | _, 0 => [] - | 0, _ => [] - | Nat.succ f, x => Nat.toUInt8 x:: go f (x / 256) +The environment serializes in 5 sections with a version header: -def natFromBytesLE (xs: Array UInt8) : Nat := - xs.toList.enum.foldl (fun acc (i, b) => acc + (UInt8.toNat b) * 256 ^ i) 0 ``` +Header: Tag4 { flag: 0xE, size: VERSION } +``` + +Current version is 2 (supports zstd compression after header). 
+**Section 1: Blobs** (Address → raw bytes) ``` -(.natl 0) -0xTTTT_LSSS, body -0x1010_0001, 0x0 +count (Tag0) +[Address (32 bytes) + len (Tag0) + bytes]* ``` -## Ixon.Const +**Section 2: Constants** (Address → Constant) +``` +count (Tag0) +[Address (32 bytes) + Constant]* +``` -Ixon constants are defined as follows: +**Section 3: Names** (Address → NameComponent, topologically sorted) +``` +count (Tag0) +[Address (32 bytes) + NameComponent]* +``` -```lean4 -inductive Const where - -- 0xC0 - | axio : Axiom -> Const - -- 0xC1 - | theo : Theorem -> Const - -- 0xC2 - | opaq : Opaque -> Const - -- 0xC3 - | defn : Definition -> Const - -- 0xC4 - | quot : Quotient -> Const - -- 0xC5 - | ctor : Constructor -> Const - -- 0xC6 - | recr : Recursor -> Const - -- 0xC7 - | indc : Inductive -> Const - -- 0xC8 - | ctorProj : ConstructorProj -> Const - -- 0xC9 - | recrProj : RecursorProj -> Const - -- 0xCA - | indcProj : InductiveProj -> Const - -- 0xCB - | defnProj : DefinitionProj -> Const - -- 0xCC - | mutDef : List Definition -> Const - -- 0xCD - | mutInd : List Inductive -> Const - -- 0xCE - | meta : Metadata -> Const - deriving BEq, Repr, Inhabited +**Section 4: Named** (Name Address → Named with indexed metadata) +``` +count (Tag0) +[NameAddress (32 bytes) + ConstAddress (32 bytes) + ConstantMeta]* ``` -The internal details of this inductive are quite detailed, but -corresponds to top-level declarations in the Lean4 frontend, rendered namelessly -content-addressable. +**Section 5: Commitments** (Address → Comm) +``` +count (Tag0) +[Address (32 bytes) + secret_addr (32 bytes) + payload_addr (32 bytes)]* +``` -### Serialization +--- + +## Proofs and Claims + +Claims, proofs, commitments, and environments share Tag4 flag 0xE. 
+ +### Tag4 0xE Variant Layout + +| Size | Byte | Type | Payload | +|------|------|------|---------| +| 0 | `0xE0` | Environment | sections | +| 1 | `0xE1` | CheckProof | 1 addr + proof bytes | +| 2 | `0xE2` | EvalProof | 2 addr + proof bytes | +| 3 | `0xE3` | CheckClaim | 1 addr | +| 4 | `0xE4` | EvalClaim | 2 addr: input, output | +| 5 | `0xE5` | Commitment | 2 addr: secret, payload | +| 6 | `0xE6` | RevealClaim | 1 addr + RevealConstantInfo | +| 7 | `0xE7` | RevealProof | 1 addr + RevealConstantInfo + proof bytes | + +### Claim Types + +```rust +/// Evaluation claim: the constant at `input` evaluates to the constant at `output`. +pub struct EvalClaim { + pub input: Address, // Input constant address + pub output: Address, // Output constant address +} + +/// Type-checking claim: the constant at `value` is well-typed. +pub struct CheckClaim { + pub value: Address, // Value constant address +} + +/// Selective revelation of fields of a committed constant. +pub struct RevealClaim { + pub comm: Address, // Commitment address + pub info: RevealConstantInfo, // Revealed field information +} + +pub enum Claim { + Evals(EvalClaim), + Checks(CheckClaim), + Reveals(RevealClaim), +} +``` -We will first describe the "virtual expression" constructors from the previous -section, then go through each Const variant and describe its serialization: +### Commitment Hashing -#### Arrays +Commitments are serialized with Tag4(0xE, 5) and hashed with blake3: +``` +commitment_address = blake3(0xE5 + secret_address + payload_address) +``` -The expression tag 0xB signifies an array of homogoneous body fields, and stores -the number of such fields in the expression size fields. The format of these -body fields must be known from context +The payload address is always the transparent hash of the constant, regardless of the secret. +Two commitments to the same constant share the same payload address. -#### Consts +### RevealConstantInfo Format -The expression tag 0xC signnifies a constant. 
The large flag and small size -field are combined to store a second 4-bit tag indicating the const variant. -This is done to enable Ixon.Const and Ixon.Expr to live in the same "namespace" -of bytes, and remove possible ambiguities between them. +RevealClaim allows selective revelation of constant metadata fields (kind, safety, idx, etc.) +without opening the full commitment. Serialization: `variant (1 byte) + field_mask (Tag0) + field values...` -#### Const.axio +The field_mask uses Tag0 encoding (1 byte for masks < 128). Fields are serialized in mask bit order. +Expression fields are revealed as `Address = blake3(serialized Expr bytes)`. -```lean4 --- tag: 0xC0 -| axio : Axiom -> Const +### Proof Structure -structure Axiom where - lvls : Nat - type : Expr +```rust +pub struct Proof { + pub claim: Claim, // The claim being proven + pub proof: Vec, // Opaque proof data (e.g., ZK proof bytes) +} ``` -Axioms serialize as a tag-byte and two Expr body fields: +### Serialization Examples +**EvalClaim** (0xE4, 2 addresses): ``` -tag-byte, body1, body2 -0xC0, , +E4 -- Tag4 { flag: 0xE, size: 4 } (EvalClaim) +[32 bytes] -- input address +[32 bytes] -- output address ``` -#### Const.theo +**EvalProof** (0xE2, 2 addresses + proof): +``` +E2 -- Tag4 { flag: 0xE, size: 2 } (EvalProof) +[32 bytes] -- input address +[32 bytes] -- output address +04 -- proof.len = 4 (Tag0) +01 02 03 04 -- proof bytes +``` -```lean4 --- tag: 0xC1 -| theo : Theorem -> Const +**CheckClaim** (0xE3, 1 address): +``` +E3 -- Tag4 { flag: 0xE, size: 3 } (CheckClaim) +[32 bytes] -- value address +``` -structure Theorem where - lvls : Nat - type : Expr - value : Expr +**RevealClaim** — reveal that a committed Definition has `safety = Safe`: +``` +E6 -- Tag4 { flag: 0xE, size: 6 } (RevealClaim) +[32 bytes] -- comm_addr +00 -- variant: Definition +02 -- mask: bit 1 (safety) [Tag0] +01 -- DefinitionSafety::Safe ``` +Total: 36 bytes. 
-Theorems serialize as a tag-byte and three Expr body fields: +--- +## Compilation (Lean → Ixon) + +Compilation transforms Lean constants into Ixon format. + +### CompileState + +```rust +pub struct CompileState { + pub env: IxonEnv, // Ixon environment being built + pub name_to_addr: DashMap, // Name → Ixon address + pub blocks: DashSet
, // Mutual block addresses +} ``` -tag-byte, body1, body2, body3 -0xC1, , , + +### Expression Compilation + +The `compile_expr` function transforms Lean expressions: + +| Lean | Ixon | Notes | +|------|------|-------| +| `Bvar(n)` | `Var(n)` | De Bruijn index preserved | +| `Sort(level)` | `Sort(idx)` | Level added to univs table | +| `Const(name, levels)` | `Ref(idx, univ_idxs)` | Name resolved to address | +| `Const(name, levels)` in mutual | `Rec(ctx_idx, univ_idxs)` | Uses mutual context | +| `Lam(name, ty, body, info)` | `Lam(ty, body)` | Name/info to metadata | +| `ForallE(name, ty, body, info)` | `All(ty, body)` | Name/info to metadata | +| `LetE(name, ty, val, body, nd)` | `Let(nd, ty, val, body)` | Name to metadata | +| `Proj(type, idx, val)` | `Prj(type_idx, idx, val)` | Type name resolved | +| `Lit(Nat n)` | `Nat(idx)` | Bytes stored in blobs | +| `Lit(Str s)` | `Str(idx)` | Bytes stored in blobs | + +### Metadata Extraction + +During compilation, metadata is extracted into `ExprMetas`: + +1. **Pre-order index**: Each expression node gets an index during traversal +2. **Binder info**: Lambda/forall binder names and info stored at their index +3. **Const names**: For `Rec` references, the original name is stored +4. **Mdata**: Key-value metadata wrappers are collected + +### Mutual Block Handling + +1. **Build MutCtx**: Map from constant name to index within the block +2. **Compile each constant** with the mutual context +3. **Create Muts block** with shared tables +4. **Create projections** for each named constant + +--- + +## Decompilation (Ixon → Lean) + +Decompilation reconstructs Lean constants from Ixon format. + +### Process + +1. **Load constant** from `env.consts` by address +2. **Initialize tables** from `sharing`, `refs`, `univs` +3. **Load metadata** from `env.named` +4. **Reconstruct expressions** with names and binder info from metadata +5. **Resolve references**: `Ref(idx, _)` → lookup `refs[idx]`, get name from `addr_to_name` +6. 
**Expand shares**: `Share(idx)` → inline `sharing[idx]` (or cache result) + +### Roundtrip Verification + +The `check_decompile` function verifies: +- Decompiled constants structurally match originals +- All names are correctly reconstructed +- No information is lost + +--- + +## Comprehensive Worked Example + +Let's trace the compilation of a simple definition through the entire system. + +### Lean Source + +```lean +def double (n : Nat) : Nat := Nat.add n n ``` -#### Const.opaq +### Step 1: Lean Expression -```lean4 --- tag: 0xC2 -| opaq : Opaque -> Const +``` +ConstantInfo::DefnInfo { + name: `double + type: Π (n : Nat) → Nat + value: λ (n : Nat) => Nat.add n n + ... +} +``` -structure Opaque where - lvls : Nat - type : Expr - value : Expr +In Lean `Expr` form: +``` +type: ForallE("n", Const(`Nat, []), Const(`Nat, []), Default) +value: Lam("n", Const(`Nat, []), + App(App(Const(`Nat.add, []), Var(0)), Var(0)), Default) ``` -Opaques are identical to theorems, except with tag 0xC2 +### Step 2: Ixon Compilation -#### Const.defn +**Build reference tables**: +- `refs[0]` = Address of `Nat` +- `refs[1]` = Address of `Nat.add` +- `univs` = [] (no universe parameters) -```lean4 --- 0xC3 -| defn : Definition -> Const +**Compile type**: +``` +All(Ref(0, []), Ref(0, [])) +``` +Binary: `0x91` (All, 1 binder) + `0x20 0x00` (Ref, 0 univs, idx 0) + `0x20 0x00` -structure Definition where - lvls : Nat - type : Expr - value : Expr - part : Bool - deriving BEq, Repr +**Compile value**: +``` +Lam(Ref(0, []), App(App(Ref(1, []), Var(0)), Var(0))) +``` +Binary: `0x81` (Lam, 1 binder) + `0x20 0x00` (Ref 0) + `0x72` (App, 2 apps) + `0x20 0x01` (Ref 1) + `0x10` (Var 0) + `0x10` (Var 0) + +**Sharing analysis**: `Var(0)` appears twice, but too small to benefit from sharing. 
+ +**Build Constant**: +```rust +Constant { + info: Defn(Definition { + kind: Definition, + safety: Safe, + lvls: 0, + typ: All(Ref(0, []), Ref(0, [])), + value: Lam(Ref(0, []), App(App(Ref(1, []), Var(0)), Var(0))), + }), + sharing: [], + refs: [addr_of_Nat, addr_of_Nat_add], + univs: [], +} ``` -Definitions serialize as a tag byte, two Expr fields and a Bool field +### Step 3: Serialization ``` -tag-byte, body1, body2, body3, body4 -0xC3, , , , +D0 -- Tag4 { flag: 0xD, size: 0 } (Constant, Defn variant) +01 -- DefKind+Safety packed: (Definition=0 << 2) | Safe=1 +00 -- lvls = 0 (Tag0) +91 20 00 20 00 -- type: All(Ref(0,[]), Ref(0,[])) +81 20 00 72 20 01 -- value: Lam(Ref(0,[]), App(App(Ref(1,[])... + 10 10 -- ...Var(0)), Var(0))) +00 -- sharing.len = 0 +02 -- refs.len = 2 +[32 bytes] -- refs[0] = addr_of_Nat +[32 bytes] -- refs[1] = addr_of_Nat_add +00 -- univs.len = 0 ``` -#### Const.quot +Total: ~69 bytes for the constant data (plus 64 bytes for addresses). -```lean4 --- 0xC4 -| quot : Quotient -> Const +Note: The constant tag is always 1 byte (0xD0) since all non-Muts variants (0-7) fit in the 3-bit size field. -structure Quotient where - lvls : Nat - type : Expr - kind : Lean.QuotKind - deriving BEq, Repr +### Step 4: Content Address + +``` +address = blake3(serialized_constant) ``` -Quotients serialize as a tag-byte, an Expr field and a QuotKind field (a single -byte ranging from 0 to 3 according to the variant) +This address is how `double` is referenced by other constants. 
+ +### Step 5: Metadata + +Stored separately in `Named`: + +```rust +Named { + addr: address_of_double, + meta: ConstantMeta::Def { + name: addr_of_name("double"), + lvls: [], + hints: ReducibilityHints::Regular(1), + all: [addr_of_name("double")], + ctx: [], + arena: ExprMeta { nodes: [ + // type arena: All(Ref(0,[]), Ref(0,[])) + Leaf, // 0: Ref(0,[]) inner + Leaf, // 1: Ref(0,[]) body + Binder { name: "n", info: Default, children: [0, 1] }, // 2: All binder + // value arena: Lam(Ref(0,[]), App(App(Ref(1,[]),Var(0)),Var(0))) + Leaf, // 3: Ref(0,[]) + Leaf, // 4: Ref(1,[]) + Leaf, // 5: Var(0) + App { children: [4, 5] }, // 6: App(Ref(1), Var(0)) + Leaf, // 7: Var(0) + App { children: [6, 7] }, // 8: App(App(...), Var(0)) + Binder { name: "n", info: Default, children: [3, 8] }, // 9: Lam binder + ]}, + type_root: 2, + value_root: 9, + } +} +``` + +### Step 6: Decompilation + +To reconstruct the Lean constant: + +1. Load `Constant` from `consts[address]` +2. Load `Named` from `named["double"]` +3. Resolve `Ref(0, [])` → `refs[0]` → `Nat` (via `addr_to_name`) +4. Resolve `Ref(1, [])` → `refs[1]` → `Nat.add` +5. Attach names from metadata: the binder gets name "n" from `type_meta[0]` + +Result: Original Lean `ConstantInfo` reconstructed. + +--- + +## Worked Example: Inductive Type (Bool) +Let's trace the compilation of a simple inductive type. + +### Lean Source + +```lean +inductive Bool : Type where + | false : Bool + | true : Bool ``` -tag-byte, body1, body2, body3 -0xC4, , , + +### Mutual Block Structure + +Since `Bool` is an inductive type, it's stored in a mutual block containing: +1. The inductive type itself (`Bool`) +2. Its constructors (`Bool.false`, `Bool.true`) +3. 
Its recursor (`Bool.rec`) + +### Ixon Compilation + +**Inductive (Bool)**: +```rust +Inductive { + recr: false, // No recursive occurrences + refl: false, // Not reflexive + is_unsafe: false, + lvls: 0, // No universe parameters + params: 0, // No parameters + indices: 0, // No indices + nested: 0, // Not nested + typ: Sort(0), // Type : Type 0 + ctors: [ctor_false, ctor_true], +} ``` -#### Const.ctor +**Constructor (Bool.false)**: +```rust +Constructor { + is_unsafe: false, + lvls: 0, + cidx: 0, // First constructor + params: 0, + fields: 0, // No fields + typ: Rec(0, []), // : Bool (mutual reference to inductive at index 0) +} +``` -TODO +**Constructor (Bool.true)**: +```rust +Constructor { + is_unsafe: false, + lvls: 0, + cidx: 1, // Second constructor + params: 0, + fields: 0, + typ: Rec(0, []), // : Bool (mutual reference to inductive at index 0) +} +``` -#### Const.recr +### Serialization -TODO +The mutual block uses flag 0xC with entry count in size field: -#### Const.indc +``` +C3 -- Tag4 { flag: 0xC, size: 3 } (Muts, 3 entries) + +-- Entry 0: Inductive (Bool) +01 -- MutConst tag 1 = Indc +00 -- Packed bools: recr=0, refl=0, is_unsafe=0 +00 -- lvls = 0 +00 -- params = 0 +00 -- indices = 0 +00 -- nested = 0 +00 -- typ: Sort(0) +02 -- ctors.len = 2 + -- ctor_false + 00 -- is_unsafe = false + 00 -- lvls = 0 + 00 -- cidx = 0 + 00 -- params = 0 + 00 -- fields = 0 + 30 00 -- typ: Rec(0, []) - mutual reference to Bool at index 0 + -- ctor_true + 00 -- is_unsafe = false + 00 -- lvls = 0 + 01 -- cidx = 1 + 00 -- params = 0 + 00 -- fields = 0 + 30 00 -- typ: Rec(0, []) - mutual reference to Bool at index 0 + +-- Entry 1: Recursor (Bool.rec) - omitted for brevity +02 ... + +-- Entry 2: Definition for Bool.casesOn or similar - if present +... 
-TODO +-- Shared tables +00 -- sharing.len = 0 +00 -- refs.len = 0 (no external references needed) +01 -- univs.len = 1 +00 -- univs[0] = Zero +``` -#### Const.ctorProj +### Projections -TODO +Individual constants are stored as projections into this block: +- `Bool` → `IPrj { idx: 0, block: block_addr }` +- `Bool.false` → `CPrj { idx: 0, cidx: 0, block: block_addr }` +- `Bool.true` → `CPrj { idx: 0, cidx: 1, block: block_addr }` +- `Bool.rec` → `RPrj { idx: 0, block: block_addr }` -#### Const.recrProj +--- -TODO +## Cryptographic Commitments -#### Const.indcProj +For zero-knowledge proofs, Ixon supports cryptographic commitments: -TODO +```rust +pub struct Comm { + pub secret: Address, // Random blinding factor + pub payload: Address, // Address of committed constant +} +``` -#### Const.defnProj +The commitment address is computed as: +``` +commitment = blake3(Tag4(0xE, 5) + secret + payload) +``` -TODO +The payload address is the content hash of the committed constant. Two commitments to the +same constant share the same payload address (canonicity). The secret provides blinding. 
-#### Const.mutDef +Commitments enable: +- **Whole-constant hiding** via `Comm` (hides everything including metadata) +- **Selective revelation** via `RevealClaim` (proves specific field values about a committed constant) +- **Expression-level blinding** via `Expr.ref ` within expression trees +- **Verifiable computation** on committed data (the ZK circuit opens commitments privately) -TODO +--- -#### Const.mutInd +## Summary -TODO +Ixon provides a sophisticated serialization format optimized for: -#### Const.meta +| Feature | Mechanism | +|---------|-----------| +| Deterministic hashing | Alpha-invariance via de Bruijn indices | +| Compact storage | Variable-length tags, telescope compression | +| Deduplication | Merkle-tree sharing within constants | +| Roundtrip fidelity | Separate metadata layer | +| Cryptographic proofs | Content-addressed storage, commitments | -TODO +The separation of alpha-invariant data from metadata is the key innovation, enabling content-addressing where structurally identical terms share the same hash regardless of cosmetic naming choices. 
diff --git a/flake.lock b/flake.lock index b0c5033f..78c17721 100644 --- a/flake.lock +++ b/flake.lock @@ -249,3 +249,4 @@ "root": "root", "version": 7 } + diff --git a/flake.nix b/flake.nix index f5cb3364..03644345 100644 --- a/flake.nix +++ b/flake.nix @@ -108,6 +108,7 @@ mkdir -p target/release ln -s ${rustPkg}/lib/libix_rs.a target/release/ ''; + buildInputs = [pkgs.gmp pkgs.lean.lean-all pkgs.rsync]; }; ixLib = lake2nix.mkPackage (lakeBuildArgs // { @@ -122,8 +123,11 @@ }; ixCLI = lake2nix.mkPackage (lakeBinArgs // {name = "ix";}); ixTest = lake2nix.mkPackage (lakeBinArgs // {name = "IxTests";}); + testAiur = lake2nix.mkPackage (lakeBinArgs // {name = "test-aiur";}); + testIxVM = lake2nix.mkPackage (lakeBinArgs // {name = "test-ixvm";}); benchAiur = lake2nix.mkPackage (lakeBinArgs // {name = "bench-aiur";}); benchBlake3 = lake2nix.mkPackage (lakeBinArgs // {name = "bench-blake3";}); + benchShardMap = lake2nix.mkPackage (lakeBinArgs // {name = "bench-shardmap";}); in { # Lean overlay _module.args.pkgs = import nixpkgs { @@ -135,9 +139,12 @@ default = ixLib; ix = ixCLI; test = ixTest; + test-aiur = testAiur; + test-ixvm = testIxVM; # Ix benches bench-aiur = benchAiur; bench-blake3 = benchBlake3; + bench-shardmap = benchShardMap; }; # Provide a unified dev shell with Lean + Rust @@ -145,12 +152,11 @@ packages = with pkgs; [ pkg-config openssl - ocl-icd - gcc clang rustToolchain rust-analyzer lean.lean-all # Includes Lean compiler, lake, stdlib, etc. 
+ gmp ]; }; diff --git a/lake-manifest.json b/lake-manifest.json index 74b38ab6..c836ec33 100644 --- a/lake-manifest.json +++ b/lake-manifest.json @@ -35,10 +35,10 @@ "type": "git", "subDir": null, "scope": "", - "rev": "fdf848d6cda9f080a09e49e760e2d6f70878800b", + "rev": "1e6da63a9c92473747e816d07d5c6f6bc7c8a59e", "name": "LSpec", "manifestFile": "lake-manifest.json", - "inputRev": "fdf848d6cda9f080a09e49e760e2d6f70878800b", + "inputRev": "1e6da63a9c92473747e816d07d5c6f6bc7c8a59e", "inherited": false, "configFile": "lakefile.toml"}], "name": "ix", diff --git a/lakefile.lean b/lakefile.lean index 567d85f3..529ab47d 100644 --- a/lakefile.lean +++ b/lakefile.lean @@ -12,7 +12,7 @@ lean_exe ix where supportInterpreter := true require LSpec from git - "https://github.com/argumentcomputer/LSpec" @ "fdf848d6cda9f080a09e49e760e2d6f70878800b" + "https://github.com/argumentcomputer/LSpec" @ "1e6da63a9c92473747e816d07d5c6f6bc7c8a59e" require Blake3 from git "https://github.com/argumentcomputer/Blake3.lean" @ "f66794edb4612106cd7b04a7fbd04917fb1abb7d" @@ -32,6 +32,12 @@ lean_exe IxTests where root := `Tests.Main supportInterpreter := true +lean_exe «test-aiur» where + root := `Tests.AiurTest + +lean_exe «test-ixvm» where + root := `Tests.IxVMTest + end Tests lean_lib IxTestLib where @@ -47,6 +53,9 @@ lean_exe «bench-aiur» where lean_exe «bench-blake3» where root := `Benchmarks.Blake3 +lean_exe «bench-shardmap» where + root := `Benchmarks.ShardMap + end Benchmarks lean_lib Apps diff --git a/src/aiur/mod.rs b/src/aiur.rs similarity index 100% rename from src/aiur/mod.rs rename to src/aiur.rs diff --git a/src/aiur/gadgets/mod.rs b/src/aiur/gadgets.rs similarity index 100% rename from src/aiur/gadgets/mod.rs rename to src/aiur/gadgets.rs diff --git a/src/aiur/gadgets/bytes2.rs b/src/aiur/gadgets/bytes2.rs index f928af62..03a39a23 100644 --- a/src/aiur/gadgets/bytes2.rs +++ b/src/aiur/gadgets/bytes2.rs @@ -206,7 +206,7 @@ impl AiurGadget for Bytes2 { } } -/// Accumulator of 
queries performed against `Bytes1`. +/// Accumulator of queries performed against `Bytes2`. pub(crate) struct Bytes2Queries(Box<[[G; TRACE_WIDTH]]>); impl Bytes2Queries { diff --git a/src/iroh/mod.rs b/src/iroh.rs similarity index 100% rename from src/iroh/mod.rs rename to src/iroh.rs diff --git a/src/ix.rs b/src/ix.rs index 25906b8e..f200d81b 100644 --- a/src/ix.rs +++ b/src/ix.rs @@ -1,3 +1,9 @@ +//! Ix: content-addressed representation of Lean kernel types. +//! +//! This module contains the Lean type representation (`env`), the Ixon +//! serialization format (`ixon`), and the compilation/decompilation pipeline +//! that transforms between them. + pub mod address; pub mod compile; pub mod condense; diff --git a/src/ix/address.rs b/src/ix/address.rs index 1a61e2c8..a0adf5b7 100644 --- a/src/ix/address.rs +++ b/src/ix/address.rs @@ -1,23 +1,37 @@ +//! Content-addressed identifiers based on Blake3 hashing. +//! +//! [`Address`] wraps a 32-byte Blake3 digest and is used throughout the Ix +//! pipeline to uniquely identify constants, blobs, and other data. + use blake3::Hash; use core::array::TryFromSliceError; use std::cmp::{Ordering, PartialOrd}; use std::hash::{Hash as StdHash, Hasher}; +/// A 32-byte Blake3 content address. #[derive(Debug, Clone, PartialEq, Eq)] pub struct Address { hash: Hash, } impl Address { + /// Constructs an address from a 32-byte slice. pub fn from_slice(input: &[u8]) -> Result { Ok(Address { hash: Hash::from_slice(input)? }) } + /// Wraps an existing Blake3 hash as an address. + pub fn from_blake3_hash(hash: Hash) -> Self { + Address { hash } + } + /// Hashes arbitrary bytes with Blake3 and returns the resulting address. pub fn hash(input: &[u8]) -> Self { Address { hash: blake3::hash(input) } } + /// Returns the address as a lowercase hexadecimal string. pub fn hex(&self) -> String { self.hash.to_hex().as_str().to_owned() } + /// Returns the raw 32-byte digest. 
pub fn as_bytes(&self) -> &[u8; 32] { self.hash.as_bytes() } @@ -40,22 +54,6 @@ impl StdHash for Address { } } -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct MetaAddress { - pub data: Address, - pub meta: Address, -} - -//impl Display for MetaAddress {} - -// TODO: DELETEME -//impl Default for MetaAddress { -// fn default() -> Self { -// let addr = Address { hash: [0; 32].into() }; -// Self { data: addr.clone(), meta: addr } -// } -//} - #[cfg(test)] pub mod tests { use super::*; @@ -70,9 +68,4 @@ pub mod tests { Address::from_slice(&bytes).unwrap() } } - impl Arbitrary for MetaAddress { - fn arbitrary(g: &mut Gen) -> Self { - MetaAddress { data: Address::arbitrary(g), meta: Address::arbitrary(g) } - } - } } diff --git a/src/ix/compile.rs b/src/ix/compile.rs index 382fb417..5c2a8269 100644 --- a/src/ix/compile.rs +++ b/src/ix/compile.rs @@ -1,1017 +1,1678 @@ +//! Compilation from Lean environment to Ixon format. +//! +//! This module compiles Lean constants to alpha-invariant Ixon representations +//! 
with sharing analysis for deduplication within constants + +#![allow(clippy::cast_possible_truncation)] +#![allow(clippy::cast_precision_loss)] + use dashmap::{DashMap, DashSet}; -use itertools::Itertools; -use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; use rustc_hash::FxHashMap; -use std::{cmp::Ordering, sync::Arc}; +use std::{ + cmp::Ordering, + sync::{ + Arc, + atomic::{AtomicUsize, Ordering as AtomicOrdering}, + }, + thread, +}; use crate::{ - ix::address::{Address, MetaAddress}, + ix::address::Address, ix::condense::compute_sccs, ix::env::{ - AxiomVal, BinderInfo, ConstantInfo, ConstructorVal, - DataValue as LeanDataValue, Env, Expr, ExprData, InductiveVal, Level, - LevelData, Literal, Name, NameData, QuotVal, RecursorRule, - SourceInfo as LeanSourceInfo, Substring as LeanSubstring, - Syntax as LeanSyntax, SyntaxPreresolved, + AxiomVal, BinderInfo, ConstantInfo as LeanConstantInfo, ConstructorVal, + DataValue as LeanDataValue, Env as LeanEnv, Expr as LeanExpr, ExprData, + InductiveVal, Level, LevelData, Literal, Name, NameData, QuotVal, + RecursorRule as LeanRecursorRule, SourceInfo as LeanSourceInfo, + Substring as LeanSubstring, Syntax as LeanSyntax, SyntaxPreresolved, }, ix::graph::{NameSet, build_ref_graph}, ix::ground::ground_consts, ix::ixon::{ - self, Axiom, BuiltIn, Constructor, ConstructorProj, DataValue, Definition, - DefinitionProj, Inductive, InductiveProj, Ixon, Metadata, Metadatum, - Preresolved, Quotient, Recursor, RecursorProj, Serialize, SourceInfo, - Substring, Syntax, + CompileError, Tag0, + constant::{ + Axiom, Constant, ConstantInfo, Constructor, ConstructorProj, Definition, + DefinitionProj, Inductive, InductiveProj, MutConst as IxonMutConst, + Quotient, Recursor, RecursorProj, RecursorRule, + }, + env::{Env as IxonEnv, Named}, + expr::Expr, + metadata::{ConstantMeta, DataValue, ExprMeta, ExprMetaData, KVMap}, + sharing::{self, analyze_block, build_sharing_vec, decide_sharing}, + univ::Univ, }, - ix::mutual::{Def, Ind, 
MutConst, MutCtx, Rec}, + ix::mutual::{Def, Ind, MutConst, MutCtx, Rec, ctx_to_all}, ix::strong_ordering::SOrd, lean::nat::Nat, }; +/// Whether to track hash-consed sizes during compilation. +/// This adds overhead to sharing analysis and can be disabled for production. +/// Set to `true` to enable hash-consed vs serialized size comparison. +pub static TRACK_HASH_CONSED_SIZE: std::sync::atomic::AtomicBool = + std::sync::atomic::AtomicBool::new(false); + +/// Whether to output verbose sharing analysis for pathological blocks. +/// Set via IX_ANALYZE_SHARING=1 environment variable. +pub static ANALYZE_SHARING: std::sync::atomic::AtomicBool = + std::sync::atomic::AtomicBool::new(false); + +/// Size statistics for a compiled block. +#[derive(Clone, Debug, Default)] +pub struct BlockSizeStats { + /// Hash-consed size: sum of unique subterm sizes (theoretical minimum with perfect sharing) + pub hash_consed_size: usize, + /// Serialized Ixon size: actual bytes when serialized + pub serialized_size: usize, + /// Number of constants in the block + pub const_count: usize, +} + +/// Compile state for building the Ixon environment. #[derive(Default)] pub struct CompileState { - pub consts: DashMap, - pub names: DashMap, - pub blocks: DashSet, - pub store: DashMap>, + /// Ixon environment being built + pub env: IxonEnv, + /// Map from Lean constant name to Ixon address + pub name_to_addr: DashMap, + /// Addresses of mutual blocks + pub blocks: DashSet
, + /// Per-block size statistics (keyed by low-link name) + pub block_stats: DashMap, +} + +/// Cached compiled expression with arena root index. +/// +/// On cache hit: O(1) — just push the cached expr and arena_root. +/// The subtree's metadata nodes are already in the arena (append-only). +#[derive(Clone, Debug)] +pub struct CachedExpr { + pub expr: Arc, + pub arena_root: u64, } +/// Per-block compilation cache. #[derive(Default)] pub struct BlockCache { - pub exprs: FxHashMap, - pub univs: FxHashMap, + /// Cache for compiled expressions (keyed by Lean hash address) + pub exprs: FxHashMap, + /// Cache for compiled universes (Level -> Univ conversion) + pub univ_cache: FxHashMap>, + /// Cache for expression comparisons pub cmps: FxHashMap<(Name, Name), Ordering>, + /// Arena for expression metadata (append-only within a constant) + pub arena: ExprMeta, + /// Arena root indices parallel to the results stack + pub arena_roots: Vec, + /// Reference table: unique addresses of constants referenced by Expr::Ref + pub refs: indexmap::IndexSet
, + /// Universe table: unique universes referenced by expressions + pub univs: indexmap::IndexSet>, } #[derive(Debug)] pub struct CompileStateStats { pub consts: usize, pub names: usize, + pub blobs: usize, pub blocks: usize, - pub store: usize, } impl CompileState { + /// Create an empty compile state for testing (no environment). + pub fn new_empty() -> Self { + Self { + env: IxonEnv::default(), + name_to_addr: DashMap::new(), + blocks: DashSet::new(), + block_stats: DashMap::new(), + } + } + pub fn stats(&self) -> CompileStateStats { CompileStateStats { - consts: self.consts.len(), - names: self.names.len(), + consts: self.env.const_count(), + names: self.env.name_count(), + blobs: self.env.blob_count(), blocks: self.blocks.len(), - store: self.store.len(), } } } -#[derive(Debug)] -pub enum CompileError { - //StoreError(StoreError), - UngroundedEnv, - CondenseError, - LevelParam(Name, Vec), - LevelMVar(Name), - Ref(Name), - ExprFVar, - ExprMVar, - CompileExpr, - MkIndc, - SortConsts, - CompileMutConsts(Vec), - CompileMutual, - CompileMutual2, - CompileMutual3, - CompileMutual4, - CompileMutual5, - CompileConstInfo, - CompileConstInfo2, - CompileConst, -} - -//pub type CompileResult = -// Result>, CompileError>; - -//pub type Consts = Arc>; - -pub fn store_ixon( - ixon: &Ixon, - stt: &CompileState, -) -> Result { - let mut bytes = Vec::new(); - ixon.put(&mut bytes); - let addr = Address::hash(&bytes); - stt.store.insert(addr.clone(), bytes); - Ok(addr) - //Store::write(&bytes).map_err(CompileError::StoreError) -} +// =========================================================================== +// Helper functions +// =========================================================================== -pub fn store_string( - str: &str, - stt: &CompileState, -) -> Result { - let bytes = str.as_bytes(); - let addr = Address::hash(bytes); - stt.store.insert(addr.clone(), bytes.to_vec()); - Ok(addr) - //Store::write(str.as_bytes()).map_err(CompileError::StoreError) +/// 
Convert a Nat to u64, returning an error if the value is too large. +fn nat_to_u64(n: &Nat, context: &'static str) -> Result { + n.to_u64().ok_or(CompileError::UnsupportedExpr { desc: context.into() }) } -pub fn store_nat( - nat: &Nat, - stt: &CompileState, -) -> Result { - let bytes = nat.to_le_bytes(); - let addr = Address::hash(&bytes); - stt.store.insert(addr.clone(), bytes); - Ok(addr) - //Store::write(&nat.to_le_bytes()).map_err(CompileError::StoreError) -} +// =========================================================================== +// Name compilation +// =========================================================================== -pub fn store_serialize( - a: &A, - stt: &CompileState, -) -> Result { - let mut bytes = Vec::new(); - a.put(&mut bytes); - let addr = Address::hash(&bytes); - stt.store.insert(addr.clone(), bytes); - Ok(addr) - //Store::write(&bytes).map_err(CompileError::StoreError) +/// Store a string as a blob and return its address. +pub fn store_string(s: &str, stt: &CompileState) -> Address { + stt.env.store_blob(s.as_bytes().to_vec()) } -pub fn store_meta( - x: &Metadata, - stt: &CompileState, -) -> Result { - let mut bytes = Vec::new(); - x.put(&mut bytes); - let addr = Address::hash(&bytes); - stt.store.insert(addr.clone(), bytes); - Ok(addr) - //Store::write(&bytes).map_err(CompileError::StoreError) +/// Store a Nat as a blob and return its address. +pub fn store_nat(n: &Nat, stt: &CompileState) -> Address { + stt.env.store_blob(n.to_le_bytes()) } -pub fn compile_name( - name: &Name, - stt: &CompileState, -) -> Result { - if let Some(cached) = stt.names.get(name) { - Ok(cached.clone()) - } else { - let addr = match name.as_data() { - NameData::Anonymous(_) => store_ixon(&Ixon::NAnon, stt)?, - NameData::Str(n, s, _) => { - let n2 = compile_name(n, stt)?; - let s2 = store_string(s, stt)?; - store_ixon(&Ixon::NStr(n2, s2), stt)? 
- }, - NameData::Num(n, i, _) => { - let n_ = compile_name(n, stt)?; - let s_ = store_nat(i, stt)?; - store_ixon(&Ixon::NNum(n_, s_), stt)? - }, - }; - stt.names.insert(name.clone(), addr.clone()); - Ok(addr) - } -} +/// Compile a Lean Name to an address (stored in env.names). +/// Uses the Name's internal hash as the address. +/// String components are stored in blobs. +pub fn compile_name(name: &Name, stt: &CompileState) -> Address { + // Use the Name's internal hash as the address + let addr = Address::from_blake3_hash(*name.get_hash()); -pub fn compile_level( - level: &Level, - univs: &[Name], - cache: &mut BlockCache, - stt: &CompileState, -) -> Result { - if let Some(cached) = cache.univs.get(level) { - return Ok(cached.clone()); + // Check if already stored + if stt.env.names.contains_key(&addr) { + return addr; } - let data_ixon = match level.as_data() { - LevelData::Zero(_) => Ixon::UZero, - LevelData::Succ(x, _) => Ixon::USucc(compile_level(x, univs, cache, stt)?), - LevelData::Max(x, y, _) => { - let x = compile_level(x, univs, cache, stt)?; - let y = compile_level(y, univs, cache, stt)?; - Ixon::UMax(x, y) - }, - LevelData::Imax(x, y, _) => { - let x = compile_level(x, univs, cache, stt)?; - let y = compile_level(y, univs, cache, stt)?; - Ixon::UIMax(x, y) - }, - LevelData::Param(n, _) => match univs.iter().position(|x| x == n) { - Some(i) => Ixon::UVar(Nat::from_le_bytes(&i.to_le_bytes())), - None => { - return Err(CompileError::LevelParam(n.clone(), univs.to_vec())); - }, - }, - LevelData::Mvar(x, _) => { - return Err(CompileError::LevelMVar(x.clone())); - }, - }; - let addr = store_ixon(&data_ixon, stt)?; - cache.univs.insert(level.clone(), addr.clone()); - Ok(addr) -} -pub fn compare_level( - x: &Level, - y: &Level, - x_ctx: &[Name], - y_ctx: &[Name], -) -> Result { - match (x.as_data(), y.as_data()) { - (LevelData::Mvar(e, _), _) | (_, LevelData::Mvar(e, _)) => { - Err(CompileError::LevelMVar(e.clone())) - }, - (LevelData::Zero(_), 
LevelData::Zero(_)) => Ok(SOrd::eq(true)), - (LevelData::Zero(_), _) => Ok(SOrd::lt(true)), - (_, LevelData::Zero(_)) => Ok(SOrd::gt(true)), - (LevelData::Succ(x, _), LevelData::Succ(y, _)) => { - compare_level(x, y, x_ctx, y_ctx) - }, - (LevelData::Succ(_, _), _) => Ok(SOrd::lt(true)), - (_, LevelData::Succ(_, _)) => Ok(SOrd::gt(true)), - (LevelData::Max(xl, xr, _), LevelData::Max(yl, yr, _)) => { - SOrd::try_compare(compare_level(xl, yl, x_ctx, y_ctx)?, || { - compare_level(xr, yr, x_ctx, y_ctx) - }) - }, - (LevelData::Max(_, _, _), _) => Ok(SOrd::lt(true)), - (_, LevelData::Max(_, _, _)) => Ok(SOrd::gt(true)), - (LevelData::Imax(xl, xr, _), LevelData::Imax(yl, yr, _)) => { - SOrd::try_compare(compare_level(xl, yl, x_ctx, y_ctx)?, || { - compare_level(xr, yr, x_ctx, y_ctx) - }) + // Recurse on parent first (ensures parent is stored) + match name.as_data() { + NameData::Anonymous(_) => {}, + NameData::Str(parent, s, _) => { + compile_name(parent, stt); + store_string(s, stt); // string data in blobs }, - (LevelData::Imax(_, _, _), _) => Ok(SOrd::lt(true)), - (_, LevelData::Imax(_, _, _)) => Ok(SOrd::gt(true)), - (LevelData::Param(x, _), LevelData::Param(y, _)) => { - match ( - x_ctx.iter().position(|n| x == n), - y_ctx.iter().position(|n| y == n), - ) { - (Some(xi), Some(yi)) => Ok(SOrd::cmp(&xi, &yi)), - (None, _) => Err(CompileError::LevelParam(x.clone(), x_ctx.to_vec())), - (_, None) => Err(CompileError::LevelParam(y.clone(), y_ctx.to_vec())), - } + NameData::Num(parent, _, _) => { + compile_name(parent, stt); + // Nat is inline in Name, no blob needed }, } -} -pub fn compile_substring( - substring: &LeanSubstring, - stt: &CompileState, -) -> Result { - let LeanSubstring { str, start_pos, stop_pos } = substring; - let str = store_string(str, stt)?; - Ok(Substring { - str, - start_pos: start_pos.clone(), - stop_pos: stop_pos.clone(), - }) + // Store Name struct directly in env.names + stt.env.names.insert(addr.clone(), name.clone()); + addr } -pub fn 
compile_preresolved( - preresolved: &SyntaxPreresolved, - stt: &CompileState, -) -> Result { - match preresolved { - SyntaxPreresolved::Namespace(ns) => { - Ok(Preresolved::Namespace(compile_name(ns, stt)?)) - }, - SyntaxPreresolved::Decl(n, fs) => { - let fs = fs.iter().map(|s| store_string(s, stt)).try_collect()?; - Ok(Preresolved::Decl(compile_name(n, stt)?, fs)) - }, - } -} +// =========================================================================== +// Universe compilation +// =========================================================================== -pub fn compile_source_info( - info: &LeanSourceInfo, - stt: &CompileState, -) -> Result { - match info { - LeanSourceInfo::Original(l, p, t, e) => { - let l = compile_substring(l, stt)?; - let t = compile_substring(t, stt)?; - Ok(SourceInfo::Original(l, p.clone(), t, e.clone())) - }, - LeanSourceInfo::Synthetic(p, e, c) => { - Ok(SourceInfo::Synthetic(p.clone(), e.clone(), *c)) - }, - LeanSourceInfo::None => Ok(SourceInfo::None), +/// Compile a Lean Level to an Ixon Univ. 
+pub fn compile_univ( + level: &Level, + univ_params: &[Name], + cache: &mut BlockCache, +) -> Result, CompileError> { + if let Some(cached) = cache.univ_cache.get(level) { + return Ok(cached.clone()); } -} -pub fn compile_syntax( - syn: &LeanSyntax, - stt: &CompileState, -) -> Result { - match syn { - LeanSyntax::Missing => Ok(Syntax::Missing), - LeanSyntax::Node(info, kind, args) => { - let info = compile_source_info(info, stt)?; - let kind = compile_name(kind, stt)?; - let args = args - .iter() - .map(|s| store_serialize(&compile_syntax(s, stt)?, stt)) - .try_collect()?; - Ok(Syntax::Node(info, kind, args)) + let univ = match level.as_data() { + LevelData::Zero(_) => Univ::zero(), + LevelData::Succ(inner, _) => { + let inner_univ = compile_univ(inner, univ_params, cache)?; + Univ::succ(inner_univ) }, - LeanSyntax::Atom(info, val) => { - let info = compile_source_info(info, stt)?; - let val = store_string(val, stt)?; - Ok(Syntax::Atom(info, val)) + LevelData::Max(a, b, _) => { + let a_univ = compile_univ(a, univ_params, cache)?; + let b_univ = compile_univ(b, univ_params, cache)?; + Univ::max(a_univ, b_univ) }, - LeanSyntax::Ident(info, raw_val, val, preresolved) => { - let info = compile_source_info(info, stt)?; - let raw_val = compile_substring(raw_val, stt)?; - let val = compile_name(val, stt)?; - let preresolved = preresolved - .iter() - .map(|pre| compile_preresolved(pre, stt)) - .try_collect()?; - Ok(Syntax::Ident(info, raw_val, val, preresolved)) + LevelData::Imax(a, b, _) => { + let a_univ = compile_univ(a, univ_params, cache)?; + let b_univ = compile_univ(b, univ_params, cache)?; + Univ::imax(a_univ, b_univ) }, - } -} - -pub fn compile_data_value( - data_value: &LeanDataValue, - stt: &CompileState, -) -> Result { - match data_value { - LeanDataValue::OfString(s) => { - Ok(DataValue::OfString(store_string(s, stt)?)) + LevelData::Param(name, _) => { + let idx = + univ_params.iter().position(|n| n == name).ok_or_else(|| { + CompileError::UnknownUnivParam { 
+ curr: String::new(), + param: name.pretty(), + } + })?; + Univ::var(idx as u64) }, - LeanDataValue::OfBool(b) => Ok(DataValue::OfBool(*b)), - LeanDataValue::OfName(n) => Ok(DataValue::OfName(compile_name(n, stt)?)), - LeanDataValue::OfNat(i) => Ok(DataValue::OfNat(store_nat(i, stt)?)), - LeanDataValue::OfInt(i) => Ok(DataValue::OfInt(store_serialize(i, stt)?)), - LeanDataValue::OfSyntax(s) => { - Ok(DataValue::OfSyntax(store_serialize(&compile_syntax(s, stt)?, stt)?)) + LevelData::Mvar(_name, _) => { + return Err(CompileError::UnsupportedExpr { + desc: "level metavariable".into(), + }); }, - } + }; + + cache.univ_cache.insert(level.clone(), univ.clone()); + Ok(univ) } -pub fn compile_kv_map( - kv: &Vec<(Name, LeanDataValue)>, - stt: &CompileState, -) -> Result, CompileError> { - let mut list = Vec::with_capacity(kv.len()); - for (name, data_value) in kv { - let n = compile_name(name, stt)?; - let d = compile_data_value(data_value, stt)?; - list.push((n, d)); - } - Ok(list) +/// Compile a universe and add it to the univs table, returning its index. +fn compile_univ_idx( + level: &Level, + univ_params: &[Name], + cache: &mut BlockCache, +) -> Result { + let univ = compile_univ(level, univ_params, cache)?; + let (idx, _) = cache.univs.insert_full(univ); + Ok(idx as u64) } -pub fn compile_ref( - name: &Name, - stt: &CompileState, -) -> Result { - if let Some(builtin) = BuiltIn::from_name(name) { - Ok(MetaAddress { - data: store_ixon(&Ixon::Prim(builtin), stt)?, - meta: store_ixon(&Ixon::Meta(Metadata { nodes: vec![] }), stt)?, - }) - } else if let Some(addr) = stt.consts.get(name) { - Ok(addr.clone()) - } else { - Err(CompileError::Ref(name.clone())) - } + +/// Compile a list of universes and add them to the univs table, returning indices. 
+fn compile_univ_indices( + levels: &[Level], + univ_params: &[Name], + cache: &mut BlockCache, +) -> Result, CompileError> { + levels.iter().map(|l| compile_univ_idx(l, univ_params, cache)).collect() } +// =========================================================================== +// Expression compilation +// =========================================================================== + +/// Compile a Lean expression to an Ixon expression. +/// Builds arena-based metadata in cache.arena with bottom-up allocation. pub fn compile_expr( - expr: &Expr, - univ_ctx: &[Name], + expr: &LeanExpr, + univ_params: &[Name], mut_ctx: &MutCtx, cache: &mut BlockCache, stt: &CompileState, -) -> Result { +) -> Result, CompileError> { + // Stack-based iterative compilation to avoid stack overflow enum Frame<'a> { - Compile(&'a Expr), - Mdata(Vec<(Address, DataValue)>), - App, - Lam(Address, BinderInfo), - All(Address, BinderInfo), - Let(Address, bool), - Proj(Address, MetaAddress, Nat), - Cache(Expr), - } - if let Some(cached) = cache.exprs.get(expr) { - return Ok(cached.clone()); + Compile(&'a LeanExpr), + BuildApp, + BuildLam(Address, BinderInfo), + BuildAll(Address, BinderInfo), + BuildLet(Address, bool), + BuildProj(u64, u64, Address), // type_ref_idx, field_idx, struct_name_addr + WrapMdata(Vec), + Cache(&'a LeanExpr), + } + + // Top-level cache check (O(1) with arena) + let expr_key = Address::from_blake3_hash(*expr.get_hash()); + if let Some(cached) = cache.exprs.get(&expr_key).cloned() { + cache.arena_roots.push(cached.arena_root); + return Ok(cached.expr); } + let mut stack: Vec> = vec![Frame::Compile(expr)]; - let mut result: Vec = vec![]; + let mut results: Vec> = Vec::new(); while let Some(frame) = stack.pop() { match frame { - Frame::Compile(expr) => { - if let Some(cached) = cache.exprs.get(expr) { - result.push(cached.clone()); + Frame::Compile(e) => { + let e_key = Address::from_blake3_hash(*e.get_hash()); + if let Some(cached) = cache.exprs.get(&e_key).cloned() { + 
// O(1) cache hit: arena root already valid + results.push(cached.expr); + cache.arena_roots.push(cached.arena_root); continue; } - stack.push(Frame::Cache(expr.clone())); - match expr.as_data() { - ExprData::Mdata(kv, inner, _) => { - let kvs = compile_kv_map(kv, stt)?; - stack.push(Frame::Mdata(kvs)); - stack.push(Frame::Compile(inner)); - }, + + stack.push(Frame::Cache(e)); + + match e.as_data() { ExprData::Bvar(idx, _) => { - let data = store_ixon(&Ixon::EVar(idx.clone()), stt)?; - let meta = store_ixon(&Ixon::meta(vec![]), stt)?; - result.push(MetaAddress { meta, data }) + let idx_u64 = nat_to_u64(idx, "bvar index too large")?; + results.push(Expr::var(idx_u64)); + cache.arena_roots.push(cache.arena.alloc(ExprMetaData::Leaf)); }, - ExprData::Sort(univ, _) => { - let u = compile_level(univ, univ_ctx, cache, stt)?; - let data = store_ixon(&Ixon::ESort(u), stt)?; - let meta = store_ixon(&Ixon::meta(vec![]), stt)?; - result.push(MetaAddress { meta, data }) + + ExprData::Sort(level, _) => { + let univ_idx = compile_univ_idx(level, univ_params, cache)?; + results.push(Expr::sort(univ_idx)); + cache.arena_roots.push(cache.arena.alloc(ExprMetaData::Leaf)); }, - ExprData::Const(name, lvls, _) => { - let n = compile_name(name, stt)?; - let mut lds = Vec::with_capacity(lvls.len()); - for l in lvls { - let u = compile_level(l, univ_ctx, cache, stt)?; - lds.push(u); + + ExprData::Const(name, levels, _) => { + let univ_indices = + compile_univ_indices(levels, univ_params, cache)?; + let name_addr = compile_name(name, stt); + + // Check if this is a mutual reference + if let Some(idx) = mut_ctx.get(name) { + let idx_u64 = nat_to_u64(idx, "mutual index too large")?; + results.push(Expr::rec(idx_u64, univ_indices)); + cache + .arena_roots + .push(cache.arena.alloc(ExprMetaData::Ref { name: name_addr })); + } else { + // External reference + let const_addr = stt + .name_to_addr + .get(name) + .ok_or_else(|| CompileError::MissingConstant { + name: name.pretty(), + })? 
+ .clone(); + let (ref_idx, _) = cache.refs.insert_full(const_addr); + results.push(Expr::reference(ref_idx as u64, univ_indices)); + cache + .arena_roots + .push(cache.arena.alloc(ExprMetaData::Ref { name: name_addr })); } - match mut_ctx.get(name) { - Some(idx) => { - let data = store_ixon(&Ixon::ERec(idx.clone(), lds), stt)?; - let meta = - store_ixon(&Ixon::meta(vec![Metadatum::Link(n)]), stt)?; - result.push(MetaAddress { data, meta }) - }, - None => { - let addr = compile_ref(name, stt)?; - let data = - store_ixon(&Ixon::ERef(addr.data.clone(), lds), stt)?; - let meta = store_ixon( - &Ixon::meta(vec![ - Metadatum::Link(n), - Metadatum::Link(addr.meta.clone()), - ]), - stt, - )?; - result.push(MetaAddress { data, meta }) - }, - }; }, + ExprData::App(f, a, _) => { - stack.push(Frame::App); + stack.push(Frame::BuildApp); stack.push(Frame::Compile(a)); stack.push(Frame::Compile(f)); }, - ExprData::Lam(name, t, b, info, _) => { - let n = compile_name(name, stt)?; - stack.push(Frame::Lam(n, info.clone())); - stack.push(Frame::Compile(b)); - stack.push(Frame::Compile(t)); + + ExprData::Lam(name, ty, body, info, _) => { + let name_addr = compile_name(name, stt); + stack.push(Frame::BuildLam(name_addr, info.clone())); + stack.push(Frame::Compile(body)); + stack.push(Frame::Compile(ty)); }, - ExprData::ForallE(name, t, b, info, _) => { - let n = compile_name(name, stt)?; - stack.push(Frame::All(n, info.clone())); - stack.push(Frame::Compile(b)); - stack.push(Frame::Compile(t)); + + ExprData::ForallE(name, ty, body, info, _) => { + let name_addr = compile_name(name, stt); + stack.push(Frame::BuildAll(name_addr, info.clone())); + stack.push(Frame::Compile(body)); + stack.push(Frame::Compile(ty)); }, - ExprData::LetE(name, t, v, b, nd, _) => { - let n = compile_name(name, stt)?; - stack.push(Frame::Let(n, *nd)); - stack.push(Frame::Compile(b)); - stack.push(Frame::Compile(v)); - stack.push(Frame::Compile(t)); + + ExprData::LetE(name, ty, val, body, non_dep, _) => { + let 
name_addr = compile_name(name, stt); + stack.push(Frame::BuildLet(name_addr, *non_dep)); + stack.push(Frame::Compile(body)); + stack.push(Frame::Compile(val)); + stack.push(Frame::Compile(ty)); }, + ExprData::Lit(Literal::NatVal(n), _) => { - let data = store_ixon(&Ixon::ENat(store_nat(n, stt)?), stt)?; - let meta = store_ixon(&Ixon::meta(vec![]), stt)?; - result.push(MetaAddress { data, meta }) + let addr = store_nat(n, stt); + let (ref_idx, _) = cache.refs.insert_full(addr); + results.push(Expr::nat(ref_idx as u64)); + cache.arena_roots.push(cache.arena.alloc(ExprMetaData::Leaf)); }, - ExprData::Lit(Literal::StrVal(n), _) => { - let data = store_ixon(&Ixon::EStr(store_string(n, stt)?), stt)?; - let meta = store_ixon(&Ixon::meta(vec![]), stt)?; - result.push(MetaAddress { data, meta }) + + ExprData::Lit(Literal::StrVal(s), _) => { + let addr = store_string(s, stt); + let (ref_idx, _) = cache.refs.insert_full(addr); + results.push(Expr::str(ref_idx as u64)); + cache.arena_roots.push(cache.arena.alloc(ExprMetaData::Leaf)); + }, + + ExprData::Proj(type_name, idx, struct_val, _) => { + let idx_u64 = nat_to_u64(idx, "proj index too large")?; + + let type_addr = stt + .name_to_addr + .get(type_name) + .ok_or_else(|| CompileError::MissingConstant { + name: type_name.pretty(), + })? + .clone(); + + let (ref_idx, _) = cache.refs.insert_full(type_addr); + let name_addr = compile_name(type_name, stt); + + stack.push(Frame::BuildProj(ref_idx as u64, idx_u64, name_addr)); + stack.push(Frame::Compile(struct_val)); + }, + + ExprData::Mdata(kv, inner, _) => { + // Compile KV map + let mut pairs = Vec::new(); + for (k, v) in kv { + let k_addr = compile_name(k, stt); + let v_data = compile_data_value(v, stt); + pairs.push((k_addr, v_data)); + } + // Mdata becomes a separate arena node wrapping inner + stack.push(Frame::WrapMdata(vec![pairs])); + stack.push(Frame::Compile(inner)); + }, + + ExprData::Fvar(..) 
=> { + return Err(CompileError::UnsupportedExpr { + desc: "free variable".into(), + }); }, - ExprData::Proj(tn, i, s, _) => { - let n = compile_name(tn, stt)?; - let t = compile_ref(tn, stt)?; - stack.push(Frame::Proj(n, t, i.clone())); - stack.push(Frame::Compile(s)); + + ExprData::Mvar(..) => { + return Err(CompileError::UnsupportedExpr { + desc: "metavariable".into(), + }); }, - ExprData::Fvar(..) => return Err(CompileError::ExprFVar), - ExprData::Mvar(..) => return Err(CompileError::ExprMVar), } }, - Frame::Mdata(kv) => { - let inner = result.pop().unwrap(); - let meta = store_ixon( - &Ixon::meta(vec![Metadatum::KVMap(kv), Metadatum::Link(inner.meta)]), - stt, - )?; - result.push(MetaAddress { data: inner.data, meta }); - }, - Frame::App => { - let a = result.pop().expect("Frame::App missing a result"); - let f = result.pop().expect("Frame::App missing f result"); - let data = store_ixon(&Ixon::EApp(f.data, a.data), stt)?; - let meta = store_ixon( - &Ixon::meta(vec![Metadatum::Link(f.meta), Metadatum::Link(a.meta)]), - stt, - )?; - result.push(MetaAddress { data, meta }) - }, - Frame::Lam(n, i) => { - let b = result.pop().expect("Frame::Lam missing b result"); - let t = result.pop().expect("Frame::Lam missing t result"); - let data = store_ixon(&Ixon::ELam(t.data, b.data), stt)?; - let meta = store_ixon( - &Ixon::meta(vec![ - Metadatum::Link(n), - Metadatum::Info(i), - Metadatum::Link(t.meta), - Metadatum::Link(b.meta), - ]), - stt, - )?; - result.push(MetaAddress { data, meta }) - }, - Frame::All(n, i) => { - let b = result.pop().expect("Frame::All missing b result"); - let t = result.pop().expect("Frame::All missing t result"); - let data = store_ixon(&Ixon::EAll(t.data, b.data), stt)?; - let meta = store_ixon( - &Ixon::meta(vec![ - Metadatum::Link(n), - Metadatum::Info(i), - Metadatum::Link(t.meta), - Metadatum::Link(b.meta), - ]), - stt, - )?; - result.push(MetaAddress { data, meta }) - }, - Frame::Let(n, nd) => { - let b = result.pop().expect("Frame::Let 
missing b result"); - let v = result.pop().expect("Frame::Let missing v result"); - let t = result.pop().expect("Frame::Let missing t result"); - let data = store_ixon(&Ixon::ELet(nd, t.data, v.data, b.data), stt)?; - let meta = store_ixon( - &Ixon::meta(vec![ - Metadatum::Link(n), - Metadatum::Link(t.meta), - Metadatum::Link(v.meta), - Metadatum::Link(b.meta), - ]), - stt, - )?; - result.push(MetaAddress { data, meta }) - }, - Frame::Proj(n, t, i) => { - let s = result.pop().expect("Frame::Proj missing s result"); - let data = store_ixon(&Ixon::EPrj(t.data, i.clone(), s.data), stt)?; - let meta = store_ixon( - &Ixon::meta(vec![ - Metadatum::Link(n), - Metadatum::Link(t.meta), - Metadatum::Link(s.meta), - ]), - stt, - )?; - result.push(MetaAddress { data, meta }) - }, - Frame::Cache(expr) => { - if let Some(result) = result.last() { - cache.exprs.insert(expr, result.clone()); + + Frame::BuildApp => { + let a_root = + cache.arena_roots.pop().expect("BuildApp missing arg root"); + let f_root = + cache.arena_roots.pop().expect("BuildApp missing fun root"); + let arg = results.pop().expect("BuildApp missing arg"); + let fun = results.pop().expect("BuildApp missing fun"); + results.push(Expr::app(fun, arg)); + cache.arena_roots.push( + cache.arena.alloc(ExprMetaData::App { children: [f_root, a_root] }), + ); + }, + + Frame::BuildLam(name_addr, info) => { + let body_root = + cache.arena_roots.pop().expect("BuildLam missing body root"); + let ty_root = + cache.arena_roots.pop().expect("BuildLam missing ty root"); + let body = results.pop().expect("BuildLam missing body"); + let ty = results.pop().expect("BuildLam missing ty"); + results.push(Expr::lam(ty, body)); + cache.arena_roots.push(cache.arena.alloc(ExprMetaData::Binder { + name: name_addr, + info, + children: [ty_root, body_root], + })); + }, + + Frame::BuildAll(name_addr, info) => { + let body_root = + cache.arena_roots.pop().expect("BuildAll missing body root"); + let ty_root = + 
cache.arena_roots.pop().expect("BuildAll missing ty root"); + let body = results.pop().expect("BuildAll missing body"); + let ty = results.pop().expect("BuildAll missing ty"); + results.push(Expr::all(ty, body)); + cache.arena_roots.push(cache.arena.alloc(ExprMetaData::Binder { + name: name_addr, + info, + children: [ty_root, body_root], + })); + }, + + Frame::BuildLet(name_addr, non_dep) => { + let body_root = + cache.arena_roots.pop().expect("BuildLet missing body root"); + let val_root = + cache.arena_roots.pop().expect("BuildLet missing val root"); + let ty_root = + cache.arena_roots.pop().expect("BuildLet missing ty root"); + let body = results.pop().expect("BuildLet missing body"); + let val = results.pop().expect("BuildLet missing val"); + let ty = results.pop().expect("BuildLet missing ty"); + results.push(Expr::let_(non_dep, ty, val, body)); + cache.arena_roots.push(cache.arena.alloc(ExprMetaData::LetBinder { + name: name_addr, + children: [ty_root, val_root, body_root], + })); + }, + + Frame::BuildProj(type_ref_idx, field_idx, struct_name_addr) => { + let child_root = + cache.arena_roots.pop().expect("BuildProj missing child root"); + let struct_val = results.pop().expect("BuildProj missing struct_val"); + results.push(Expr::prj(type_ref_idx, field_idx, struct_val)); + cache.arena_roots.push(cache.arena.alloc(ExprMetaData::Prj { + struct_name: struct_name_addr, + child: child_root, + })); + }, + + Frame::WrapMdata(mdata) => { + // Mdata doesn't change the Ixon expression — only wraps the arena node + let inner_root = + cache.arena_roots.pop().expect("WrapMdata missing inner root"); + cache.arena_roots.push( + cache.arena.alloc(ExprMetaData::Mdata { mdata, child: inner_root }), + ); + }, + + Frame::Cache(e) => { + let e_key = Address::from_blake3_hash(*e.get_hash()); + if let Some(result) = results.last() { + let arena_root = + *cache.arena_roots.last().expect("Cache missing arena root"); + cache + .exprs + .insert(e_key, CachedExpr { expr: result.clone(), 
arena_root }); } }, } } - result.pop().ok_or(CompileError::CompileExpr) + + results + .pop() + .ok_or(CompileError::UnsupportedExpr { desc: "empty result".into() }) } -pub fn compare_expr( - x: &Expr, - y: &Expr, - mut_ctx: &MutCtx, - x_lvls: &[Name], - y_lvls: &[Name], +/// Compile a Lean DataValue to Ixon DataValue. +fn compile_data_value(dv: &LeanDataValue, stt: &CompileState) -> DataValue { + match dv { + LeanDataValue::OfString(s) => DataValue::OfString(store_string(s, stt)), + LeanDataValue::OfBool(b) => DataValue::OfBool(*b), + LeanDataValue::OfName(n) => DataValue::OfName(compile_name(n, stt)), + LeanDataValue::OfNat(n) => DataValue::OfNat(store_nat(n, stt)), + LeanDataValue::OfInt(i) => { + // Serialize Int and store as blob + let mut bytes = Vec::new(); + match i { + crate::ix::env::Int::OfNat(n) => { + bytes.push(0); + bytes.extend_from_slice(&n.to_le_bytes()); + }, + crate::ix::env::Int::NegSucc(n) => { + bytes.push(1); + bytes.extend_from_slice(&n.to_le_bytes()); + }, + } + DataValue::OfInt(stt.env.store_blob(bytes)) + }, + LeanDataValue::OfSyntax(syn) => { + // Serialize syntax and store as blob + let bytes = serialize_syntax(syn, stt); + DataValue::OfSyntax(stt.env.store_blob(bytes)) + }, + } +} + +/// Serialize a Lean Syntax to bytes. 
+fn serialize_syntax(syn: &LeanSyntax, stt: &CompileState) -> Vec { + let mut bytes = Vec::new(); + serialize_syntax_inner(syn, stt, &mut bytes); + bytes +} + +fn serialize_syntax_inner( + syn: &LeanSyntax, stt: &CompileState, -) -> Result { - match (x.as_data(), y.as_data()) { - (ExprData::Mvar(..), _) | (_, ExprData::Mvar(..)) => { - Err(CompileError::ExprMVar) + bytes: &mut Vec, +) { + match syn { + LeanSyntax::Missing => bytes.push(0), + LeanSyntax::Node(info, kind, args) => { + bytes.push(1); + serialize_source_info(info, stt, bytes); + bytes.extend_from_slice(compile_name(kind, stt).as_bytes()); + Tag0::new(args.len() as u64).put(bytes); + for arg in args { + serialize_syntax_inner(arg, stt, bytes); + } }, - (ExprData::Fvar(..), _) | (_, ExprData::Fvar(..)) => { - Err(CompileError::ExprFVar) + LeanSyntax::Atom(info, val) => { + bytes.push(2); + serialize_source_info(info, stt, bytes); + bytes.extend_from_slice(store_string(val, stt).as_bytes()); }, - (ExprData::Mdata(_, x, _), ExprData::Mdata(_, y, _)) => { - compare_expr(x, y, mut_ctx, x_lvls, y_lvls, stt) + LeanSyntax::Ident(info, raw_val, val, preresolved) => { + bytes.push(3); + serialize_source_info(info, stt, bytes); + serialize_substring(raw_val, stt, bytes); + bytes.extend_from_slice(compile_name(val, stt).as_bytes()); + Tag0::new(preresolved.len() as u64).put(bytes); + for pr in preresolved { + serialize_preresolved(pr, stt, bytes); + } }, - (ExprData::Mdata(_, x, _), _) => { - compare_expr(x, y, mut_ctx, x_lvls, y_lvls, stt) + } +} + +fn serialize_source_info( + info: &LeanSourceInfo, + stt: &CompileState, + bytes: &mut Vec, +) { + match info { + LeanSourceInfo::Original(leading, leading_pos, trailing, trailing_pos) => { + bytes.push(0); + serialize_substring(leading, stt, bytes); + // u64::MAX sentinel for positions that overflow u64 (should never happen in practice) + Tag0::new(leading_pos.to_u64().unwrap_or(u64::MAX)).put(bytes); + serialize_substring(trailing, stt, bytes); + 
Tag0::new(trailing_pos.to_u64().unwrap_or(u64::MAX)).put(bytes); }, - (_, ExprData::Mdata(_, y, _)) => { - compare_expr(x, y, mut_ctx, x_lvls, y_lvls, stt) + LeanSourceInfo::Synthetic(start, end, canonical) => { + bytes.push(1); + Tag0::new(start.to_u64().unwrap_or(u64::MAX)).put(bytes); + Tag0::new(end.to_u64().unwrap_or(u64::MAX)).put(bytes); + bytes.push(if *canonical { 1 } else { 0 }); }, - (ExprData::Bvar(x, _), ExprData::Bvar(y, _)) => Ok(SOrd::cmp(x, y)), - (ExprData::Bvar(..), _) => Ok(SOrd::lt(true)), - (_, ExprData::Bvar(..)) => Ok(SOrd::gt(true)), - (ExprData::Sort(x, _), ExprData::Sort(y, _)) => { - compare_level(x, y, x_lvls, y_lvls) + LeanSourceInfo::None => bytes.push(2), + } +} + +fn serialize_substring( + ss: &LeanSubstring, + stt: &CompileState, + bytes: &mut Vec, +) { + bytes.extend_from_slice(store_string(&ss.str, stt).as_bytes()); + Tag0::new(ss.start_pos.to_u64().unwrap_or(u64::MAX)).put(bytes); + Tag0::new(ss.stop_pos.to_u64().unwrap_or(u64::MAX)).put(bytes); +} + +fn serialize_preresolved( + pr: &SyntaxPreresolved, + stt: &CompileState, + bytes: &mut Vec, +) { + match pr { + SyntaxPreresolved::Namespace(n) => { + bytes.push(0); + bytes.extend_from_slice(compile_name(n, stt).as_bytes()); }, - (ExprData::Sort(..), _) => Ok(SOrd::lt(true)), - (_, ExprData::Sort(..)) => Ok(SOrd::gt(true)), - (ExprData::Const(x, xls, _), ExprData::Const(y, yls, _)) => { - let us = - SOrd::try_zip(|a, b| compare_level(a, b, x_lvls, y_lvls), xls, yls)?; - if us.ordering != Ordering::Equal { - Ok(us) - } else if x == y { - Ok(SOrd::eq(true)) - } else { - match (mut_ctx.get(x), mut_ctx.get(y)) { - (Some(nx), Some(ny)) => Ok(SOrd::weak_cmp(nx, ny)), - (Some(..), _) => Ok(SOrd::lt(true)), - (None, Some(..)) => Ok(SOrd::gt(true)), - (None, None) => { - let xa = compile_ref(x, stt)?; - let ya = compile_ref(y, stt)?; - Ok(SOrd::cmp(&xa.data, &ya.data)) - }, - } + SyntaxPreresolved::Decl(n, fields) => { + bytes.push(1); + bytes.extend_from_slice(compile_name(n, 
stt).as_bytes()); + Tag0::new(fields.len() as u64).put(bytes); + for f in fields { + bytes.extend_from_slice(store_string(f, stt).as_bytes()); } }, - - (ExprData::Const(..), _) => Ok(SOrd::lt(true)), - (_, ExprData::Const(..)) => Ok(SOrd::gt(true)), - (ExprData::App(xl, xr, _), ExprData::App(yl, yr, _)) => SOrd::try_compare( - compare_expr(xl, yl, mut_ctx, x_lvls, y_lvls, stt)?, - || compare_expr(xr, yr, mut_ctx, x_lvls, y_lvls, stt), - ), - (ExprData::App(..), _) => Ok(SOrd::lt(true)), - (_, ExprData::App(..)) => Ok(SOrd::gt(true)), - (ExprData::Lam(_, xt, xb, _, _), ExprData::Lam(_, yt, yb, _, _)) => { - SOrd::try_compare( - compare_expr(xt, yt, mut_ctx, x_lvls, y_lvls, stt)?, - || compare_expr(xb, yb, mut_ctx, x_lvls, y_lvls, stt), - ) - }, - (ExprData::Lam(..), _) => Ok(SOrd::lt(true)), - (_, ExprData::Lam(..)) => Ok(SOrd::gt(true)), - ( - ExprData::ForallE(_, xt, xb, _, _), - ExprData::ForallE(_, yt, yb, _, _), - ) => SOrd::try_compare( - compare_expr(xt, yt, mut_ctx, x_lvls, y_lvls, stt)?, - || compare_expr(xb, yb, mut_ctx, x_lvls, y_lvls, stt), - ), - (ExprData::ForallE(..), _) => Ok(SOrd::lt(true)), - (_, ExprData::ForallE(..)) => Ok(SOrd::gt(true)), - ( - ExprData::LetE(_, xt, xv, xb, _, _), - ExprData::LetE(_, yt, yv, yb, _, _), - ) => SOrd::try_zip( - |a, b| compare_expr(a, b, mut_ctx, x_lvls, y_lvls, stt), - &[xt, xv, xb], - &[yt, yv, yb], - ), - (ExprData::LetE(..), _) => Ok(SOrd::lt(true)), - (_, ExprData::LetE(..)) => Ok(SOrd::gt(true)), - (ExprData::Lit(x, _), ExprData::Lit(y, _)) => Ok(SOrd::cmp(x, y)), - (ExprData::Lit(..), _) => Ok(SOrd::lt(true)), - (_, ExprData::Lit(..)) => Ok(SOrd::gt(true)), - (ExprData::Proj(tnx, ix, tx, _), ExprData::Proj(tny, iy, ty, _)) => { - let tn = match (mut_ctx.get(tnx), mut_ctx.get(tny)) { - (Some(nx), Some(ny)) => Ok(SOrd::weak_cmp(nx, ny)), - (Some(..), _) => Ok(SOrd::lt(true)), - (None, Some(..)) => Ok(SOrd::gt(true)), - (None, None) => { - let xa = compile_ref(tnx, stt)?; - let ya = compile_ref(tny, stt)?; - 
Ok(SOrd::cmp(&xa.data, &ya.data)) - }, - }?; - SOrd::try_compare(tn, || { - SOrd::try_compare(SOrd::cmp(ix, iy), || { - compare_expr(tx, ty, mut_ctx, x_lvls, y_lvls, stt) + } +} + +// =========================================================================== +// Sharing analysis helper +// =========================================================================== + +/// Result of sharing analysis including size statistics. +struct SharingResult { + /// Rewritten expressions with Share nodes + rewritten: Vec>, + /// Shared subexpressions + sharing: Vec>, + /// Hash-consed size: sum of unique subterm base_sizes + hash_consed_size: usize, +} + +/// Compute the hash-consed size from the info_map. +/// This is the theoretical size if each unique subterm were stored once in a content-addressed store. +/// Each unique expression = 32-byte key + value (with 32-byte hash references for children/externals). +fn compute_hash_consed_size( + info_map: &std::collections::HashMap, +) -> usize { + info_map.values().map(|info| info.hash_consed_size).sum() +} + +/// Apply sharing analysis to a set of expressions. +/// Returns the rewritten expressions, sharing vector, and hash-consed size. +/// +/// Hash-consed size tracking is controlled by the global `TRACK_HASH_CONSED_SIZE` flag. 
+fn apply_sharing_with_stats( + exprs: Vec>, + block_name: Option<&str>, +) -> SharingResult { + let track = TRACK_HASH_CONSED_SIZE.load(AtomicOrdering::Relaxed); + let analyze = ANALYZE_SHARING.load(AtomicOrdering::Relaxed); + let (info_map, ptr_to_hash) = analyze_block(&exprs, track); + + // Compute hash-consed size (sum from info_map, which is 0 if tracking disabled) + let hash_consed_size = compute_hash_consed_size(&info_map); + + // Output detailed analysis if requested and this is a large block + // Use threshold to catch pathological cases + if analyze && info_map.len() > 5000 { + let name = block_name.unwrap_or(""); + let stats = sharing::analyze_sharing_stats(&info_map); + eprintln!( + "\n=== Sharing analysis for block {:?} with {} unique subterms ===", + name, + info_map.len() + ); + eprintln!("{}", stats); + eprintln!( + "hash_consed_size from analysis: {} bytes (tracking={})", + hash_consed_size, track + ); + } + + // Early exit if no sharing opportunities (< 2 repeated subterms) + let has_candidates = info_map.values().any(|info| info.usage_count >= 2); + if !has_candidates { + return SharingResult { + rewritten: exprs, + sharing: Vec::new(), + hash_consed_size, + }; + } + + let shared_hashes = decide_sharing(&info_map); + + // Early exit if nothing to share + if shared_hashes.is_empty() { + return SharingResult { + rewritten: exprs, + sharing: Vec::new(), + hash_consed_size, + }; + } + + let (rewritten, sharing) = + build_sharing_vec(&exprs, &shared_hashes, &ptr_to_hash, &info_map); + SharingResult { rewritten, sharing, hash_consed_size } +} + +/// Apply sharing analysis to a set of expressions (without stats). +/// Returns the rewritten expressions and the sharing vector. +#[cfg(test)] +fn apply_sharing(exprs: Vec>) -> (Vec>, Vec>) { + let result = apply_sharing_with_stats(exprs, None); + (result.rewritten, result.sharing) +} + +/// Result of applying sharing to a singleton constant. 
+struct SingletonSharingResult { + /// The compiled Constant + constant: Constant, + /// Hash-consed size of expressions + hash_consed_size: usize, +} + +/// Apply sharing to a Definition and return a Constant with stats. +#[allow(clippy::needless_pass_by_value)] +fn apply_sharing_to_definition_with_stats( + def: Definition, + refs: Vec
, + univs: Vec>, + block_name: Option<&str>, +) -> SingletonSharingResult { + let result = apply_sharing_with_stats( + vec![def.typ.clone(), def.value.clone()], + block_name, + ); + let def = Definition { + kind: def.kind, + safety: def.safety, + lvls: def.lvls, + typ: result.rewritten[0].clone(), + value: result.rewritten[1].clone(), + }; + let constant = + Constant::with_tables(ConstantInfo::Defn(def), result.sharing, refs, univs); + SingletonSharingResult { constant, hash_consed_size: result.hash_consed_size } +} + +/// Apply sharing to an Axiom and return a Constant with stats. +#[allow(clippy::needless_pass_by_value)] +fn apply_sharing_to_axiom_with_stats( + ax: Axiom, + refs: Vec
, + univs: Vec>, +) -> SingletonSharingResult { + let result = apply_sharing_with_stats(vec![ax.typ.clone()], None); + let ax = Axiom { + is_unsafe: ax.is_unsafe, + lvls: ax.lvls, + typ: result.rewritten[0].clone(), + }; + let constant = + Constant::with_tables(ConstantInfo::Axio(ax), result.sharing, refs, univs); + SingletonSharingResult { constant, hash_consed_size: result.hash_consed_size } +} + +/// Apply sharing to a Quotient and return a Constant with stats. +#[allow(clippy::needless_pass_by_value)] +fn apply_sharing_to_quotient_with_stats( + quot: Quotient, + refs: Vec
, + univs: Vec>, +) -> SingletonSharingResult { + let result = apply_sharing_with_stats(vec![quot.typ.clone()], None); + let quot = Quotient { + kind: quot.kind, + lvls: quot.lvls, + typ: result.rewritten[0].clone(), + }; + let constant = Constant::with_tables( + ConstantInfo::Quot(quot), + result.sharing, + refs, + univs, + ); + SingletonSharingResult { constant, hash_consed_size: result.hash_consed_size } +} + +/// Apply sharing to a Recursor and return a Constant with stats. +fn apply_sharing_to_recursor_with_stats( + rec: Recursor, + refs: Vec
, + univs: Vec>, +) -> SingletonSharingResult { + // Collect all expressions: typ + all rule rhs + let mut exprs = vec![rec.typ.clone()]; + for rule in &rec.rules { + exprs.push(rule.rhs.clone()); + } + + let result = apply_sharing_with_stats(exprs, None); + let typ = result.rewritten[0].clone(); + let rules: Vec = rec + .rules + .into_iter() + .zip(result.rewritten.into_iter().skip(1)) + .map(|(r, rhs)| RecursorRule { fields: r.fields, rhs }) + .collect(); + + let rec = Recursor { + k: rec.k, + is_unsafe: rec.is_unsafe, + lvls: rec.lvls, + params: rec.params, + indices: rec.indices, + motives: rec.motives, + minors: rec.minors, + typ, + rules, + }; + let constant = + Constant::with_tables(ConstantInfo::Recr(rec), result.sharing, refs, univs); + SingletonSharingResult { constant, hash_consed_size: result.hash_consed_size } +} + +/// Result of applying sharing to a mutual block. +struct MutualBlockSharingResult { + /// The compiled Constant + constant: Constant, + /// Hash-consed size of all expressions in the block + hash_consed_size: usize, +} + +/// Apply sharing to a mutual block and return a Constant with stats. +fn apply_sharing_to_mutual_block( + mut_consts: Vec, + refs: Vec
, + univs: Vec>, + block_name: Option<&str>, +) -> MutualBlockSharingResult { + // Collect all expressions from all constants in the block + let mut all_exprs: Vec> = Vec::new(); + let mut layout: Vec<(MutConstKind, Vec)> = Vec::new(); + + for mc in &mut_consts { + let (kind, indices) = match mc { + IxonMutConst::Defn(def) => { + let start = all_exprs.len(); + all_exprs.push(def.typ.clone()); + all_exprs.push(def.value.clone()); + (MutConstKind::Defn, vec![start, start + 1]) + }, + IxonMutConst::Indc(ind) => { + let start = all_exprs.len(); + all_exprs.push(ind.typ.clone()); + let mut indices = vec![start]; + for ctor in &ind.ctors { + indices.push(all_exprs.len()); + all_exprs.push(ctor.typ.clone()); + } + (MutConstKind::Indc, indices) + }, + IxonMutConst::Recr(rec) => { + let start = all_exprs.len(); + all_exprs.push(rec.typ.clone()); + let mut indices = vec![start]; + for rule in &rec.rules { + indices.push(all_exprs.len()); + all_exprs.push(rule.rhs.clone()); + } + (MutConstKind::Recr, indices) + }, + }; + layout.push((kind, indices)); + } + + // Apply sharing analysis to all expressions at once (with stats) + let sharing_result = apply_sharing_with_stats(all_exprs, block_name); + let rewritten = sharing_result.rewritten; + let sharing = sharing_result.sharing; + let expr_hash_consed_size = sharing_result.hash_consed_size; + + // Compute structural overhead for hash-consed store. + // In a hash-consed store, each unique node = 32-byte key + value (with 32-byte refs for children). + // This accounts for Inductive/Constructor/Recursor/Definition structures, not just expressions. 
+ let mut structural_overhead: usize = 0; + for mc in &mut_consts { + match mc { + IxonMutConst::Defn(_) => { + // Definition: 32-byte key + (kind + safety + lvls + typ_ref + value_ref) + // = 32 + (1 + 1 + 8 + 32 + 32) = 106 bytes + structural_overhead += 106; + }, + IxonMutConst::Indc(ind) => { + // Inductive: 32-byte key + (flags + lvls + params + indices + nested + typ_ref + ctors_array_ref) + // = 32 + (3 + 8 + 8 + 8 + 8 + 32 + 32) = 131 bytes + structural_overhead += 131; + // Each Constructor: 32-byte key + (flags + lvls + cidx + params + fields + typ_ref) + // = 32 + (1 + 8 + 8 + 8 + 8 + 32) = 97 bytes + structural_overhead += ind.ctors.len() * 97; + // Ctors array: 32-byte key + N * 32-byte refs + structural_overhead += 32 + ind.ctors.len() * 32; + }, + IxonMutConst::Recr(rec) => { + // Recursor: 32-byte key + (k + flags + lvls + params + indices + motives + minors + typ_ref + rules_array_ref) + // = 32 + (1 + 1 + 8 + 8 + 8 + 8 + 8 + 32 + 32) = 138 bytes + structural_overhead += 138; + // Each RecursorRule: 32-byte key + (fields + rhs_ref) = 32 + (8 + 32) = 72 bytes + structural_overhead += rec.rules.len() * 72; + // Rules array: 32-byte key + N * 32-byte refs + structural_overhead += 32 + rec.rules.len() * 32; + }, + } + } + // Refs: each is a 32-byte address (already content-addressed, no extra overhead) + // Univs: each unique univ needs storage. Estimate 32 + 8 bytes per univ. 
+ structural_overhead += univs.len() * 40; + + let hash_consed_size = expr_hash_consed_size + structural_overhead; + + // Rebuild the constants with rewritten expressions + let mut new_consts = Vec::with_capacity(mut_consts.len()); + for (i, mc) in mut_consts.into_iter().enumerate() { + let (kind, indices) = &layout[i]; + let new_mc = match (kind, mc) { + (MutConstKind::Defn, IxonMutConst::Defn(def)) => { + IxonMutConst::Defn(Definition { + kind: def.kind, + safety: def.safety, + lvls: def.lvls, + typ: rewritten[indices[0]].clone(), + value: rewritten[indices[1]].clone(), }) - }) - }, + }, + (MutConstKind::Indc, IxonMutConst::Indc(ind)) => { + let new_ctors: Vec = ind + .ctors + .into_iter() + .enumerate() + .map(|(ci, ctor)| Constructor { + is_unsafe: ctor.is_unsafe, + lvls: ctor.lvls, + cidx: ctor.cidx, + params: ctor.params, + fields: ctor.fields, + typ: rewritten[indices[ci + 1]].clone(), + }) + .collect(); + IxonMutConst::Indc(Inductive { + recr: ind.recr, + refl: ind.refl, + is_unsafe: ind.is_unsafe, + lvls: ind.lvls, + params: ind.params, + indices: ind.indices, + nested: ind.nested, + typ: rewritten[indices[0]].clone(), + ctors: new_ctors, + }) + }, + (MutConstKind::Recr, IxonMutConst::Recr(rec)) => { + let new_rules: Vec = rec + .rules + .into_iter() + .enumerate() + .map(|(ri, rule)| RecursorRule { + fields: rule.fields, + rhs: rewritten[indices[ri + 1]].clone(), + }) + .collect(); + IxonMutConst::Recr(Recursor { + k: rec.k, + is_unsafe: rec.is_unsafe, + lvls: rec.lvls, + params: rec.params, + indices: rec.indices, + motives: rec.motives, + minors: rec.minors, + typ: rewritten[indices[0]].clone(), + rules: new_rules, + }) + }, + _ => unreachable!("layout mismatch"), + }; + new_consts.push(new_mc); } + + let constant = + Constant::with_tables(ConstantInfo::Muts(new_consts), sharing, refs, univs); + MutualBlockSharingResult { constant, hash_consed_size } +} + +/// Helper enum for tracking mutual constant layout during sharing. 
+#[derive(Clone, Copy)] +enum MutConstKind { + Defn, + Indc, + Recr, } -pub fn compile_defn( + +// =========================================================================== +// Constant compilation +// =========================================================================== + +/// Compile a Definition. +/// Arena persists across type + value within a constant. +fn compile_definition( def: &Def, mut_ctx: &MutCtx, cache: &mut BlockCache, stt: &CompileState, -) -> Result<(Definition, Metadata), CompileError> { - let univ_ctx = &def.level_params; - let n = compile_name(&def.name, stt)?; - let ls = - def.level_params.iter().map(|n| compile_name(n, stt)).try_collect()?; - let t = compile_expr(&def.typ, univ_ctx, mut_ctx, cache, stt)?; - let v = compile_expr(&def.value, univ_ctx, mut_ctx, cache, stt)?; - let all = def.all.iter().map(|n| compile_name(n, stt)).try_collect()?; +) -> Result<(Definition, ConstantMeta), CompileError> { + let univ_params = &def.level_params; + + // Compile type expression (arena grows) + let typ = compile_expr(&def.typ, univ_params, mut_ctx, cache, stt)?; + let type_root = *cache.arena_roots.last().expect("missing type arena root"); + + // Compile value expression (arena continues growing) + let value = compile_expr(&def.value, univ_params, mut_ctx, cache, stt)?; + let value_root = *cache.arena_roots.last().expect("missing value arena root"); + + // Take arena and clear for next constant + let arena = std::mem::take(&mut cache.arena); + cache.arena_roots.clear(); + cache.exprs.clear(); + + let name_addr = compile_name(&def.name, stt); + let lvl_addrs: Vec
= + univ_params.iter().map(|n| compile_name(n, stt)).collect(); + let all_addrs: Vec
= + def.all.iter().map(|n| compile_name(n, stt)).collect(); + let ctx_addrs: Vec
= + ctx_to_all(mut_ctx).iter().map(|n| compile_name(n, stt)).collect(); + let data = Definition { kind: def.kind, safety: def.safety, - lvls: Nat(def.level_params.len().into()), - typ: t.data, - value: v.data, + lvls: def.level_params.len() as u64, + typ, + value, }; - let meta = Metadata { - nodes: vec![ - Metadatum::Link(n), - Metadatum::Links(ls), - Metadatum::Hints(def.hints), - Metadatum::Link(t.meta), - Metadatum::Link(v.meta), - Metadatum::Links(all), - ], + + let meta = ConstantMeta::Def { + name: name_addr, + lvls: lvl_addrs, + hints: def.hints, + all: all_addrs, + ctx: ctx_addrs, + arena, + type_root, + value_root, }; + Ok((data, meta)) } -pub fn compile_rule( - rule: &RecursorRule, - univ_ctx: &[Name], +/// Compile a RecursorRule. +fn compile_recursor_rule( + rule: &LeanRecursorRule, + univ_params: &[Name], mut_ctx: &MutCtx, cache: &mut BlockCache, stt: &CompileState, -) -> Result<(ixon::RecursorRule, Address, Address), CompileError> { - let n = compile_name(&rule.ctor, stt)?; - let rhs = compile_expr(&rule.rhs, univ_ctx, mut_ctx, cache, stt)?; - let data = - ixon::RecursorRule { fields: rule.n_fields.clone(), rhs: rhs.data }; - Ok((data, n, rhs.meta)) +) -> Result<(RecursorRule, Address), CompileError> { + let rhs = compile_expr(&rule.rhs, univ_params, mut_ctx, cache, stt)?; + let ctor_addr = compile_name(&rule.ctor, stt); + let fields = nat_to_u64(&rule.n_fields, "n_fields too large")?; + + Ok((RecursorRule { fields, rhs }, ctor_addr)) } -pub fn compile_recr( - recr: &Rec, +/// Compile a Recursor. +/// Arena grows across type and all rule RHS expressions. +fn compile_recursor( + rec: &Rec, mut_ctx: &MutCtx, cache: &mut BlockCache, stt: &CompileState, -) -> Result<(Recursor, Metadata), CompileError> { - let univ_ctx = &recr.cnst.level_params; - let n = compile_name(&recr.cnst.name, stt)?; - let ls: Vec
= recr - .cnst - .level_params - .iter() - .map(|n| compile_name(n, stt)) - .try_collect()?; - let t = compile_expr(&recr.cnst.typ, univ_ctx, mut_ctx, cache, stt)?; - let mut rule_data = Vec::with_capacity(recr.rules.len()); - let mut rule_meta = Vec::with_capacity(recr.rules.len()); - for rule in recr.rules.iter() { - let (rr, rn, rm) = compile_rule(rule, univ_ctx, mut_ctx, cache, stt)?; - rule_data.push(rr); - rule_meta.push((rn, rm)); - } - let all = recr.all.iter().map(|n| compile_name(n, stt)).try_collect()?; +) -> Result<(Recursor, ConstantMeta), CompileError> { + let univ_params = &rec.cnst.level_params; + + // Compile type expression + let typ = compile_expr(&rec.cnst.typ, univ_params, mut_ctx, cache, stt)?; + let type_root = + *cache.arena_roots.last().expect("missing recursor type arena root"); + + let mut rules = Vec::with_capacity(rec.rules.len()); + let mut rule_addrs = Vec::new(); + let mut rule_roots = Vec::new(); + for rule in &rec.rules { + let (r, ctor_addr) = + compile_recursor_rule(rule, univ_params, mut_ctx, cache, stt)?; + rule_roots + .push(*cache.arena_roots.last().expect("missing rule arena root")); + rule_addrs.push(ctor_addr); + rules.push(r); + } + + // Take arena and clear for next constant + let arena = std::mem::take(&mut cache.arena); + cache.arena_roots.clear(); + cache.exprs.clear(); + + let name_addr = compile_name(&rec.cnst.name, stt); + let lvl_addrs: Vec
= + univ_params.iter().map(|n| compile_name(n, stt)).collect(); + let data = Recursor { - k: recr.k, - is_unsafe: recr.is_unsafe, - lvls: Nat(recr.cnst.level_params.len().into()), - params: recr.num_params.clone(), - indices: recr.num_indices.clone(), - motives: recr.num_motives.clone(), - minors: recr.num_minors.clone(), - typ: t.data, - rules: rule_data, + k: rec.k, + is_unsafe: rec.is_unsafe, + lvls: univ_params.len() as u64, + params: nat_to_u64(&rec.num_params, "num_params too large")?, + indices: nat_to_u64(&rec.num_indices, "num_indices too large")?, + motives: nat_to_u64(&rec.num_motives, "num_motives too large")?, + minors: nat_to_u64(&rec.num_minors, "num_minors too large")?, + typ, + rules, }; - let meta = Metadata { - nodes: vec![ - Metadatum::Link(n), - Metadatum::Links(ls), - Metadatum::Link(t.meta), - Metadatum::Map(rule_meta), - Metadatum::Links(all), - ], + + let all_addrs: Vec
= + rec.all.iter().map(|n| compile_name(n, stt)).collect(); + let ctx_addrs: Vec
= + ctx_to_all(mut_ctx).iter().map(|n| compile_name(n, stt)).collect(); + + let meta = ConstantMeta::Rec { + name: name_addr, + lvls: lvl_addrs, + rules: rule_addrs, + all: all_addrs, + ctx: ctx_addrs, + arena, + type_root, + rule_roots, }; + Ok((data, meta)) } -fn compile_ctor( +/// Compile a Constructor. +/// Each constructor gets its own arena. +fn compile_constructor( ctor: &ConstructorVal, - induct: Address, mut_ctx: &MutCtx, cache: &mut BlockCache, stt: &CompileState, -) -> Result<(Constructor, Metadata), CompileError> { - let n = compile_name(&ctor.cnst.name, stt)?; - let univ_ctx = &ctor.cnst.level_params; - let ls = ctor - .cnst - .level_params - .iter() - .map(|n| compile_name(n, stt)) - .try_collect()?; - let t = compile_expr(&ctor.cnst.typ, univ_ctx, mut_ctx, cache, stt)?; +) -> Result<(Constructor, ConstantMeta), CompileError> { + let univ_params = &ctor.cnst.level_params; + + let typ = compile_expr(&ctor.cnst.typ, univ_params, mut_ctx, cache, stt)?; + let type_root = + *cache.arena_roots.last().expect("missing ctor type arena root"); + + // Take arena for this constructor + let arena = std::mem::take(&mut cache.arena); + cache.arena_roots.clear(); + cache.exprs.clear(); + + let name_addr = compile_name(&ctor.cnst.name, stt); + let lvl_addrs: Vec
= + univ_params.iter().map(|n| compile_name(n, stt)).collect(); + let induct_addr = compile_name(&ctor.induct, stt); + let data = Constructor { is_unsafe: ctor.is_unsafe, - lvls: Nat(ctor.cnst.level_params.len().into()), - cidx: ctor.cidx.clone(), - params: ctor.num_params.clone(), - fields: ctor.num_fields.clone(), - typ: t.data, + lvls: univ_params.len() as u64, + cidx: nat_to_u64(&ctor.cidx, "cidx too large")?, + params: nat_to_u64(&ctor.num_params, "ctor num_params too large")?, + fields: nat_to_u64(&ctor.num_fields, "num_fields too large")?, + typ, }; - let meta = Metadata { - nodes: vec![ - Metadatum::Link(n), - Metadatum::Links(ls), - Metadatum::Link(t.meta), - Metadatum::Link(induct), - ], + + let meta = ConstantMeta::Ctor { + name: name_addr, + lvls: lvl_addrs, + induct: induct_addr, + arena, + type_root, }; - Ok((data, meta)) -} -pub fn mk_indc( - ind: &InductiveVal, - env: &Arc, -) -> Result { - let mut ctors = Vec::with_capacity(ind.ctors.len()); - for ctor_name in &ind.ctors { - if let Some(ConstantInfo::CtorInfo(c)) = env.as_ref().get(ctor_name) { - ctors.push(c.clone()); - } else { - return Err(CompileError::MkIndc); - }; - } - Ok(Ind { ind: ind.clone(), ctors }) + Ok((data, meta)) } -pub fn compile_indc( +/// Compile an Inductive. +/// The inductive type gets its own arena. Each constructor gets its own arena +/// via compile_constructor. No CtorMeta duplication — ConstantMeta::Indc only +/// stores constructor name addresses. 
+fn compile_inductive( ind: &Ind, mut_ctx: &MutCtx, cache: &mut BlockCache, stt: &CompileState, -) -> Result<(Inductive, FxHashMap), CompileError> { - let n = compile_name(&ind.ind.cnst.name, stt)?; - let univ_ctx = &ind.ind.cnst.level_params; - let ls = ind - .ind - .cnst - .level_params - .iter() - .map(|n| compile_name(n, stt)) - .try_collect()?; - let t = compile_expr(&ind.ind.cnst.typ, univ_ctx, mut_ctx, cache, stt)?; - let mut ctor_data = Vec::with_capacity(ind.ctors.len()); - let mut ctor_meta = Vec::with_capacity(ind.ctors.len()); - let mut meta_map = FxHashMap::default(); - for ctor in ind.ctors.iter() { - let (cd, cm) = compile_ctor(ctor, n.clone(), mut_ctx, cache, stt)?; - ctor_data.push(cd); - let cn = compile_name(&ctor.cnst.name, stt)?; - let cm = store_meta(&cm, stt)?; - ctor_meta.push(cm.clone()); - meta_map.insert(cn, cm); - } - let all = ind.ind.all.iter().map(|n| compile_name(n, stt)).try_collect()?; +) -> Result<(Inductive, ConstantMeta, Vec), CompileError> { + let univ_params = &ind.ind.cnst.level_params; + + // Compile inductive type + let typ = compile_expr(&ind.ind.cnst.typ, univ_params, mut_ctx, cache, stt)?; + let type_root = + *cache.arena_roots.last().expect("missing indc type arena root"); + + // Take arena for inductive type + let indc_arena = std::mem::take(&mut cache.arena); + cache.arena_roots.clear(); + cache.exprs.clear(); + + let mut ctors = Vec::with_capacity(ind.ctors.len()); + let mut ctor_const_metas = Vec::new(); + let mut ctor_name_addrs = Vec::new(); + for ctor in &ind.ctors { + let (c, m) = compile_constructor(ctor, mut_ctx, cache, stt)?; + let ctor_name_addr = compile_name(&ctor.cnst.name, stt); + ctor_name_addrs.push(ctor_name_addr); + ctor_const_metas.push(m); + ctors.push(c); + } + + let name_addr = compile_name(&ind.ind.cnst.name, stt); + let lvl_addrs: Vec
= + univ_params.iter().map(|n| compile_name(n, stt)).collect(); + let data = Inductive { recr: ind.ind.is_rec, refl: ind.ind.is_reflexive, is_unsafe: ind.ind.is_unsafe, - lvls: Nat(ind.ind.cnst.level_params.len().into()), - params: ind.ind.num_params.clone(), - indices: ind.ind.num_indices.clone(), - nested: ind.ind.num_nested.clone(), - typ: t.data, - ctors: ctor_data, + lvls: univ_params.len() as u64, + params: nat_to_u64(&ind.ind.num_params, "inductive num_params too large")?, + indices: nat_to_u64( + &ind.ind.num_indices, + "inductive num_indices too large", + )?, + nested: nat_to_u64(&ind.ind.num_nested, "num_nested too large")?, + typ, + ctors, }; - let meta = Metadata { - nodes: vec![ - Metadatum::Link(n.clone()), - Metadatum::Links(ls), - Metadatum::Link(t.meta), - Metadatum::Links(ctor_meta), - Metadatum::Links(all), - ], + + let all_addrs: Vec
= + ind.ind.all.iter().map(|n| compile_name(n, stt)).collect(); + let ctx_addrs: Vec
= + ctx_to_all(mut_ctx).iter().map(|n| compile_name(n, stt)).collect(); + + let meta = ConstantMeta::Indc { + name: name_addr, + lvls: lvl_addrs, + ctors: ctor_name_addrs, + all: all_addrs, + ctx: ctx_addrs, + arena: indc_arena, + type_root, }; - let m = store_meta(&meta, stt)?; - meta_map.insert(n, m); - Ok((data, meta_map)) + + Ok((data, meta, ctor_const_metas)) } -pub fn compile_quot( - val: &QuotVal, +/// Compile an Axiom. +fn compile_axiom( + val: &AxiomVal, cache: &mut BlockCache, stt: &CompileState, -) -> Result<(Quotient, Metadata), CompileError> { - let n = compile_name(&val.cnst.name, stt)?; - let univ_ctx = &val.cnst.level_params; - let ls = - val.cnst.level_params.iter().map(|n| compile_name(n, stt)).try_collect()?; - let t = - compile_expr(&val.cnst.typ, univ_ctx, &MutCtx::default(), cache, stt)?; - let data = Quotient { - kind: val.kind, - lvls: Nat(val.cnst.level_params.len().into()), - typ: t.data, - }; - let meta = Metadata { - nodes: vec![ - Metadatum::Link(n), - Metadatum::Links(ls), - Metadatum::Link(t.meta), - ], - }; +) -> Result<(Axiom, ConstantMeta), CompileError> { + let univ_params = &val.cnst.level_params; + + let typ = + compile_expr(&val.cnst.typ, univ_params, &MutCtx::default(), cache, stt)?; + let type_root = + *cache.arena_roots.last().expect("missing axiom type arena root"); + + let arena = std::mem::take(&mut cache.arena); + cache.arena_roots.clear(); + cache.exprs.clear(); + + let name_addr = compile_name(&val.cnst.name, stt); + let lvl_addrs: Vec
= + univ_params.iter().map(|n| compile_name(n, stt)).collect(); + + let data = + Axiom { is_unsafe: val.is_unsafe, lvls: univ_params.len() as u64, typ }; + + let meta = + ConstantMeta::Axio { name: name_addr, lvls: lvl_addrs, arena, type_root }; + Ok((data, meta)) } -pub fn compile_axio( - val: &AxiomVal, +/// Compile a Quotient. +fn compile_quotient( + val: &QuotVal, cache: &mut BlockCache, stt: &CompileState, -) -> Result<(Axiom, Metadata), CompileError> { - let n = compile_name(&val.cnst.name, stt)?; - let univ_ctx = &val.cnst.level_params; - let ls = - val.cnst.level_params.iter().map(|n| compile_name(n, stt)).try_collect()?; - let t = - compile_expr(&val.cnst.typ, univ_ctx, &MutCtx::default(), cache, stt)?; - let data = Axiom { - is_unsafe: val.is_unsafe, - lvls: Nat(val.cnst.level_params.len().into()), - typ: t.data, - }; - let meta = Metadata { - nodes: vec![ - Metadatum::Link(n), - Metadatum::Links(ls), - Metadatum::Link(t.meta), - ], - }; +) -> Result<(Quotient, ConstantMeta), CompileError> { + let univ_params = &val.cnst.level_params; + + let typ = + compile_expr(&val.cnst.typ, univ_params, &MutCtx::default(), cache, stt)?; + let type_root = + *cache.arena_roots.last().expect("missing quot type arena root"); + + let arena = std::mem::take(&mut cache.arena); + cache.arena_roots.clear(); + cache.exprs.clear(); + + let name_addr = compile_name(&val.cnst.name, stt); + let lvl_addrs: Vec
= + univ_params.iter().map(|n| compile_name(n, stt)).collect(); + + let data = Quotient { kind: val.kind, lvls: univ_params.len() as u64, typ }; + + let meta = + ConstantMeta::Quot { name: name_addr, lvls: lvl_addrs, arena, type_root }; + Ok((data, meta)) } -pub fn compare_defn( - x: &Def, - y: &Def, - mut_ctx: &MutCtx, - stt: &CompileState, -) -> Result { - SOrd::try_compare( - SOrd { strong: true, ordering: x.kind.cmp(&y.kind) }, - || { - SOrd::try_compare( - SOrd::cmp(&x.level_params.len(), &y.level_params.len()), - || { - SOrd::try_compare( - compare_expr( - &x.typ, - &y.typ, - mut_ctx, - &x.level_params, - &y.level_params, - stt, - )?, - || { - compare_expr( - &x.value, - &y.value, - mut_ctx, - &x.level_params, - &y.level_params, - stt, - ) - }, - ) - }, - ) - }, - ) +// =========================================================================== +// Mutual block compilation +// =========================================================================== + +/// Result of compiling a mutual block. +struct CompiledMutualBlock { + /// The compiled Constant + constant: Constant, + /// Content-addressed hash + addr: Address, + /// Hash-consed size (theoretical minimum with perfect DAG sharing) + hash_consed_size: usize, + /// Serialized size (actual bytes) + serialized_size: usize, } -pub fn compare_ctor_inner( - x: &ConstructorVal, - y: &ConstructorVal, - mut_ctx: &MutCtx, - stt: &CompileState, -) -> Result { - SOrd::try_compare( - SOrd::cmp(&x.cnst.level_params.len(), &y.cnst.level_params.len()), - || { - SOrd::try_compare(SOrd::cmp(&x.cidx, &y.cidx), || { - SOrd::try_compare(SOrd::cmp(&x.num_params, &y.num_params), || { - SOrd::try_compare(SOrd::cmp(&x.num_fields, &y.num_fields), || { - compare_expr( - &x.cnst.typ, - &y.cnst.typ, - mut_ctx, - &x.cnst.level_params, - &y.cnst.level_params, - stt, - ) - }) - }) - }) - }, - ) +/// Compile a mutual block with block-level sharing. +/// Returns the Constant, its content-addressed hash, and size statistics. 
+fn compile_mutual_block( + mut_consts: Vec, + refs: Vec
, + univs: Vec>, + block_name: Option<&str>, +) -> CompiledMutualBlock { + // Apply sharing analysis across all expressions in the mutual block + let result = + apply_sharing_to_mutual_block(mut_consts, refs, univs, block_name); + let constant = result.constant; + let hash_consed_size = result.hash_consed_size; + + // Compute content address and serialized size + let mut bytes = Vec::new(); + constant.put(&mut bytes); + let serialized_size = bytes.len(); + let addr = Address::hash(&bytes); + + CompiledMutualBlock { constant, addr, hash_consed_size, serialized_size } } -pub fn compare_ctor( - x: &ConstructorVal, +/// Create Inductive from InductiveVal and Env. +pub fn mk_indc( + ind: &InductiveVal, + env: &Arc, +) -> Result { + let mut ctors = Vec::with_capacity(ind.ctors.len()); + for ctor_name in &ind.ctors { + if let Some(LeanConstantInfo::CtorInfo(c)) = env.as_ref().get(ctor_name) { + ctors.push(c.clone()); + } else { + return Err(CompileError::MissingConstant { name: ctor_name.pretty() }); + } + } + Ok(Ind { ind: ind.clone(), ctors }) +} + +// =========================================================================== +// Alpha-invariant comparison and sorting +// +// These functions establish a canonical ordering for constants within mutual +// blocks. Since names are not alpha-invariant, we compare by structure: +// universe levels, expressions, field counts, etc. The `SOrd` return type +// tracks whether the comparison is "strong" (based solely on alpha-invariant +// data) or "weak" (needed a name-based tiebreaker). +// =========================================================================== + +/// Compare two universe levels structurally, using level parameter position +/// (not name) for `Param` comparisons. 
+pub fn compare_level( + x: &Level, + y: &Level, + x_ctx: &[Name], + y_ctx: &[Name], +) -> Result { + match (x.as_data(), y.as_data()) { + (LevelData::Mvar(..), _) | (_, LevelData::Mvar(..)) => { + Err(CompileError::UnsupportedExpr { + desc: "level metavariable in comparison".into(), + }) + }, + (LevelData::Zero(_), LevelData::Zero(_)) => Ok(SOrd::eq(true)), + (LevelData::Zero(_), _) => Ok(SOrd::lt(true)), + (_, LevelData::Zero(_)) => Ok(SOrd::gt(true)), + (LevelData::Succ(x, _), LevelData::Succ(y, _)) => { + compare_level(x, y, x_ctx, y_ctx) + }, + (LevelData::Succ(_, _), _) => Ok(SOrd::lt(true)), + (_, LevelData::Succ(_, _)) => Ok(SOrd::gt(true)), + (LevelData::Max(xl, xr, _), LevelData::Max(yl, yr, _)) => { + SOrd::try_compare(compare_level(xl, yl, x_ctx, y_ctx)?, || { + compare_level(xr, yr, x_ctx, y_ctx) + }) + }, + (LevelData::Max(_, _, _), _) => Ok(SOrd::lt(true)), + (_, LevelData::Max(_, _, _)) => Ok(SOrd::gt(true)), + (LevelData::Imax(xl, xr, _), LevelData::Imax(yl, yr, _)) => { + SOrd::try_compare(compare_level(xl, yl, x_ctx, y_ctx)?, || { + compare_level(xr, yr, x_ctx, y_ctx) + }) + }, + (LevelData::Imax(_, _, _), _) => Ok(SOrd::lt(true)), + (_, LevelData::Imax(_, _, _)) => Ok(SOrd::gt(true)), + (LevelData::Param(x, _), LevelData::Param(y, _)) => { + match ( + x_ctx.iter().position(|n| x == n), + y_ctx.iter().position(|n| y == n), + ) { + (Some(xi), Some(yi)) => Ok(SOrd::cmp(&xi, &yi)), + (None, _) => Err(CompileError::UnknownUnivParam { + curr: String::new(), + param: x.pretty(), + }), + (_, None) => Err(CompileError::UnknownUnivParam { + curr: String::new(), + param: y.pretty(), + }), + } + }, + } +} + +/// Compare two Lean expressions structurally for canonical ordering. +/// Strips `Mdata` wrappers, compares by constructor tag, then recurses +/// into subexpressions. Constants are compared by address (or mutual index). 
+pub fn compare_expr( + x: &LeanExpr, + y: &LeanExpr, + mut_ctx: &MutCtx, + x_lvls: &[Name], + y_lvls: &[Name], + stt: &CompileState, +) -> Result { + match (x.as_data(), y.as_data()) { + (ExprData::Mvar(..), _) | (_, ExprData::Mvar(..)) => { + Err(CompileError::UnsupportedExpr { + desc: "metavariable in comparison".into(), + }) + }, + (ExprData::Fvar(..), _) | (_, ExprData::Fvar(..)) => { + Err(CompileError::UnsupportedExpr { desc: "fvar in comparison".into() }) + }, + (ExprData::Mdata(_, x, _), ExprData::Mdata(_, y, _)) => { + compare_expr(x, y, mut_ctx, x_lvls, y_lvls, stt) + }, + (ExprData::Mdata(_, x, _), _) => { + compare_expr(x, y, mut_ctx, x_lvls, y_lvls, stt) + }, + (_, ExprData::Mdata(_, y, _)) => { + compare_expr(x, y, mut_ctx, x_lvls, y_lvls, stt) + }, + (ExprData::Bvar(x, _), ExprData::Bvar(y, _)) => Ok(SOrd::cmp(x, y)), + (ExprData::Bvar(..), _) => Ok(SOrd::lt(true)), + (_, ExprData::Bvar(..)) => Ok(SOrd::gt(true)), + (ExprData::Sort(x, _), ExprData::Sort(y, _)) => { + compare_level(x, y, x_lvls, y_lvls) + }, + (ExprData::Sort(..), _) => Ok(SOrd::lt(true)), + (_, ExprData::Sort(..)) => Ok(SOrd::gt(true)), + (ExprData::Const(x, xls, _), ExprData::Const(y, yls, _)) => { + let us = + SOrd::try_zip(|a, b| compare_level(a, b, x_lvls, y_lvls), xls, yls)?; + if us.ordering != Ordering::Equal { + Ok(us) + } else if x == y { + Ok(SOrd::eq(true)) + } else { + match (mut_ctx.get(x), mut_ctx.get(y)) { + (Some(nx), Some(ny)) => Ok(SOrd::weak_cmp(nx, ny)), + (Some(..), _) => Ok(SOrd::lt(true)), + (None, Some(..)) => Ok(SOrd::gt(true)), + (None, None) => { + // Compare by address + let xa = stt.name_to_addr.get(x); + let ya = stt.name_to_addr.get(y); + match (xa, ya) { + (Some(xa), Some(ya)) => Ok(SOrd::cmp(xa.value(), ya.value())), + _ => { + Ok(SOrd::cmp(x.get_hash().as_bytes(), y.get_hash().as_bytes())) + }, + } + }, + } + } + }, + (ExprData::Const(..), _) => Ok(SOrd::lt(true)), + (_, ExprData::Const(..)) => Ok(SOrd::gt(true)), + (ExprData::App(xl, xr, _), 
ExprData::App(yl, yr, _)) => SOrd::try_compare( + compare_expr(xl, yl, mut_ctx, x_lvls, y_lvls, stt)?, + || compare_expr(xr, yr, mut_ctx, x_lvls, y_lvls, stt), + ), + (ExprData::App(..), _) => Ok(SOrd::lt(true)), + (_, ExprData::App(..)) => Ok(SOrd::gt(true)), + (ExprData::Lam(_, xt, xb, _, _), ExprData::Lam(_, yt, yb, _, _)) => { + SOrd::try_compare( + compare_expr(xt, yt, mut_ctx, x_lvls, y_lvls, stt)?, + || compare_expr(xb, yb, mut_ctx, x_lvls, y_lvls, stt), + ) + }, + (ExprData::Lam(..), _) => Ok(SOrd::lt(true)), + (_, ExprData::Lam(..)) => Ok(SOrd::gt(true)), + ( + ExprData::ForallE(_, xt, xb, _, _), + ExprData::ForallE(_, yt, yb, _, _), + ) => SOrd::try_compare( + compare_expr(xt, yt, mut_ctx, x_lvls, y_lvls, stt)?, + || compare_expr(xb, yb, mut_ctx, x_lvls, y_lvls, stt), + ), + (ExprData::ForallE(..), _) => Ok(SOrd::lt(true)), + (_, ExprData::ForallE(..)) => Ok(SOrd::gt(true)), + ( + ExprData::LetE(_, xt, xv, xb, _, _), + ExprData::LetE(_, yt, yv, yb, _, _), + ) => SOrd::try_zip( + |a, b| compare_expr(a, b, mut_ctx, x_lvls, y_lvls, stt), + &[xt, xv, xb], + &[yt, yv, yb], + ), + (ExprData::LetE(..), _) => Ok(SOrd::lt(true)), + (_, ExprData::LetE(..)) => Ok(SOrd::gt(true)), + (ExprData::Lit(x, _), ExprData::Lit(y, _)) => Ok(SOrd::cmp(x, y)), + (ExprData::Lit(..), _) => Ok(SOrd::lt(true)), + (_, ExprData::Lit(..)) => Ok(SOrd::gt(true)), + (ExprData::Proj(tnx, ix, tx, _), ExprData::Proj(tny, iy, ty, _)) => { + let tn: Result = + match (mut_ctx.get(tnx), mut_ctx.get(tny)) { + (Some(nx), Some(ny)) => Ok(SOrd::weak_cmp(nx, ny)), + (Some(..), _) => Ok(SOrd::lt(true)), + (None, Some(..)) => Ok(SOrd::gt(true)), + (None, None) => { + let xa = stt.name_to_addr.get(tnx); + let ya = stt.name_to_addr.get(tny); + match (xa, ya) { + (Some(xa), Some(ya)) => Ok(SOrd::cmp(xa.value(), ya.value())), + _ => Ok(SOrd::cmp( + tnx.get_hash().as_bytes(), + tny.get_hash().as_bytes(), + )), + } + }, + }; + let tn = tn?; + SOrd::try_compare(tn, || { + SOrd::try_compare(SOrd::cmp(ix, iy), 
|| { + compare_expr(tx, ty, mut_ctx, x_lvls, y_lvls, stt) + }) + }) + }, + } +} + +// =========================================================================== +// Constant-level comparison and sorting +// =========================================================================== + +/// Compare two definitions by kind, level parameter count, type, then value. +pub fn compare_defn( + x: &Def, + y: &Def, + mut_ctx: &MutCtx, + stt: &CompileState, +) -> Result { + SOrd::try_compare( + SOrd { strong: true, ordering: x.kind.cmp(&y.kind) }, + || { + SOrd::try_compare( + SOrd::cmp(&x.level_params.len(), &y.level_params.len()), + || { + SOrd::try_compare( + compare_expr( + &x.typ, + &y.typ, + mut_ctx, + &x.level_params, + &y.level_params, + stt, + )?, + || { + compare_expr( + &x.value, + &y.value, + mut_ctx, + &x.level_params, + &y.level_params, + stt, + ) + }, + ) + }, + ) + }, + ) +} + +/// Compare two constructors by level params, cidx, params, fields, then type. +pub fn compare_ctor_inner( + x: &ConstructorVal, + y: &ConstructorVal, + mut_ctx: &MutCtx, + stt: &CompileState, +) -> Result { + SOrd::try_compare( + SOrd::cmp(&x.cnst.level_params.len(), &y.cnst.level_params.len()), + || { + SOrd::try_compare(SOrd::cmp(&x.cidx, &y.cidx), || { + SOrd::try_compare(SOrd::cmp(&x.num_params, &y.num_params), || { + SOrd::try_compare(SOrd::cmp(&x.num_fields, &y.num_fields), || { + compare_expr( + &x.cnst.typ, + &y.cnst.typ, + mut_ctx, + &x.cnst.level_params, + &y.cnst.level_params, + stt, + ) + }) + }) + }) + }, + ) +} + +/// Compare two constructors with result caching (keyed by name pair). +pub fn compare_ctor( + x: &ConstructorVal, y: &ConstructorVal, mut_ctx: &MutCtx, cache: &mut BlockCache, @@ -1033,6 +1694,7 @@ pub fn compare_ctor( } } +/// Compare two inductives by params, indices, constructor count, type, then constructors. 
pub fn compare_indc( x: &Ind, y: &Ind, @@ -1076,9 +1738,10 @@ pub fn compare_indc( ) } +/// Compare two recursor rules by field count, then RHS expression. pub fn compare_recr_rule( - x: &RecursorRule, - y: &RecursorRule, + x: &LeanRecursorRule, + y: &LeanRecursorRule, mut_ctx: &MutCtx, x_lvls: &[Name], y_lvls: &[Name], @@ -1089,6 +1752,7 @@ pub fn compare_recr_rule( }) } +/// Compare two recursors by params, indices, motives, minors, k, type, then rules. pub fn compare_recr( x: &Rec, y: &Rec, @@ -1138,6 +1802,18 @@ pub fn compare_recr( ) } +/// Returns a kind ordinal for cross-kind comparison of mutual constants. +fn mut_const_kind(c: &MutConst) -> u8 { + match c { + MutConst::Defn(_) => 0, + MutConst::Indc(_) => 1, + MutConst::Recr(_) => 2, + } +} + +/// Compare two mutual constants with caching. Dispatches to the appropriate +/// type-specific comparator (defn, indc, recr). Different-kind constants +/// are ordered by kind tag. pub fn compare_const( x: &MutConst, y: &MutConst, @@ -1145,35 +1821,29 @@ pub fn compare_const( cache: &mut BlockCache, stt: &CompileState, ) -> Result { - let key = if x.name() <= y.name() { - (x.name(), y.name()) + let (key, reversed) = if x.name() <= y.name() { + ((x.name(), y.name()), false) } else { - (y.name(), x.name()) + ((y.name(), x.name()), true) }; if let Some(so) = cache.cmps.get(&key) { - Ok(*so) - } else { - let so: SOrd = match (x, y) { - (MutConst::Defn(x), MutConst::Defn(y)) => { - compare_defn(x, y, mut_ctx, stt)? - }, - (MutConst::Indc(x), MutConst::Indc(y)) => { - compare_indc(x, y, mut_ctx, cache, stt)? - }, - (MutConst::Recr(x), MutConst::Recr(y)) => { - compare_recr(x, y, mut_ctx, stt)? 
- }, - (MutConst::Defn(_) | MutConst::Indc(_) | MutConst::Recr(_), _) => { - SOrd::lt(true) - }, - }; - if so.strong { - cache.cmps.insert(key, so.ordering); - } - Ok(so.ordering) + return Ok(if reversed { so.reverse() } else { *so }); + } + let so: SOrd = match (x, y) { + (MutConst::Defn(x), MutConst::Defn(y)) => compare_defn(x, y, mut_ctx, stt)?, + (MutConst::Indc(x), MutConst::Indc(y)) => { + compare_indc(x, y, mut_ctx, cache, stt)? + }, + (MutConst::Recr(x), MutConst::Recr(y)) => compare_recr(x, y, mut_ctx, stt)?, + _ => SOrd::cmp(&mut_const_kind(x), &mut_const_kind(y)), + }; + if so.strong { + cache.cmps.insert(key, so.ordering); } + Ok(if reversed { so.ordering.reverse() } else { so.ordering }) } +/// Check if two mutual constants are structurally equal. pub fn eq_const( x: &MutConst, y: &MutConst, @@ -1185,6 +1855,8 @@ pub fn eq_const( Ok(ordering == Ordering::Equal) } +/// Group consecutive equal elements in a sorted slice. Assumes the input +/// is already sorted by the same relation used for equality testing. pub fn group_by( items: Vec<&T>, mut eq: F, @@ -1211,6 +1883,7 @@ where Ok(groups) } +/// Merge two sorted sequences of mutual constants into one sorted sequence. pub fn merge<'a>( left: Vec<&'a MutConst>, right: Vec<&'a MutConst>, @@ -1247,6 +1920,7 @@ pub fn merge<'a>( Ok(result) } +/// Merge-sort mutual constants using structural comparison. pub fn sort_by_compare<'a>( items: &[&'a MutConst], ctx: &MutCtx, @@ -1263,21 +1937,27 @@ pub fn sort_by_compare<'a>( merge(left, right, ctx, cache, stt) } +/// Sort mutual constants into a canonical ordering and group equal ones. +/// Uses iterative refinement: sort by structure, group equals, re-sort with +/// updated mutual context indices, until the partition stabilizes. 
pub fn sort_consts<'a>( cs: &[&'a MutConst], cache: &mut BlockCache, stt: &CompileState, ) -> Result>, CompileError> { - //println!("sort_consts"); - let mut classes = vec![cs.to_owned()]; + // Sort by name first to match Lean's behavior and ensure deterministic output + let mut sorted_cs: Vec<&'a MutConst> = cs.to_owned(); + sorted_cs.sort_by_key(|x| x.name()); + let mut classes = vec![sorted_cs]; loop { - //println!("sort_consts loop"); let ctx = MutConst::ctx(&classes); let mut new_classes: Vec> = vec![]; for class in classes.iter() { match class.len() { 0 => { - return Err(CompileError::SortConsts); + return Err(CompileError::InvalidMutualBlock { + reason: "empty class".into(), + }); }, 1 => { new_classes.push(class.clone()); @@ -1300,368 +1980,2487 @@ pub fn sort_consts<'a>( } } -fn compile_mut_consts( - classes: Vec>, - mut_ctx: &MutCtx, +// =========================================================================== +// Main compilation entry points +// =========================================================================== + +/// Compile a single constant. 
+pub fn compile_const( + name: &Name, + all: &NameSet, + lean_env: &Arc, cache: &mut BlockCache, stt: &CompileState, -) -> Result<(Ixon, FxHashMap), CompileError> { - //println!("compile_mut_consts"); - let mut data = vec![]; - let mut meta = FxHashMap::default(); - for class in classes { - let mut class_data = vec![]; - for cnst in class { - match cnst { - MutConst::Indc(x) => { - let (i, m) = compile_indc(x, mut_ctx, cache, stt)?; - class_data.push(ixon::MutConst::Indc(i)); - meta.extend(m); - }, - MutConst::Defn(x) => { - let (d, m) = compile_defn(x, mut_ctx, cache, stt)?; - class_data.push(ixon::MutConst::Defn(d)); - meta.insert(compile_name(&x.name, stt)?, store_meta(&m, stt)?); - }, - MutConst::Recr(x) => { - let (r, m) = compile_recr(x, mut_ctx, cache, stt)?; - class_data.push(ixon::MutConst::Recr(r)); - meta.insert(compile_name(&x.cnst.name, stt)?, store_meta(&m, stt)?); - }, - } - } - if class_data.is_empty() || !class_data.iter().all(|x| x == &class_data[0]) - { - return Err(CompileError::CompileMutConsts(class_data.clone())); - } else { - data.push(class_data[0].clone()) - } +) -> Result { + if let Some(cached) = stt.name_to_addr.get(name) { + return Ok(cached.clone()); } - Ok((Ixon::Muts(data), meta)) -} -pub fn compile_mutual( - mutual: &MutConst, - all: &NameSet, - env: &Arc, - cache: &mut BlockCache, - stt: &CompileState, -) -> Result<(Ixon, Ixon), CompileError> { - //println!("compile_mutual"); - if all.len() == 1 && matches!(&mutual, MutConst::Defn(_) | MutConst::Recr(_)) - { - match mutual { - MutConst::Defn(defn) => { - //println!("compile_mutual defn"); - let mut_ctx = MutConst::single_ctx(defn.name.clone()); - let (data, meta) = compile_defn(defn, &mut_ctx, cache, stt)?; - Ok((Ixon::Defn(data), Ixon::Meta(meta))) - }, - MutConst::Recr(recr) => { - //println!("compile_mutual recr"); - let mut_ctx = MutConst::single_ctx(recr.cnst.name.clone()); - let (data, meta) = compile_recr(recr, &mut_ctx, cache, stt)?; - Ok((Ixon::Recr(data), 
Ixon::Meta(meta))) - }, - _ => { - //println!("compile_mutual unreachable"); - unreachable!() + let cnst = lean_env + .get(name) + .ok_or_else(|| CompileError::MissingConstant { name: name.pretty() })?; + + // Helper: compile a single definition/theorem/opaque (non-mutual case). + fn compile_single_def( + name: &Name, + def: &Def, + cache: &mut BlockCache, + stt: &CompileState, + ) -> Result { + let mut_ctx = MutConst::single_ctx(def.name.clone()); + let (data, meta) = compile_definition(def, &mut_ctx, cache, stt)?; + let refs: Vec
= cache.refs.iter().cloned().collect(); + let univs: Vec> = cache.univs.iter().cloned().collect(); + let name_str = name.pretty(); + let result = apply_sharing_to_definition_with_stats( + data, + refs, + univs, + Some(&name_str), + ); + let mut bytes = Vec::new(); + result.constant.put(&mut bytes); + let serialized_size = bytes.len(); + let addr = Address::hash(&bytes); + stt.env.store_const(addr.clone(), result.constant); + stt.env.register_name(name.clone(), Named::new(addr.clone(), meta)); + stt.block_stats.insert( + name.clone(), + BlockSizeStats { + hash_consed_size: result.hash_consed_size, + serialized_size, + const_count: 1, }, - } - } else { - //println!("compile_mutual else"); - let mut cs = Vec::new(); - for name in all { - let Some(const_info) = env.get(name) else { - return Err(CompileError::CompileMutual); - }; - let mut_const = match const_info { - ConstantInfo::InductInfo(val) => { - //println!("compile_mutual InductInfo"); - MutConst::Indc(mk_indc(val, env)?) - }, - ConstantInfo::DefnInfo(val) => { - //println!("compile_mutual DefnInfo"); - MutConst::Defn(Def::mk_defn(val)) - }, - ConstantInfo::OpaqueInfo(val) => { - //println!("compile_mutual OpaqueInfo"); - MutConst::Defn(Def::mk_opaq(val)) - }, - ConstantInfo::ThmInfo(val) => { - //println!("compile_mutual ThmInfo"); - MutConst::Defn(Def::mk_theo(val)) - }, - ConstantInfo::RecInfo(val) => { - //println!("compile_mutual RecInfo"); - MutConst::Recr(val.clone()) - }, - _ => { - //println!("compile_mutual continue"); - continue; - }, - }; - cs.push(mut_const); - } - let mut_consts = - sort_consts(&cs.iter().collect::>(), cache, stt)?; - let mut_meta: Vec> = mut_consts - .iter() - .map(|m| m.iter().map(|c| compile_name(&c.name(), stt)).try_collect()) - .try_collect()?; - let mut_ctx = MutConst::ctx(&mut_consts); - let (data, metas) = compile_mut_consts(mut_consts, &mut_ctx, cache, stt)?; - let ctx = mut_ctx - .iter() - .map(|(n, i)| Ok((compile_name(n, stt)?, store_nat(i, stt)?))) - .try_collect()?; 
- let block = MetaAddress { - data: store_ixon(&data, stt)?, - meta: store_meta( - &Metadata { - nodes: vec![ - Metadatum::Muts(mut_meta), - Metadatum::Map(ctx), - Metadatum::Map(metas.clone().into_iter().collect()), - ], - }, - stt, - )?, - }; - stt.blocks.insert(block.clone()); - let mut ret: Option<(Ixon, Ixon)> = None; - for c in cs { - let idx = mut_ctx.get(&c.name()).ok_or(CompileError::CompileMutual2)?; - let n = compile_name(&c.name(), stt)?; - let meta = match metas.get(&n) { - Some(m) => Ok(Metadata { - nodes: vec![ - Metadatum::Link(block.meta.clone()), - Metadatum::Link(m.clone()), - ], - }), - None => Err(CompileError::CompileMutual3), - }?; - let data = match c { - MutConst::Defn(..) => Ixon::DPrj(DefinitionProj { - idx: idx.clone(), - block: block.data.clone(), - }), - MutConst::Indc(..) => Ixon::IPrj(InductiveProj { - idx: idx.clone(), - block: block.data.clone(), - }), - MutConst::Recr(..) => Ixon::RPrj(RecursorProj { - idx: idx.clone(), - block: block.data.clone(), - }), - }; - let addr = MetaAddress { - data: store_ixon(&data, stt)?, - meta: store_meta(&meta, stt)?, - }; - stt.consts.insert(c.name(), addr.clone()); - if c.name() == mutual.name() { - ret = Some((data, Ixon::Meta(meta))); - } - for ctor in c.ctors() { - let cdata = Ixon::CPrj(ConstructorProj { - idx: idx.clone(), - cidx: ctor.cidx.clone(), - block: block.data.clone(), - }); - let cn = compile_name(&ctor.cnst.name, stt)?; - let cmeta = match metas.get(&cn) { - Some(m) => Ok(Metadata { - nodes: vec![ - Metadatum::Link(block.meta.clone()), - Metadatum::Link(m.clone()), - ], - }), - None => Err(CompileError::CompileMutual4), - }?; - let caddr = MetaAddress { - data: store_ixon(&cdata, stt)?, - meta: store_meta(&cmeta, stt)?, - }; - stt.consts.insert(ctor.cnst.name, caddr); - } - } - ret.ok_or(CompileError::CompileMutual5) + ); + Ok(addr) } -} -pub fn compile_const_info( - cnst: &ConstantInfo, - all: &NameSet, - env: &Arc, - cache: &mut BlockCache, - stt: &CompileState, -) -> Result { - 
match cnst { - ConstantInfo::DefnInfo(val) => { - //println!("compile_const_info def"); - let (d, m) = compile_mutual( - &MutConst::Defn(Def::mk_defn(val)), - all, - env, - cache, - stt, - )?; - Ok(MetaAddress { data: store_ixon(&d, stt)?, meta: store_ixon(&m, stt)? }) - }, - ConstantInfo::OpaqueInfo(val) => { - //println!("compile_const_info opaq"); - let (d, m) = compile_mutual( - &MutConst::Defn(Def::mk_opaq(val)), - all, - env, - cache, - stt, - )?; - Ok(MetaAddress { data: store_ixon(&d, stt)?, meta: store_ixon(&m, stt)? }) + // Handle each constant type + let addr = match cnst { + LeanConstantInfo::DefnInfo(val) => { + if all.len() == 1 { + compile_single_def(name, &Def::mk_defn(val), cache, stt)? + } else { + compile_mutual(name, all, lean_env, cache, stt)? + } }, - ConstantInfo::ThmInfo(val) => { - //println!("compile_const_info theo"); - let (d, m) = compile_mutual( - &MutConst::Defn(Def::mk_theo(val)), - all, - env, - cache, - stt, - )?; - Ok(MetaAddress { data: store_ixon(&d, stt)?, meta: store_ixon(&m, stt)? }) + + LeanConstantInfo::ThmInfo(val) => { + if all.len() == 1 { + compile_single_def(name, &Def::mk_theo(val), cache, stt)? + } else { + compile_mutual(name, all, lean_env, cache, stt)? + } }, - ConstantInfo::CtorInfo(val) => { - //println!("compile_const_info ctor"); - if let Some(ConstantInfo::InductInfo(ind)) = env.as_ref().get(&val.induct) - { - let _ = compile_mutual( - &MutConst::Indc(mk_indc(ind, env)?), - all, - env, - cache, - stt, - )?; - let addr = stt - .consts - .get(&val.cnst.name) - .ok_or(CompileError::CompileConstInfo)?; - Ok(addr.clone()) + + LeanConstantInfo::OpaqueInfo(val) => { + if all.len() == 1 { + compile_single_def(name, &Def::mk_opaq(val), cache, stt)? } else { - Err(CompileError::CompileConstInfo2) + compile_mutual(name, all, lean_env, cache, stt)? 
} }, - ConstantInfo::InductInfo(val) => { - //println!("compile_const_info ind"); - let (d, m) = compile_mutual( - &MutConst::Indc(mk_indc(val, env)?), - all, - env, - cache, - stt, - )?; - Ok(MetaAddress { data: store_ixon(&d, stt)?, meta: store_ixon(&m, stt)? }) + + LeanConstantInfo::AxiomInfo(val) => { + let (data, meta) = compile_axiom(val, cache, stt)?; + let refs: Vec
= cache.refs.iter().cloned().collect(); + let univs: Vec> = cache.univs.iter().cloned().collect(); + let result = apply_sharing_to_axiom_with_stats(data, refs, univs); + let mut bytes = Vec::new(); + result.constant.put(&mut bytes); + let serialized_size = bytes.len(); + let addr = Address::hash(&bytes); + stt.env.store_const(addr.clone(), result.constant); + stt.env.register_name(name.clone(), Named::new(addr.clone(), meta)); + stt.block_stats.insert( + name.clone(), + BlockSizeStats { + hash_consed_size: result.hash_consed_size, + serialized_size, + const_count: 1, + }, + ); + addr }, - ConstantInfo::RecInfo(val) => { - //println!("compile_const_info rec"); - let (d, m) = - compile_mutual(&MutConst::Recr(val.clone()), all, env, cache, stt)?; - Ok(MetaAddress { data: store_ixon(&d, stt)?, meta: store_ixon(&m, stt)? }) + + LeanConstantInfo::QuotInfo(val) => { + let (data, meta) = compile_quotient(val, cache, stt)?; + let refs: Vec
= cache.refs.iter().cloned().collect(); + let univs: Vec> = cache.univs.iter().cloned().collect(); + let result = apply_sharing_to_quotient_with_stats(data, refs, univs); + let mut bytes = Vec::new(); + result.constant.put(&mut bytes); + let serialized_size = bytes.len(); + let addr = Address::hash(&bytes); + stt.env.store_const(addr.clone(), result.constant); + stt.env.register_name(name.clone(), Named::new(addr.clone(), meta)); + stt.block_stats.insert( + name.clone(), + BlockSizeStats { + hash_consed_size: result.hash_consed_size, + serialized_size, + const_count: 1, + }, + ); + addr }, - ConstantInfo::QuotInfo(val) => { - //println!("compile_const_info quot"); - let (quot, meta) = compile_quot(val, cache, stt)?; - Ok(MetaAddress { - data: store_ixon(&Ixon::Quot(quot), stt)?, - meta: store_ixon(&Ixon::Meta(meta), stt)?, - }) + + LeanConstantInfo::InductInfo(_) => { + compile_mutual(name, all, lean_env, cache, stt)? }, - ConstantInfo::AxiomInfo(val) => { - //println!("compile_const_info axio"); - let (axio, meta) = compile_axio(val, cache, stt)?; - Ok(MetaAddress { - data: store_ixon(&Ixon::Axio(axio), stt)?, - meta: store_ixon(&Ixon::Meta(meta), stt)?, - }) + + LeanConstantInfo::RecInfo(val) => { + if all.len() == 1 { + let mut_ctx = MutConst::single_ctx(val.cnst.name.clone()); + let (data, meta) = compile_recursor(val, &mut_ctx, cache, stt)?; + let refs: Vec
= cache.refs.iter().cloned().collect(); + let univs: Vec> = cache.univs.iter().cloned().collect(); + let result = apply_sharing_to_recursor_with_stats(data, refs, univs); + let mut bytes = Vec::new(); + result.constant.put(&mut bytes); + let serialized_size = bytes.len(); + let addr = Address::hash(&bytes); + stt.env.store_const(addr.clone(), result.constant); + stt.env.register_name(name.clone(), Named::new(addr.clone(), meta)); + stt.block_stats.insert( + name.clone(), + BlockSizeStats { + hash_consed_size: result.hash_consed_size, + serialized_size, + const_count: 1, + }, + ); + addr + } else { + compile_mutual(name, all, lean_env, cache, stt)? + } }, - } + + LeanConstantInfo::CtorInfo(val) => { + // Constructors are compiled as part of their inductive + if let Some(LeanConstantInfo::InductInfo(_)) = lean_env.get(&val.induct) { + let _ = compile_mutual(&val.induct, all, lean_env, cache, stt)?; + stt + .name_to_addr + .get(name) + .ok_or_else(|| CompileError::MissingConstant { name: name.pretty() })? + .clone() + } else { + return Err(CompileError::MissingConstant { + name: val.induct.pretty(), + }); + } + }, + }; + + stt.name_to_addr.insert(name.clone(), addr.clone()); + Ok(addr) } -// -pub fn compile_const( + +/// Compile a mutual block. 
+fn compile_mutual( name: &Name, all: &NameSet, - env: &Arc, + lean_env: &Arc, cache: &mut BlockCache, stt: &CompileState, -) -> Result { - //println!("compile_const {:?}", name.pretty()); - if let Some(cached) = stt.consts.get(name) { - Ok(cached.clone()) - } else { - let cnst = env.as_ref().get(name).ok_or(CompileError::CompileConst)?; - let addr = compile_const_info(cnst, all, env, cache, stt)?; - stt.consts.insert(name.clone(), addr.clone()); - Ok(addr) +) -> Result { + // Collect all constants in the mutual block + let mut cs = Vec::new(); + for n in all { + let Some(const_info) = lean_env.get(n) else { + return Err(CompileError::MissingConstant { name: n.pretty() }); + }; + let mut_const = match const_info { + LeanConstantInfo::InductInfo(val) => { + MutConst::Indc(mk_indc(val, lean_env)?) + }, + LeanConstantInfo::DefnInfo(val) => MutConst::Defn(Def::mk_defn(val)), + LeanConstantInfo::OpaqueInfo(val) => MutConst::Defn(Def::mk_opaq(val)), + LeanConstantInfo::ThmInfo(val) => MutConst::Defn(Def::mk_theo(val)), + LeanConstantInfo::RecInfo(val) => MutConst::Recr(val.clone()), + _ => continue, + }; + cs.push(mut_const); } -} -pub fn compile_env(env: &Arc) -> Result { - let start_ref_graph = std::time::SystemTime::now(); - let graph = build_ref_graph(env.as_ref()); - println!( - "Ref-graph: {:.2}s", - start_ref_graph.elapsed().unwrap().as_secs_f32() + // Sort constants + let sorted_classes = sort_consts(&cs.iter().collect::>(), cache, stt)?; + let mut_ctx = MutConst::ctx(&sorted_classes); + + // Compile each constant + let mut ixon_mutuals = Vec::new(); + let mut all_metas: FxHashMap = FxHashMap::default(); + + for class in &sorted_classes { + // Only push one representative per equivalence class into ixon_mutuals, + // since alpha-equivalent constants compile to identical data and share + // the same class index in MutConst::ctx. 
+ let mut representative_pushed = false; + for cnst in class { + match cnst { + MutConst::Defn(def) => { + let (data, meta) = compile_definition(def, &mut_ctx, cache, stt)?; + if !representative_pushed { + ixon_mutuals.push(IxonMutConst::Defn(data)); + representative_pushed = true; + } + all_metas.insert(def.name.clone(), meta); + }, + MutConst::Indc(ind) => { + let (data, meta, ctor_metas_vec) = + compile_inductive(ind, &mut_ctx, cache, stt)?; + if !representative_pushed { + ixon_mutuals.push(IxonMutConst::Indc(data)); + representative_pushed = true; + } + // Register per-constructor ConstantMeta::Ctor entries + for (ctor, ctor_meta) in ind.ctors.iter().zip(ctor_metas_vec) { + all_metas.insert(ctor.cnst.name.clone(), ctor_meta); + } + all_metas.insert(ind.ind.cnst.name.clone(), meta); + }, + MutConst::Recr(rec) => { + let (data, meta) = compile_recursor(rec, &mut_ctx, cache, stt)?; + if !representative_pushed { + ixon_mutuals.push(IxonMutConst::Recr(data)); + representative_pushed = true; + } + all_metas.insert(rec.cnst.name.clone(), meta); + }, + } + } + } + + // Create mutual block with sharing + let refs: Vec
= cache.refs.iter().cloned().collect(); + let univs: Vec> = cache.univs.iter().cloned().collect(); + let const_count = ixon_mutuals.len(); + let name_str = name.pretty(); + let compiled = + compile_mutual_block(ixon_mutuals, refs, univs, Some(&name_str)); + let block_addr = compiled.addr.clone(); + stt.env.store_const(block_addr.clone(), compiled.constant); + stt.blocks.insert(block_addr.clone()); + + // Store block size statistics (keyed by low-link name) + stt.block_stats.insert( + name.clone(), + BlockSizeStats { + hash_consed_size: compiled.hash_consed_size, + serialized_size: compiled.serialized_size, + const_count, + }, ); - let start_ground = std::time::SystemTime::now(); - let ungrounded = ground_consts(env.as_ref(), &graph.in_refs); + + // Create projections for each constant + let mut idx = 0u64; + for class in &sorted_classes { + for cnst in class { + let n = cnst.name(); + let meta = all_metas.get(&n).cloned().unwrap_or_default(); + + let proj = match cnst { + MutConst::Defn(_) => { + Constant::new(ConstantInfo::DPrj(DefinitionProj { + idx, + block: block_addr.clone(), + })) + }, + MutConst::Indc(ind) => { + // Register inductive projection + let indc_proj = Constant::new(ConstantInfo::IPrj(InductiveProj { + idx, + block: block_addr.clone(), + })); + let mut proj_bytes = Vec::new(); + indc_proj.put(&mut proj_bytes); + let proj_addr = Address::hash(&proj_bytes); + stt.env.store_const(proj_addr.clone(), indc_proj); + stt.env.register_name( + n.clone(), + Named::new(proj_addr.clone(), meta.clone()), + ); + stt.name_to_addr.insert(n.clone(), proj_addr.clone()); + + // Register constructor projections + for (cidx, ctor) in ind.ctors.iter().enumerate() { + let ctor_meta = + all_metas.get(&ctor.cnst.name).cloned().unwrap_or_default(); + let ctor_proj = + Constant::new(ConstantInfo::CPrj(ConstructorProj { + idx, + cidx: cidx as u64, + block: block_addr.clone(), + })); + let mut ctor_bytes = Vec::new(); + ctor_proj.put(&mut ctor_bytes); + let ctor_addr = 
Address::hash(&ctor_bytes); + stt.env.store_const(ctor_addr.clone(), ctor_proj); + stt.env.register_name( + ctor.cnst.name.clone(), + Named::new(ctor_addr.clone(), ctor_meta), + ); + stt.name_to_addr.insert(ctor.cnst.name.clone(), ctor_addr); + } + + continue; + }, + MutConst::Recr(_) => Constant::new(ConstantInfo::RPrj(RecursorProj { + idx, + block: block_addr.clone(), + })), + }; + + let mut proj_bytes = Vec::new(); + proj.put(&mut proj_bytes); + let proj_addr = Address::hash(&proj_bytes); + stt.env.store_const(proj_addr.clone(), proj); + stt.env.register_name(n.clone(), Named::new(proj_addr.clone(), meta)); + stt.name_to_addr.insert(n.clone(), proj_addr); + } + idx += 1; + } + + // Return the address for the requested name + stt + .name_to_addr + .get(name) + .ok_or_else(|| CompileError::MissingConstant { name: name.pretty() }) + .map(|r| r.clone()) +} + +/// Compile an entire Lean environment to Ixon format. +/// Work-stealing compilation using crossbeam channels. +/// +/// Instead of processing blocks in waves (which underutilizes cores when wave sizes vary), +/// we use a work queue. When a block completes, it immediately unlocks dependent blocks. 
+pub fn compile_env( + lean_env: &Arc, +) -> Result { + let graph = build_ref_graph(lean_env.as_ref()); + + let ungrounded = ground_consts(lean_env.as_ref(), &graph.in_refs); if !ungrounded.is_empty() { - for (n, e) in ungrounded { - println!("Ungrounded {:?}: {:?}", n, e); + for (n, e) in &ungrounded { + eprintln!("Ungrounded {:?}: {:?}", n, e); } - return Err(CompileError::UngroundedEnv); + return Err(CompileError::InvalidMutualBlock { + reason: "ungrounded environment".into(), + }); } - println!("Ground: {:.2}s", start_ground.elapsed().unwrap().as_secs_f32()); - let start_sccs = std::time::SystemTime::now(); - let blocks = compute_sccs(&graph.out_refs); - println!("SCCs: {:.2}s", start_sccs.elapsed().unwrap().as_secs_f32()); - let start_compile = std::time::SystemTime::now(); + + let condensed = compute_sccs(&graph.out_refs); + let stt = CompileState::default(); - let remaining: DashMap = DashMap::default(); - - blocks.blocks.par_iter().try_for_each(|(lo, all)| { - let deps = blocks.block_refs.get(lo).ok_or(CompileError::CondenseError)?; - remaining.insert(lo.clone(), (all.clone(), deps.clone())); - Ok::<(), CompileError>(()) - })?; - - //let num_blocks = remaining.len(); - //let mut i = 0; - - while !remaining.is_empty() { - //i += 1; - //let len = remaining.len(); - //let pct = 100f64 - ((len as f64 / num_blocks as f64) * 100f64); - //println!("Wave {i}, {pct}%: {len}/{num_blocks}"); - //println!("Stats {:?}", stt.stats()); - let ready: DashMap = DashMap::default(); - remaining.par_iter().for_each(|entry| { + // Build work-stealing data structures + let total_blocks = condensed.blocks.len(); + + // For each block: (all names in block, remaining dep count) + let block_info: DashMap = DashMap::default(); + + // Reverse deps: name → set of block leaders that depend on this name + let reverse_deps: DashMap> = DashMap::default(); + + // Initialize block info and reverse deps + for (lo, all) in &condensed.blocks { + let deps = + 
condensed.block_refs.get(lo).ok_or(CompileError::InvalidMutualBlock { + reason: "missing block refs".into(), + })?; + + block_info.insert(lo.clone(), (all.clone(), AtomicUsize::new(deps.len()))); + + // Register reverse dependencies + for dep_name in deps { + reverse_deps.entry(dep_name.clone()).or_default().push(lo.clone()); + } + } + + // Shared ready queue: blocks that are ready to compile + // Use a Mutex for simplicity - workers push newly-ready blocks here + let ready_queue: std::sync::Mutex> = + std::sync::Mutex::new(Vec::new()); + + // Initialize with blocks that have no dependencies + { + let mut queue = ready_queue.lock().unwrap(); + for entry in block_info.iter() { let lo = entry.key(); - let (all, deps) = entry.value(); - if deps.iter().all(|x| stt.consts.contains_key(x)) { - ready.insert(lo.clone(), all.clone()); + let (all, dep_count) = entry.value(); + if dep_count.load(AtomicOrdering::SeqCst) == 0 { + queue.push((lo.clone(), all.clone())); } - }); - //println!("Wave {i} ready {}", ready.len()); + } + } + + // Track completed count for termination + let completed = AtomicUsize::new(0); + + // Error storage for propagating errors from workers + let error: std::sync::Mutex> = + std::sync::Mutex::new(None); + + // Condvar for signaling workers when new work is available or completion + let work_available = std::sync::Condvar::new(); + + // Use scoped threads to borrow from parent scope + let num_threads = + thread::available_parallelism().map(|n| n.get()).unwrap_or(4); + + // Compile blocks in parallel using work-stealing - ready.par_iter().try_for_each(|entry| { - let mut cache = BlockCache::default(); - compile_const(entry.key(), entry.value(), env, &mut cache, &stt)?; - remaining.remove(entry.key()); - Ok::<(), CompileError>(()) - })?; + // Take references to shared data outside the loop + let error_ref = &error; + let stt_ref = &stt; + let reverse_deps_ref = &reverse_deps; + let block_info_ref = &block_info; + let completed_ref = &completed; + let 
ready_queue_ref = &ready_queue; + let condvar_ref = &work_available; + + thread::scope(|s| { + // Spawn worker threads + for _ in 0..num_threads { + s.spawn(move || { + loop { + // Try to get work from the ready queue + let work = { + let mut queue = ready_queue_ref.lock().unwrap(); + queue.pop() + }; + + match work { + Some((lo, all)) => { + // Check if we should stop due to error + if error_ref.lock().unwrap().is_some() { + return; + } + + // Track time for slow block detection + let block_start = std::time::Instant::now(); + + // Compile this block + let mut cache = BlockCache::default(); + if let Err(e) = + compile_const(&lo, &all, lean_env, &mut cache, stt_ref) + { + let mut err_guard = error_ref.lock().unwrap(); + if err_guard.is_none() { + *err_guard = Some(e); + } + return; + } + + // Check for slow blocks + let elapsed = block_start.elapsed(); + if elapsed.as_secs_f32() > 1.0 { + eprintln!( + "Slow block {:?} ({} consts): {:.2}s", + lo.pretty(), + all.len(), + elapsed.as_secs_f32() + ); + } + + // Collect newly-ready blocks + let mut newly_ready = Vec::new(); + + // For each name in this block, decrement dep counts for dependents + for name in &all { + if let Some(dependents) = reverse_deps_ref.get(name) { + for dependent_lo in dependents.value() { + if let Some(entry) = block_info_ref.get(dependent_lo) { + let (dep_all, dep_count) = entry.value(); + let prev = dep_count.fetch_sub(1, AtomicOrdering::SeqCst); + if prev == 1 { + // This block is now ready + newly_ready + .push((dependent_lo.clone(), dep_all.clone())); + } + } + } + } + } + + // Add newly-ready blocks to the queue and notify waiting workers + if !newly_ready.is_empty() { + let mut queue = ready_queue_ref.lock().unwrap(); + queue.extend(newly_ready); + condvar_ref.notify_all(); + } + + completed_ref.fetch_add(1, AtomicOrdering::SeqCst); + // Wake all workers so they can check for completion + condvar_ref.notify_all(); + }, + None => { + // No work available - check if we're done + if 
completed_ref.load(AtomicOrdering::SeqCst) == total_blocks { + return; + } + // Check for errors + if error_ref.lock().unwrap().is_some() { + return; + } + // Wait for new work to become available + let queue = ready_queue_ref.lock().unwrap(); + let _ = condvar_ref + .wait_timeout(queue, std::time::Duration::from_millis(10)) + .unwrap(); + }, + } + } + }); + } + }); + + // Check for errors + if let Some(e) = error.into_inner().unwrap() { + return Err(e); + } + + // Verify completion + let final_completed = completed.load(AtomicOrdering::SeqCst); + if final_completed != total_blocks { + // Find what's still blocked + let mut blocked_count = 0; + for entry in block_info.iter() { + let (_, dep_count) = entry.value(); + if dep_count.load(AtomicOrdering::SeqCst) > 0 { + blocked_count += 1; + if blocked_count <= 5 { + eprintln!( + "Still blocked: {:?} with {} deps remaining", + entry.key().pretty(), + dep_count.load(AtomicOrdering::SeqCst) + ); + } + } + } + return Err(CompileError::InvalidMutualBlock { + reason: "circular dependency or missing constant".into(), + }); } - println!("Compile: {:.2}s", start_compile.elapsed().unwrap().as_secs_f32()); + Ok(stt) } + +#[cfg(test)] +mod tests { + use super::*; + use crate::ix::env::{BinderInfo, Expr as LeanExpr, Level}; + + #[test] + fn test_compile_univ_zero() { + let level = Level::zero(); + let mut cache = BlockCache::default(); + let univ = compile_univ(&level, &[], &mut cache).unwrap(); + assert!(matches!(univ.as_ref(), Univ::Zero)); + } + + #[test] + fn test_compile_univ_succ() { + let level = Level::succ(Level::zero()); + let mut cache = BlockCache::default(); + let univ = compile_univ(&level, &[], &mut cache).unwrap(); + match univ.as_ref() { + Univ::Succ(inner) => assert!(matches!(inner.as_ref(), Univ::Zero)), + _ => panic!("expected Succ"), + } + } + + #[test] + fn test_compile_univ_param() { + let name = Name::str(Name::anon(), "u".to_string()); + let level = Level::param(name.clone()); + let mut cache = 
BlockCache::default(); + let univ = compile_univ(&level, &[name], &mut cache).unwrap(); + assert!(matches!(univ.as_ref(), Univ::Var(0))); + } + + #[test] + fn test_compile_univ_max() { + let level = Level::max(Level::zero(), Level::succ(Level::zero())); + let mut cache = BlockCache::default(); + let univ = compile_univ(&level, &[], &mut cache).unwrap(); + match univ.as_ref() { + Univ::Max(a, b) => { + assert!(matches!(a.as_ref(), Univ::Zero)); + match b.as_ref() { + Univ::Succ(inner) => assert!(matches!(inner.as_ref(), Univ::Zero)), + _ => panic!("expected Succ"), + } + }, + _ => panic!("expected Max"), + } + } + + #[test] + fn test_store_string() { + let stt = CompileState::default(); + let addr1 = store_string("hello", &stt); + let addr2 = store_string("hello", &stt); + // Same content should give same address + assert_eq!(addr1, addr2); + // Check we can retrieve it + let bytes = stt.env.get_blob(&addr1).unwrap(); + assert_eq!(bytes, b"hello"); + } + + #[test] + fn test_store_nat() { + let stt = CompileState::default(); + let n = Nat::from(42u64); + let addr = store_nat(&n, &stt); + let bytes = stt.env.get_blob(&addr).unwrap(); + let n2 = Nat::from_le_bytes(&bytes); + assert_eq!(n, n2); + } + + #[test] + fn test_compile_name_anon() { + let stt = CompileState::default(); + let name = Name::anon(); + let addr = compile_name(&name, &stt); + // Name is stored in env.names, not blobs + let stored_name = stt.env.names.get(&addr).unwrap(); + assert_eq!(*stored_name, name); + } + + #[test] + fn test_compile_name_str() { + let stt = CompileState::default(); + let name = Name::str(Name::anon(), "foo".to_string()); + let addr = compile_name(&name, &stt); + // Name is stored in env.names + let stored_name = stt.env.names.get(&addr).unwrap(); + assert_eq!(*stored_name, name); + // String component should be in blobs + let foo_bytes = "foo".as_bytes(); + let foo_addr = Address::hash(foo_bytes); + assert!(stt.env.blobs.contains_key(&foo_addr)); + } + + #[test] + fn 
test_compile_expr_bvar() { + let stt = CompileState::default(); + let mut cache = BlockCache::default(); + let expr = LeanExpr::bvar(Nat::from(3u64)); + let result = + compile_expr(&expr, &[], &MutCtx::default(), &mut cache, &stt).unwrap(); + assert!(matches!(result.as_ref(), Expr::Var(3))); + } + + #[test] + fn test_compile_expr_sort() { + let stt = CompileState::default(); + let mut cache = BlockCache::default(); + let expr = LeanExpr::sort(Level::zero()); + let result = + compile_expr(&expr, &[], &MutCtx::default(), &mut cache, &stt).unwrap(); + match result.as_ref() { + Expr::Sort(idx) => { + assert_eq!(*idx, 0); + assert!(matches!( + cache.univs.get_index(0).unwrap().as_ref(), + Univ::Zero + )); + }, + _ => panic!("expected Sort"), + } + } + + #[test] + fn test_compile_expr_app() { + let stt = CompileState::default(); + let mut cache = BlockCache::default(); + let f = LeanExpr::bvar(Nat::from(0u64)); + let a = LeanExpr::bvar(Nat::from(1u64)); + let expr = LeanExpr::app(f, a); + let result = + compile_expr(&expr, &[], &MutCtx::default(), &mut cache, &stt).unwrap(); + match result.as_ref() { + Expr::App(f, a) => { + assert!(matches!(f.as_ref(), Expr::Var(0))); + assert!(matches!(a.as_ref(), Expr::Var(1))); + }, + _ => panic!("expected App"), + } + } + + #[test] + fn test_compile_expr_lam() { + let stt = CompileState::default(); + let mut cache = BlockCache::default(); + let ty = LeanExpr::sort(Level::zero()); + let body = LeanExpr::bvar(Nat::from(0u64)); + let expr = LeanExpr::lam(Name::anon(), ty, body, BinderInfo::Default); + let result = + compile_expr(&expr, &[], &MutCtx::default(), &mut cache, &stt).unwrap(); + match result.as_ref() { + Expr::Lam(ty, body) => { + match ty.as_ref() { + Expr::Sort(idx) => { + assert_eq!(*idx, 0); + assert!(matches!( + cache.univs.get_index(0).unwrap().as_ref(), + Univ::Zero + )); + }, + _ => panic!("expected Sort for ty"), + } + assert!(matches!(body.as_ref(), Expr::Var(0))); + }, + _ => panic!("expected Lam"), + } + } + + 
#[test] + fn test_compile_expr_nat_lit() { + let stt = CompileState::default(); + let mut cache = BlockCache::default(); + let expr = LeanExpr::lit(Literal::NatVal(Nat::from(42u64))); + let result = + compile_expr(&expr, &[], &MutCtx::default(), &mut cache, &stt).unwrap(); + match result.as_ref() { + Expr::Nat(ref_idx) => { + let addr = cache.refs.get_index(*ref_idx as usize).unwrap(); + let bytes = stt.env.get_blob(addr).unwrap(); + let n = Nat::from_le_bytes(&bytes); + assert_eq!(n, Nat::from(42u64)); + }, + _ => panic!("expected Nat"), + } + } + + #[test] + fn test_compile_expr_str_lit() { + let stt = CompileState::default(); + let mut cache = BlockCache::default(); + let expr = LeanExpr::lit(Literal::StrVal("hello".to_string())); + let result = + compile_expr(&expr, &[], &MutCtx::default(), &mut cache, &stt).unwrap(); + match result.as_ref() { + Expr::Str(ref_idx) => { + let addr = cache.refs.get_index(*ref_idx as usize).unwrap(); + let bytes = stt.env.get_blob(addr).unwrap(); + assert_eq!(String::from_utf8(bytes).unwrap(), "hello"); + }, + _ => panic!("expected Str"), + } + } + + #[test] + fn test_compile_axiom() { + use crate::ix::env::{AxiomVal, ConstantVal}; + + // Create a simple axiom: axiom myAxiom : Type + let name = Name::str(Name::anon(), "myAxiom".to_string()); + let typ = LeanExpr::sort(Level::succ(Level::zero())); // Type 0 + let cnst = ConstantVal { name: name.clone(), level_params: vec![], typ }; + let axiom = AxiomVal { cnst, is_unsafe: false }; + + let mut lean_env = LeanEnv::default(); + lean_env.insert(name.clone(), LeanConstantInfo::AxiomInfo(axiom)); + let lean_env = Arc::new(lean_env); + + let stt = CompileState::default(); + let mut cache = BlockCache::default(); + let mut all = NameSet::default(); + all.insert(name.clone()); + + let result = compile_const(&name, &all, &lean_env, &mut cache, &stt); + assert!(result.is_ok(), "compile_const failed: {:?}", result.err()); + + let addr = result.unwrap(); + 
assert!(stt.name_to_addr.contains_key(&name)); + assert!(stt.env.get_const(&addr).is_some()); + } + + #[test] + fn test_compile_simple_def() { + use crate::ix::env::{ + ConstantVal, DefinitionSafety, DefinitionVal, ReducibilityHints, + }; + + // Create a simple definition: def myDef : Nat := 42 + let name = Name::str(Name::anon(), "myDef".to_string()); + let nat_name = Name::str(Name::anon(), "Nat".to_string()); + let typ = LeanExpr::cnst(nat_name.clone(), vec![]); + let value = LeanExpr::lit(Literal::NatVal(Nat::from(42u64))); + let cnst = ConstantVal { name: name.clone(), level_params: vec![], typ }; + let def = DefinitionVal { + cnst, + value, + hints: ReducibilityHints::Abbrev, + safety: DefinitionSafety::Safe, + all: vec![name.clone()], + }; + + let mut lean_env = LeanEnv::default(); + // Note: We also need Nat in the env for the reference to work, + // but for this test we just check the compile doesn't crash + lean_env.insert(name.clone(), LeanConstantInfo::DefnInfo(def)); + let lean_env = Arc::new(lean_env); + + let stt = CompileState::default(); + let mut cache = BlockCache::default(); + let mut all = NameSet::default(); + all.insert(name.clone()); + + // This will fail because nat_name isn't in name_to_addr, but let's see the error + let result = compile_const(&name, &all, &lean_env, &mut cache, &stt); + // We expect this to fail with MissingConstant for Nat + match result { + Err(CompileError::MissingConstant { name: missing }) => { + assert!( + missing.contains("Nat"), + "Expected missing Nat, got: {}", + missing + ); + }, + Err(e) => panic!("Unexpected error: {:?}", e), + Ok(_) => panic!("Expected error for missing Nat reference"), + } + } + + #[test] + fn test_compile_self_referential_def() { + use crate::ix::env::{ + ConstantInfo as LeanConstantInfo, ConstantVal, DefinitionSafety, + DefinitionVal, Env as LeanEnv, ReducibilityHints, + }; + use crate::ix::ixon::constant::ConstantInfo; + + // Create a self-referential definition (like a recursive 
function placeholder) + // def myDef : Type := myDef (this is silly but tests the mutual handling) + let name = Name::str(Name::anon(), "myDef".to_string()); + let typ = LeanExpr::sort(Level::succ(Level::zero())); // Type + let value = LeanExpr::cnst(name.clone(), vec![]); // self-reference + let cnst = ConstantVal { name: name.clone(), level_params: vec![], typ }; + let def = DefinitionVal { + cnst, + value, + hints: ReducibilityHints::Abbrev, + safety: DefinitionSafety::Safe, + all: vec![name.clone()], + }; + + let mut lean_env = LeanEnv::default(); + lean_env.insert(name.clone(), LeanConstantInfo::DefnInfo(def)); + let lean_env = Arc::new(lean_env); + + let stt = CompileState::default(); + let mut cache = BlockCache::default(); + let mut all = NameSet::default(); + all.insert(name.clone()); + + // This should work because it's a single self-referential def + let result = compile_const(&name, &all, &lean_env, &mut cache, &stt); + assert!(result.is_ok(), "compile_const failed: {:?}", result.err()); + + let addr = result.unwrap(); + assert!(stt.name_to_addr.contains_key(&name)); + + // Check the constant was stored + let cnst = stt.env.get_const(&addr); + assert!(cnst.is_some()); + match cnst.unwrap() { + Constant { info: ConstantInfo::Defn(d), .. 
} => { + // Value should be a Rec(0) since it's self-referential in a single-element block + match d.value.as_ref() { + Expr::Rec(0, _) => {}, // Expected + other => panic!("Expected Rec(0), got {:?}", other), + } + }, + other => panic!("Expected Defn, got {:?}", other), + } + } + + #[test] + fn test_compile_env_single_axiom() { + use crate::ix::env::{AxiomVal, ConstantVal}; + + // Create a minimal environment with just one axiom + let name = Name::str(Name::anon(), "myAxiom".to_string()); + let typ = LeanExpr::sort(Level::succ(Level::zero())); // Type 0 + let cnst = ConstantVal { name: name.clone(), level_params: vec![], typ }; + let axiom = AxiomVal { cnst, is_unsafe: false }; + + let mut lean_env = LeanEnv::default(); + lean_env.insert(name.clone(), LeanConstantInfo::AxiomInfo(axiom)); + let lean_env = Arc::new(lean_env); + + let result = compile_env(&lean_env); + assert!(result.is_ok(), "compile_env failed: {:?}", result.err()); + + let stt = result.unwrap(); + assert!(stt.name_to_addr.contains_key(&name), "name not in name_to_addr"); + assert_eq!(stt.env.const_count(), 1, "expected 1 constant"); + } + + #[test] + fn test_compile_env_two_independent_axioms() { + use crate::ix::env::{AxiomVal, ConstantVal}; + + let name1 = Name::str(Name::anon(), "axiom1".to_string()); + let name2 = Name::str(Name::anon(), "axiom2".to_string()); + let typ = LeanExpr::sort(Level::succ(Level::zero())); + + let axiom1 = AxiomVal { + cnst: ConstantVal { + name: name1.clone(), + level_params: vec![], + typ: typ.clone(), + }, + is_unsafe: false, + }; + let axiom2 = AxiomVal { + cnst: ConstantVal { name: name2.clone(), level_params: vec![], typ }, + is_unsafe: false, + }; + + let mut lean_env = LeanEnv::default(); + lean_env.insert(name1.clone(), LeanConstantInfo::AxiomInfo(axiom1)); + lean_env.insert(name2.clone(), LeanConstantInfo::AxiomInfo(axiom2)); + let lean_env = Arc::new(lean_env); + + let result = compile_env(&lean_env); + assert!(result.is_ok(), "compile_env failed: {:?}", 
result.err()); + + let stt = result.unwrap(); + // Both names should be registered + assert!(stt.name_to_addr.contains_key(&name1), "name1 not in name_to_addr"); + assert!(stt.name_to_addr.contains_key(&name2), "name2 not in name_to_addr"); + // Both names point to the same constant (alpha-equivalent axioms) + let addr1 = stt.name_to_addr.get(&name1).unwrap().clone(); + let addr2 = stt.name_to_addr.get(&name2).unwrap().clone(); + assert_eq!( + addr1, addr2, + "alpha-equivalent axioms should have same address" + ); + // Only 1 unique constant in the store (alpha-equivalent axioms deduplicated) + assert_eq!(stt.env.const_count(), 1); + } + + #[test] + fn test_compile_env_def_referencing_axiom() { + use crate::ix::env::{ + AxiomVal, ConstantVal, DefinitionSafety, DefinitionVal, ReducibilityHints, + }; + + let axiom_name = Name::str(Name::anon(), "myType".to_string()); + let def_name = Name::str(Name::anon(), "myDef".to_string()); + + // axiom myType : Type + let axiom = AxiomVal { + cnst: ConstantVal { + name: axiom_name.clone(), + level_params: vec![], + typ: LeanExpr::sort(Level::succ(Level::zero())), + }, + is_unsafe: false, + }; + + // def myDef : myType := myType (referencing the axiom in the value) + let def = DefinitionVal { + cnst: ConstantVal { + name: def_name.clone(), + level_params: vec![], + typ: LeanExpr::cnst(axiom_name.clone(), vec![]), + }, + value: LeanExpr::cnst(axiom_name.clone(), vec![]), // reference the axiom + hints: ReducibilityHints::Abbrev, + safety: DefinitionSafety::Safe, + all: vec![def_name.clone()], + }; + + let mut lean_env = LeanEnv::default(); + lean_env.insert(axiom_name.clone(), LeanConstantInfo::AxiomInfo(axiom)); + lean_env.insert(def_name.clone(), LeanConstantInfo::DefnInfo(def)); + let lean_env = Arc::new(lean_env); + + let result = compile_env(&lean_env); + assert!(result.is_ok(), "compile_env failed: {:?}", result.err()); + + let stt = result.unwrap(); + assert!(stt.name_to_addr.contains_key(&axiom_name)); + 
assert!(stt.name_to_addr.contains_key(&def_name)); + assert_eq!(stt.env.const_count(), 2); + } + + /// Test that alpha-equivalent mutual definitions produce correct projection + /// indices. Two definitions with identical type/value structure (but different + /// names) should form one equivalence class, and projections should resolve + /// to the single representative in the Muts array. + #[test] + fn test_compile_mutual_alpha_equivalent_defs() { + use crate::ix::env::{ + ConstantVal, DefinitionSafety, DefinitionVal, ReducibilityHints, + }; + + // Create two mutually recursive definitions with identical structure. + // Both: def X : Type := Type (referencing each other, same shape) + let name_f = Name::str(Name::anon(), "f".to_string()); + let name_g = Name::str(Name::anon(), "g".to_string()); + + let typ = LeanExpr::sort(Level::succ(Level::zero())); // Type + + // f and g reference each other but with identical structure: + // f : Type := g and g : Type := f + // After alpha-normalization (mutual refs become recur indices), + // both become: recur(0) since they're in the same class. 
+ let def_f = DefinitionVal { + cnst: ConstantVal { + name: name_f.clone(), + level_params: vec![], + typ: typ.clone(), + }, + value: LeanExpr::cnst(name_g.clone(), vec![]), + hints: ReducibilityHints::Opaque, + safety: DefinitionSafety::Safe, + all: vec![name_f.clone(), name_g.clone()], + }; + + let def_g = DefinitionVal { + cnst: ConstantVal { + name: name_g.clone(), + level_params: vec![], + typ: typ.clone(), + }, + value: LeanExpr::cnst(name_f.clone(), vec![]), + hints: ReducibilityHints::Opaque, + safety: DefinitionSafety::Safe, + all: vec![name_f.clone(), name_g.clone()], + }; + + let mut lean_env = LeanEnv::default(); + lean_env.insert(name_f.clone(), LeanConstantInfo::DefnInfo(def_f)); + lean_env.insert(name_g.clone(), LeanConstantInfo::DefnInfo(def_g)); + let lean_env = Arc::new(lean_env); + + let result = compile_env(&lean_env); + assert!(result.is_ok(), "compile_env failed: {:?}", result.err()); + + let stt = result.unwrap(); + + // Both names should be registered + assert!(stt.name_to_addr.contains_key(&name_f), "f not in name_to_addr"); + assert!(stt.name_to_addr.contains_key(&name_g), "g not in name_to_addr"); + + // Both should point to the same block address (same projection, + // since they're alpha-equivalent and share idx=0) + let addr_f = stt.name_to_addr.get(&name_f).unwrap().clone(); + let addr_g = stt.name_to_addr.get(&name_g).unwrap().clone(); + assert_eq!( + addr_f, addr_g, + "alpha-equivalent mutual defs should have same projection address" + ); + + // Verify the block exists and has exactly 1 mutual entry + // (one representative for the equivalence class, not two) + for block_addr in stt.blocks.iter() { + let block = stt.env.get_const(&block_addr).unwrap(); + if let ConstantInfo::Muts(muts) = &block.info { + assert_eq!( + muts.len(), + 1, + "alpha-equivalent class should produce 1 entry in Muts, got {}", + muts.len() + ); + } + } + } + + /// Test that alpha-equivalent defs in a mutual block with a non-equivalent + /// third definition 
produce correct indices: 2 classes → 2 Muts entries, + /// with projections indexing correctly into the array. + #[test] + fn test_compile_mutual_alpha_equiv_with_different_third() { + use crate::ix::env::{ + ConstantVal, DefinitionSafety, DefinitionVal, ReducibilityHints, + }; + + let name_f = Name::str(Name::anon(), "f".to_string()); + let name_g = Name::str(Name::anon(), "g".to_string()); + let name_h = Name::str(Name::anon(), "h".to_string()); + + let typ = LeanExpr::sort(Level::succ(Level::zero())); // Type + + // f and g are alpha-equivalent to each other: + // f : Type := App(g, h) g : Type := App(f, h) + // After alpha-normalization, both become App(recur(class_of_fg), recur(class_of_h)) + // h is structurally different: + // h : Type := f + // After alpha-normalization: recur(class_of_fg) + // All three form one SCC: f→g,h g→f,h h→f + let def_f = DefinitionVal { + cnst: ConstantVal { + name: name_f.clone(), + level_params: vec![], + typ: typ.clone(), + }, + value: LeanExpr::app( + LeanExpr::cnst(name_g.clone(), vec![]), + LeanExpr::cnst(name_h.clone(), vec![]), + ), + hints: ReducibilityHints::Opaque, + safety: DefinitionSafety::Safe, + all: vec![name_f.clone(), name_g.clone(), name_h.clone()], + }; + + let def_g = DefinitionVal { + cnst: ConstantVal { + name: name_g.clone(), + level_params: vec![], + typ: typ.clone(), + }, + value: LeanExpr::app( + LeanExpr::cnst(name_f.clone(), vec![]), + LeanExpr::cnst(name_h.clone(), vec![]), + ), + hints: ReducibilityHints::Opaque, + safety: DefinitionSafety::Safe, + all: vec![name_f.clone(), name_g.clone(), name_h.clone()], + }; + + let def_h = DefinitionVal { + cnst: ConstantVal { + name: name_h.clone(), + level_params: vec![], + typ: typ.clone(), + }, + value: LeanExpr::cnst(name_f.clone(), vec![]), + hints: ReducibilityHints::Opaque, + safety: DefinitionSafety::Safe, + all: vec![name_f.clone(), name_g.clone(), name_h.clone()], + }; + + let mut lean_env = LeanEnv::default(); + lean_env.insert(name_f.clone(), 
LeanConstantInfo::DefnInfo(def_f)); + lean_env.insert(name_g.clone(), LeanConstantInfo::DefnInfo(def_g)); + lean_env.insert(name_h.clone(), LeanConstantInfo::DefnInfo(def_h)); + let lean_env = Arc::new(lean_env); + + let result = compile_env(&lean_env); + assert!(result.is_ok(), "compile_env failed: {:?}", result.err()); + + let stt = result.unwrap(); + + // All three should be registered + assert!(stt.name_to_addr.contains_key(&name_f)); + assert!(stt.name_to_addr.contains_key(&name_g)); + assert!(stt.name_to_addr.contains_key(&name_h)); + + // f and g are alpha-equivalent → same projection address + let addr_f = stt.name_to_addr.get(&name_f).unwrap().clone(); + let addr_g = stt.name_to_addr.get(&name_g).unwrap().clone(); + assert_eq!( + addr_f, addr_g, + "alpha-equivalent f and g should share projection address" + ); + + // h is different → different projection address + let addr_h = stt.name_to_addr.get(&name_h).unwrap().clone(); + assert_ne!( + addr_f, addr_h, + "h should have a different projection address than f/g" + ); + + // Verify Muts has exactly 2 entries (one per equivalence class) + for block_addr in stt.blocks.iter() { + let block = stt.env.get_const(&block_addr).unwrap(); + if let ConstantInfo::Muts(muts) = &block.info { + assert_eq!( + muts.len(), + 2, + "2 equivalence classes should produce 2 Muts entries, got {}", + muts.len() + ); + } + } + } + + // ========================================================================= + // Sharing tests + // ========================================================================= + + #[test] + fn test_mutual_block_roundtrip() { + use crate::ix::env::DefinitionSafety; + use crate::ix::ixon::constant::{DefKind, Definition}; + + // Create a mutual block and verify it roundtrips through serialization + let sort0 = Expr::sort(0); + let ty = Expr::all(sort0.clone(), Expr::var(0)); + + let def1 = IxonMutConst::Defn(Definition { + kind: DefKind::Definition, + safety: DefinitionSafety::Safe, + lvls: 0, + typ: 
ty.clone(), + value: Expr::var(0), + }); + + let def2 = IxonMutConst::Defn(Definition { + kind: DefKind::Theorem, + safety: DefinitionSafety::Safe, + lvls: 0, + typ: ty, + value: Expr::var(1), + }); + + let compiled = compile_mutual_block(vec![def1, def2], vec![], vec![], None); + let constant = compiled.constant; + let addr = compiled.addr; + + // Serialize + let mut buf = Vec::new(); + constant.put(&mut buf); + + // Deserialize + let recovered = Constant::get(&mut buf.as_slice()).unwrap(); + + // Re-serialize to check determinism + let mut buf2 = Vec::new(); + recovered.put(&mut buf2); + + assert_eq!(buf, buf2, "Serialization should be deterministic"); + + // Re-hash to check address stability + let addr2 = Address::hash(&buf2); + assert_eq!(addr, addr2, "Content address should be stable"); + } + + // ========================================================================= + // Constant-level sharing tests + // ========================================================================= + + #[test] + fn test_apply_sharing_basic() { + // Test the apply_sharing helper function with a repeated subterm + let sort0 = Expr::sort(0); + let var0 = Expr::var(0); + // Create term: App(Lam(Sort0, Var0), Lam(Sort0, Var0)) + // Lam(Sort0, Var0) is repeated and should be shared + let lam = Expr::lam(sort0.clone(), var0); + let app = Expr::app(lam.clone(), lam); + + let (rewritten, sharing) = apply_sharing(vec![app]); + + // Should have sharing since lam is used twice + assert!(!sharing.is_empty(), "Expected sharing for repeated subterm"); + // The sharing vector should contain the shared Lam + assert!(sharing.iter().any(|e| matches!(e.as_ref(), Expr::Lam(_, _)))); + // The rewritten expression should have Share references + assert!(matches!(rewritten[0].as_ref(), Expr::App(_, _))); + } + + #[test] + fn test_definition_with_sharing() { + use crate::ix::ixon::constant::{DefKind, Definition}; + + // Create a definition where typ and value share structure + let sort0 = 
Expr::sort(0); + let shared_subterm = Expr::all(sort0.clone(), Expr::var(0)); + // typ = App(shared, shared) -- shared twice + let typ = Expr::app(shared_subterm.clone(), shared_subterm.clone()); + // value = shared + let value = shared_subterm; + + let (rewritten, sharing) = apply_sharing(vec![typ, value]); + + // shared_subterm appears 3 times total, should definitely be shared + assert!( + !sharing.is_empty(), + "Expected sharing for definition with repeated subterms" + ); + + // Create constant with sharing at Constant level + let def = Definition { + kind: DefKind::Definition, + safety: crate::ix::env::DefinitionSafety::Safe, + lvls: 0, + typ: rewritten[0].clone(), + value: rewritten[1].clone(), + }; + + let constant = Constant::with_tables( + ConstantInfo::Defn(def), + sharing.clone(), + vec![], + vec![], + ); + + let mut buf = Vec::new(); + constant.put(&mut buf); + let recovered = Constant::get(&mut buf.as_slice()).unwrap(); + + assert_eq!(sharing.len(), recovered.sharing.len()); + assert!(matches!(recovered.info, ConstantInfo::Defn(_))); + } + + #[test] + fn test_axiom_with_sharing() { + use crate::ix::ixon::constant::Axiom; + + // Axiom with repeated subterms in its type + let sort0 = Expr::sort(0); + let shared = Expr::all(sort0.clone(), Expr::var(0)); + // typ = All(shared, All(shared, Var(0))) + let typ = + Expr::all(shared.clone(), Expr::all(shared.clone(), Expr::var(0))); + + let (rewritten, sharing) = apply_sharing(vec![typ]); + + // shared appears twice, should be shared + assert!( + !sharing.is_empty(), + "Expected sharing for axiom with repeated subterms" + ); + + let axiom = Axiom { is_unsafe: false, lvls: 0, typ: rewritten[0].clone() }; + let constant = Constant::with_tables( + ConstantInfo::Axio(axiom), + sharing.clone(), + vec![], + vec![], + ); + + let mut buf = Vec::new(); + constant.put(&mut buf); + let recovered = Constant::get(&mut buf.as_slice()).unwrap(); + + assert_eq!(sharing.len(), recovered.sharing.len()); + 
assert!(matches!(recovered.info, ConstantInfo::Axio(_))); + } + + #[test] + fn test_recursor_with_sharing() { + use crate::ix::ixon::constant::{Recursor, RecursorRule}; + + // Recursor with shared subterms across typ and rules + let sort0 = Expr::sort(0); + let shared = Expr::lam(sort0.clone(), Expr::var(0)); + + // typ uses shared twice + let typ = Expr::app(shared.clone(), shared.clone()); + + // rules also use shared + let rules = vec![ + RecursorRule { fields: 0, rhs: shared.clone() }, + RecursorRule { fields: 1, rhs: shared }, + ]; + + // Collect all expressions + let mut all_exprs = vec![typ]; + for r in &rules { + all_exprs.push(r.rhs.clone()); + } + + let (rewritten, sharing) = apply_sharing(all_exprs); + + // shared appears 4 times, should definitely be shared + assert!( + !sharing.is_empty(), + "Expected sharing for recursor with repeated subterms" + ); + + let rec = Recursor { + k: false, + is_unsafe: false, + lvls: 0, + params: 0, + indices: 0, + motives: 1, + minors: 2, + typ: rewritten[0].clone(), + rules: rules + .into_iter() + .zip(rewritten.into_iter().skip(1)) + .map(|(r, rhs)| RecursorRule { fields: r.fields, rhs }) + .collect(), + }; + + let constant = Constant::with_tables( + ConstantInfo::Recr(rec), + sharing.clone(), + vec![], + vec![], + ); + + let mut buf = Vec::new(); + constant.put(&mut buf); + let recovered = Constant::get(&mut buf.as_slice()).unwrap(); + + assert_eq!(sharing.len(), recovered.sharing.len()); + if let ConstantInfo::Recr(rec2) = &recovered.info { + assert_eq!(2, rec2.rules.len()); + } else { + panic!("Expected Recursor"); + } + } + + #[test] + fn test_inductive_with_sharing() { + use crate::ix::ixon::constant::{Constructor, Inductive}; + + // Inductive with shared subterms across type and constructors + let sort0 = Expr::sort(0); + let shared = Expr::all(sort0.clone(), Expr::var(0)); + + let typ = Expr::app(shared.clone(), shared.clone()); + + let ctors = vec![ + Constructor { + is_unsafe: false, + lvls: 0, + cidx: 0, + 
params: 0, + fields: 0, + typ: shared.clone(), + }, + Constructor { + is_unsafe: false, + lvls: 0, + cidx: 1, + params: 0, + fields: 1, + typ: shared, + }, + ]; + + // Collect all expressions + let mut all_exprs = vec![typ]; + for c in &ctors { + all_exprs.push(c.typ.clone()); + } + + let (rewritten, sharing) = apply_sharing(all_exprs); + + // shared appears 4 times, should be shared + assert!( + !sharing.is_empty(), + "Expected sharing for inductive with repeated subterms" + ); + + let ind = Inductive { + recr: false, + refl: false, + is_unsafe: false, + lvls: 0, + params: 0, + indices: 0, + nested: 0, + typ: rewritten[0].clone(), + ctors: ctors + .into_iter() + .zip(rewritten.into_iter().skip(1)) + .map(|(c, typ)| Constructor { + is_unsafe: c.is_unsafe, + lvls: c.lvls, + cidx: c.cidx, + params: c.params, + fields: c.fields, + typ, + }) + .collect(), + }; + + // Wrap in MutConst for serialization with sharing at Constant level + let constant = Constant::with_tables( + ConstantInfo::Muts(vec![IxonMutConst::Indc(ind)]), + sharing.clone(), + vec![], + vec![], + ); + + let mut buf = Vec::new(); + constant.put(&mut buf); + let recovered = Constant::get(&mut buf.as_slice()).unwrap(); + + assert_eq!(sharing.len(), recovered.sharing.len()); + if let ConstantInfo::Muts(mutuals) = &recovered.info { + if let Some(IxonMutConst::Indc(ind2)) = mutuals.first() { + assert_eq!(2, ind2.ctors.len()); + } else { + panic!("Expected Inductive in Muts"); + } + } else { + panic!("Expected Muts"); + } + } + + #[test] + fn test_no_sharing_when_not_repeated() { + // When a subterm only appears once, it shouldn't be shared + let _sort0 = Expr::sort(0); + let var0 = Expr::var(0); + let var1 = Expr::var(1); + let app = Expr::app(var0, var1); + + let (rewritten, sharing) = apply_sharing(vec![app.clone()]); + + // No repeated subterms, so no sharing + assert!(sharing.is_empty(), "Expected no sharing when nothing is repeated"); + // Rewritten should be identical to original + 
assert_eq!(rewritten[0].as_ref(), app.as_ref()); + } + + // ========================================================================= + // Compile/Decompile Roundtrip Tests + // ========================================================================= + + #[test] + fn test_roundtrip_axiom() { + use crate::ix::decompile::decompile_env; + use crate::ix::env::{AxiomVal, ConstantVal}; + + // Create an axiom: axiom myAxiom : Type + let name = Name::str(Name::anon(), "myAxiom".to_string()); + let typ = LeanExpr::sort(Level::succ(Level::zero())); // Type 0 + let cnst = ConstantVal { name: name.clone(), level_params: vec![], typ }; + let axiom = AxiomVal { cnst, is_unsafe: false }; + + let mut lean_env = LeanEnv::default(); + lean_env.insert(name.clone(), LeanConstantInfo::AxiomInfo(axiom.clone())); + let lean_env = Arc::new(lean_env); + + // Compile + let stt = compile_env(&lean_env).expect("compile_env failed"); + + // Decompile + let dstt = decompile_env(&stt).expect("decompile_env failed"); + + // Check roundtrip + let recovered = + dstt.env.get(&name).expect("name not found in decompiled env"); + match &*recovered { + LeanConstantInfo::AxiomInfo(ax) => { + assert_eq!(ax.cnst.name, axiom.cnst.name); + assert_eq!(ax.is_unsafe, axiom.is_unsafe); + assert_eq!(ax.cnst.level_params.len(), axiom.cnst.level_params.len()); + }, + _ => panic!("Expected AxiomInfo"), + } + } + + #[test] + fn test_roundtrip_axiom_with_level_params() { + use crate::ix::decompile::decompile_env; + use crate::ix::env::{AxiomVal, ConstantVal, Env as LeanEnv}; + + // Create an axiom with universe params: axiom myAxiom.{u, v} : Sort (max u v) + let name = Name::str(Name::anon(), "myAxiom".to_string()); + let u = Name::str(Name::anon(), "u".to_string()); + let v = Name::str(Name::anon(), "v".to_string()); + let typ = LeanExpr::sort(Level::max( + Level::param(u.clone()), + Level::param(v.clone()), + )); + let cnst = ConstantVal { + name: name.clone(), + level_params: vec![u.clone(), v.clone()], + typ, + 
}; + let axiom = AxiomVal { cnst, is_unsafe: false }; + + let mut lean_env = LeanEnv::default(); + lean_env.insert(name.clone(), LeanConstantInfo::AxiomInfo(axiom.clone())); + let lean_env = Arc::new(lean_env); + + // Compile + let stt = compile_env(&lean_env).expect("compile_env failed"); + + // Decompile + let dstt = decompile_env(&stt).expect("decompile_env failed"); + + // Check roundtrip + let recovered = dstt.env.get(&name).expect("name not found"); + match &*recovered { + LeanConstantInfo::AxiomInfo(ax) => { + assert_eq!(ax.cnst.name, name); + assert_eq!(ax.cnst.level_params.len(), 2); + assert_eq!(ax.cnst.level_params[0], u); + assert_eq!(ax.cnst.level_params[1], v); + }, + _ => panic!("Expected AxiomInfo"), + } + } + + #[test] + fn test_roundtrip_definition() { + use crate::ix::decompile::decompile_env; + use crate::ix::env::{ + ConstantVal, DefinitionSafety, DefinitionVal, ReducibilityHints, + }; + + // Create a definition: def id : Type -> Type := fun x => x + let name = Name::str(Name::anon(), "id".to_string()); + let type1 = LeanExpr::sort(Level::succ(Level::zero())); // Type + let typ = LeanExpr::all( + Name::str(Name::anon(), "x".to_string()), + type1.clone(), + type1.clone(), + crate::ix::env::BinderInfo::Default, + ); + let value = LeanExpr::lam( + Name::str(Name::anon(), "x".to_string()), + type1, + LeanExpr::bvar(Nat::from(0u64)), + crate::ix::env::BinderInfo::Default, + ); + let def = DefinitionVal { + cnst: ConstantVal { name: name.clone(), level_params: vec![], typ }, + value, + hints: ReducibilityHints::Abbrev, + safety: DefinitionSafety::Safe, + all: vec![name.clone()], + }; + + let mut lean_env = LeanEnv::default(); + lean_env.insert(name.clone(), LeanConstantInfo::DefnInfo(def.clone())); + let lean_env = Arc::new(lean_env); + + // Compile + let stt = compile_env(&lean_env).expect("compile_env failed"); + + // Decompile + let dstt = decompile_env(&stt).expect("decompile_env failed"); + + // Check roundtrip + let recovered = 
dstt.env.get(&name).expect("name not found"); + match &*recovered { + LeanConstantInfo::DefnInfo(d) => { + assert_eq!(d.cnst.name, name); + assert_eq!(d.hints, def.hints); + assert_eq!(d.safety, def.safety); + assert_eq!(d.all.len(), def.all.len()); + }, + _ => panic!("Expected DefnInfo"), + } + } + + #[test] + fn test_roundtrip_def_referencing_axiom() { + use crate::ix::decompile::decompile_env; + use crate::ix::env::{ + AxiomVal, ConstantVal, DefinitionSafety, DefinitionVal, Env as LeanEnv, + ReducibilityHints, + }; + + // Create axiom A : Type and def B : A := A + let axiom_name = Name::str(Name::anon(), "A".to_string()); + let def_name = Name::str(Name::anon(), "B".to_string()); + + let type0 = LeanExpr::sort(Level::succ(Level::zero())); + let axiom = AxiomVal { + cnst: ConstantVal { + name: axiom_name.clone(), + level_params: vec![], + typ: type0, + }, + is_unsafe: false, + }; + + let def = DefinitionVal { + cnst: ConstantVal { + name: def_name.clone(), + level_params: vec![], + typ: LeanExpr::cnst(axiom_name.clone(), vec![]), + }, + value: LeanExpr::cnst(axiom_name.clone(), vec![]), + hints: ReducibilityHints::Abbrev, + safety: DefinitionSafety::Safe, + all: vec![def_name.clone()], + }; + + let mut lean_env = LeanEnv::default(); + lean_env.insert(axiom_name.clone(), LeanConstantInfo::AxiomInfo(axiom)); + lean_env.insert(def_name.clone(), LeanConstantInfo::DefnInfo(def)); + let lean_env = Arc::new(lean_env); + + // Compile + let stt = compile_env(&lean_env).expect("compile_env failed"); + + // Decompile + let dstt = decompile_env(&stt).expect("decompile_env failed"); + + // Check both roundtrip + assert!(dstt.env.contains_key(&axiom_name)); + assert!(dstt.env.contains_key(&def_name)); + + match &*dstt.env.get(&def_name).unwrap() { + LeanConstantInfo::DefnInfo(d) => { + assert_eq!(d.cnst.name, def_name); + }, + _ => panic!("Expected DefnInfo"), + } + } + + #[test] + fn test_roundtrip_quotient() { + use crate::ix::decompile::decompile_env; + use 
crate::ix::env::{ConstantVal, Env as LeanEnv, QuotKind, QuotVal}; + + // Create quotient constants + let quot_name = Name::str(Name::anon(), "Quot".to_string()); + let u = Name::str(Name::anon(), "u".to_string()); + + // Quot.{u} : (α : Sort u) → (α → α → Prop) → Sort u + let alpha = Name::str(Name::anon(), "α".to_string()); + let sort_u = LeanExpr::sort(Level::param(u.clone())); + let prop = LeanExpr::sort(Level::zero()); + + // Build: (α : Sort u) → (α → α → Prop) → Sort u + let rel_type = LeanExpr::all( + Name::anon(), + LeanExpr::bvar(Nat::from(0u64)), + LeanExpr::all( + Name::anon(), + LeanExpr::bvar(Nat::from(1u64)), + prop.clone(), + crate::ix::env::BinderInfo::Default, + ), + crate::ix::env::BinderInfo::Default, + ); + let typ = LeanExpr::all( + alpha, + sort_u.clone(), + LeanExpr::all( + Name::anon(), + rel_type, + sort_u.clone(), + crate::ix::env::BinderInfo::Default, + ), + crate::ix::env::BinderInfo::Default, + ); + + let quot = QuotVal { + cnst: ConstantVal { + name: quot_name.clone(), + level_params: vec![u.clone()], + typ, + }, + kind: QuotKind::Type, + }; + + let mut lean_env = LeanEnv::default(); + lean_env + .insert(quot_name.clone(), LeanConstantInfo::QuotInfo(quot.clone())); + let lean_env = Arc::new(lean_env); + + // Compile + let stt = compile_env(&lean_env).expect("compile_env failed"); + + // Decompile + let dstt = decompile_env(&stt).expect("decompile_env failed"); + + // Check roundtrip + let recovered = dstt.env.get(&quot_name).expect("name not found"); + match &*recovered { + LeanConstantInfo::QuotInfo(q) => { + assert_eq!(q.cnst.name, quot_name); + assert_eq!(q.kind, QuotKind::Type); + assert_eq!(q.cnst.level_params.len(), 1); + }, + _ => panic!("Expected QuotInfo"), + } + } + + #[test] + fn test_roundtrip_theorem() { + use crate::ix::decompile::decompile_env; + use crate::ix::env::{ConstantVal, Env as LeanEnv, TheoremVal}; + + // Create a theorem: theorem trivial : True := True.intro + let name = Name::str(Name::anon(), 
"trivial".to_string()); + let prop = LeanExpr::sort(Level::zero()); // Prop + + // For simplicity, just use Prop as both type and value + let thm = TheoremVal { + cnst: ConstantVal { + name: name.clone(), + level_params: vec![], + typ: prop.clone(), + }, + value: prop, + all: vec![name.clone()], + }; + + let mut lean_env = LeanEnv::default(); + lean_env.insert(name.clone(), LeanConstantInfo::ThmInfo(thm.clone())); + let lean_env = Arc::new(lean_env); + + // Compile + let stt = compile_env(&lean_env).expect("compile_env failed"); + + // Decompile + let dstt = decompile_env(&stt).expect("decompile_env failed"); + + // Check roundtrip + let recovered = dstt.env.get(&name).expect("name not found"); + match &*recovered { + LeanConstantInfo::ThmInfo(t) => { + assert_eq!(t.cnst.name, name); + assert_eq!(t.all.len(), 1); + }, + _ => panic!("Expected ThmInfo"), + } + } + + #[test] + fn test_roundtrip_opaque() { + use crate::ix::decompile::decompile_env; + use crate::ix::env::{ConstantVal, Env as LeanEnv, OpaqueVal}; + + // Create an opaque: opaque secret : Nat := 42 + let name = Name::str(Name::anon(), "secret".to_string()); + let nat_type = LeanExpr::sort(Level::zero()); // Using Prop as placeholder + + let opaq = OpaqueVal { + cnst: ConstantVal { + name: name.clone(), + level_params: vec![], + typ: nat_type.clone(), + }, + value: nat_type, + is_unsafe: false, + all: vec![name.clone()], + }; + + let mut lean_env = LeanEnv::default(); + lean_env.insert(name.clone(), LeanConstantInfo::OpaqueInfo(opaq.clone())); + let lean_env = Arc::new(lean_env); + + // Compile + let stt = compile_env(&lean_env).expect("compile_env failed"); + + // Decompile + let dstt = decompile_env(&stt).expect("decompile_env failed"); + + // Check roundtrip + let recovered = dstt.env.get(&name).expect("name not found"); + match &*recovered { + LeanConstantInfo::OpaqueInfo(o) => { + assert_eq!(o.cnst.name, name); + assert!(!o.is_unsafe); + assert_eq!(o.all.len(), 1); + }, + _ => panic!("Expected 
OpaqueInfo"), + } + } + + #[test] + fn test_roundtrip_multiple_constants() { + use crate::ix::decompile::decompile_env; + use crate::ix::env::{ + AxiomVal, ConstantVal, DefinitionSafety, DefinitionVal, Env as LeanEnv, + ReducibilityHints, TheoremVal, + }; + + // Create multiple constants of different types + let axiom_name = Name::str(Name::anon(), "A".to_string()); + let def_name = Name::str(Name::anon(), "B".to_string()); + let thm_name = Name::str(Name::anon(), "C".to_string()); + + let type0 = LeanExpr::sort(Level::succ(Level::zero())); + let prop = LeanExpr::sort(Level::zero()); + + let axiom = AxiomVal { + cnst: ConstantVal { + name: axiom_name.clone(), + level_params: vec![], + typ: type0.clone(), + }, + is_unsafe: false, + }; + + let def = DefinitionVal { + cnst: ConstantVal { + name: def_name.clone(), + level_params: vec![], + typ: type0, + }, + value: LeanExpr::cnst(axiom_name.clone(), vec![]), + hints: ReducibilityHints::Regular(10), + safety: DefinitionSafety::Safe, + all: vec![def_name.clone()], + }; + + let thm = TheoremVal { + cnst: ConstantVal { + name: thm_name.clone(), + level_params: vec![], + typ: prop.clone(), + }, + value: prop, + all: vec![thm_name.clone()], + }; + + let mut lean_env = LeanEnv::default(); + lean_env.insert(axiom_name.clone(), LeanConstantInfo::AxiomInfo(axiom)); + lean_env.insert(def_name.clone(), LeanConstantInfo::DefnInfo(def)); + lean_env.insert(thm_name.clone(), LeanConstantInfo::ThmInfo(thm)); + let lean_env = Arc::new(lean_env); + + // Compile + let stt = compile_env(&lean_env).expect("compile_env failed"); + assert_eq!(stt.env.const_count(), 3); + + // Decompile + let dstt = decompile_env(&stt).expect("decompile_env failed"); + + // Check all constants roundtrip + assert!(matches!( + &*dstt.env.get(&axiom_name).unwrap(), + LeanConstantInfo::AxiomInfo(_) + )); + assert!(matches!( + &*dstt.env.get(&def_name).unwrap(), + LeanConstantInfo::DefnInfo(_) + )); + assert!(matches!( + &*dstt.env.get(&thm_name).unwrap(), + 
LeanConstantInfo::ThmInfo(_) + )); + } + + #[test] + fn test_roundtrip_inductive_simple() { + use crate::ix::decompile::decompile_env; + use crate::ix::env::{ + ConstantVal, ConstructorVal, Env as LeanEnv, InductiveVal, + }; + + // Create a simple inductive: inductive Unit : Type where | unit : Unit + // No recursor to keep it simple and self-contained + let unit_name = Name::str(Name::anon(), "Unit".to_string()); + let unit_ctor_name = Name::str(unit_name.clone(), "unit".to_string()); + + let type0 = LeanExpr::sort(Level::succ(Level::zero())); // Type + + // Unit : Type + let inductive = InductiveVal { + cnst: ConstantVal { + name: unit_name.clone(), + level_params: vec![], + typ: type0.clone(), + }, + num_params: Nat::from(0u64), + num_indices: Nat::from(0u64), + all: vec![unit_name.clone()], + ctors: vec![unit_ctor_name.clone()], + num_nested: Nat::from(0u64), + is_rec: false, + is_unsafe: false, + is_reflexive: false, + }; + + // Unit.unit : Unit + let ctor = ConstructorVal { + cnst: ConstantVal { + name: unit_ctor_name.clone(), + level_params: vec![], + typ: LeanExpr::cnst(unit_name.clone(), vec![]), + }, + induct: unit_name.clone(), + cidx: Nat::from(0u64), + num_params: Nat::from(0u64), + num_fields: Nat::from(0u64), + is_unsafe: false, + }; + + let mut lean_env = LeanEnv::default(); + lean_env.insert( + unit_name.clone(), + LeanConstantInfo::InductInfo(inductive.clone()), + ); + lean_env + .insert(unit_ctor_name.clone(), LeanConstantInfo::CtorInfo(ctor.clone())); + let lean_env = Arc::new(lean_env); + + // Compile + let stt = compile_env(&lean_env).expect("compile_env failed"); + + // Decompile + let dstt = decompile_env(&stt).expect("decompile_env failed"); + + // Check roundtrip for inductive + let recovered_ind = dstt.env.get(&unit_name).expect("Unit not found"); + match &*recovered_ind { + LeanConstantInfo::InductInfo(i) => { + assert_eq!(i.cnst.name, unit_name); + assert_eq!(i.ctors.len(), 1); + assert_eq!(i.all.len(), 1); + }, + _ => panic!("Expected 
InductInfo"), + } + + // Check roundtrip for constructor + let recovered_ctor = + dstt.env.get(&unit_ctor_name).expect("Unit.unit not found"); + match &*recovered_ctor { + LeanConstantInfo::CtorInfo(c) => { + assert_eq!(c.cnst.name, unit_ctor_name); + assert_eq!(c.induct, unit_name); + }, + _ => panic!("Expected CtorInfo"), + } + } + + #[test] + fn test_roundtrip_inductive_with_multiple_ctors() { + use crate::ix::decompile::decompile_env; + use crate::ix::env::{ + ConstantVal, ConstructorVal, Env as LeanEnv, InductiveVal, + }; + + // Create Bool with two constructors (no recursor to keep self-contained) + let bool_name = Name::str(Name::anon(), "Bool".to_string()); + let false_name = Name::str(bool_name.clone(), "false".to_string()); + let true_name = Name::str(bool_name.clone(), "true".to_string()); + + let type0 = LeanExpr::sort(Level::succ(Level::zero())); + let bool_type = LeanExpr::cnst(bool_name.clone(), vec![]); + + let inductive = InductiveVal { + cnst: ConstantVal { + name: bool_name.clone(), + level_params: vec![], + typ: type0, + }, + num_params: Nat::from(0u64), + num_indices: Nat::from(0u64), + all: vec![bool_name.clone()], + ctors: vec![false_name.clone(), true_name.clone()], + num_nested: Nat::from(0u64), + is_rec: false, + is_unsafe: false, + is_reflexive: false, + }; + + let ctor_false = ConstructorVal { + cnst: ConstantVal { + name: false_name.clone(), + level_params: vec![], + typ: bool_type.clone(), + }, + induct: bool_name.clone(), + cidx: Nat::from(0u64), + num_params: Nat::from(0u64), + num_fields: Nat::from(0u64), + is_unsafe: false, + }; + + let ctor_true = ConstructorVal { + cnst: ConstantVal { + name: true_name.clone(), + level_params: vec![], + typ: bool_type.clone(), + }, + induct: bool_name.clone(), + cidx: Nat::from(1u64), + num_params: Nat::from(0u64), + num_fields: Nat::from(0u64), + is_unsafe: false, + }; + + let mut lean_env = LeanEnv::default(); + lean_env.insert(bool_name.clone(), LeanConstantInfo::InductInfo(inductive)); + 
lean_env.insert(false_name.clone(), LeanConstantInfo::CtorInfo(ctor_false)); + lean_env.insert(true_name.clone(), LeanConstantInfo::CtorInfo(ctor_true)); + let lean_env = Arc::new(lean_env); + + // Compile + let stt = compile_env(&lean_env).expect("compile_env failed"); + + // Decompile + let dstt = decompile_env(&stt).expect("decompile_env failed"); + + // Check roundtrip + let recovered = dstt.env.get(&bool_name).expect("Bool not found"); + match &*recovered { + LeanConstantInfo::InductInfo(i) => { + assert_eq!(i.cnst.name, bool_name); + assert_eq!(i.ctors.len(), 2); + }, + _ => panic!("Expected InductInfo"), + } + + // Check both constructors + assert!(dstt.env.contains_key(&false_name)); + assert!(dstt.env.contains_key(&true_name)); + } + + #[test] + fn test_roundtrip_mutual_definitions() { + use crate::ix::decompile::decompile_env; + use crate::ix::env::{ + ConstantVal, DefinitionSafety, DefinitionVal, Env as LeanEnv, + ReducibilityHints, + }; + + // Create mutual definitions that only reference each other (self-contained) + // def f : Type → Type and def g : Type → Type + // where f references g and g references f + let f_name = Name::str(Name::anon(), "f".to_string()); + let g_name = Name::str(Name::anon(), "g".to_string()); + + let type0 = LeanExpr::sort(Level::succ(Level::zero())); // Type + let fn_type = LeanExpr::all( + Name::anon(), + type0.clone(), + type0.clone(), + crate::ix::env::BinderInfo::Default, + ); + + // f := fun x => g x + let f_value = LeanExpr::lam( + Name::str(Name::anon(), "x".to_string()), + type0.clone(), + LeanExpr::app( + LeanExpr::cnst(g_name.clone(), vec![]), + LeanExpr::bvar(Nat::from(0u64)), + ), + crate::ix::env::BinderInfo::Default, + ); + + // g := fun x => f x + let g_value = LeanExpr::lam( + Name::str(Name::anon(), "x".to_string()), + type0.clone(), + LeanExpr::app( + LeanExpr::cnst(f_name.clone(), vec![]), + LeanExpr::bvar(Nat::from(0u64)), + ), + crate::ix::env::BinderInfo::Default, + ); + + // Mutual block: both 
reference each other + let all = vec![f_name.clone(), g_name.clone()]; + + let f_def = DefinitionVal { + cnst: ConstantVal { + name: f_name.clone(), + level_params: vec![], + typ: fn_type.clone(), + }, + value: f_value, + hints: ReducibilityHints::Regular(1), + safety: DefinitionSafety::Safe, + all: all.clone(), + }; + + let g_def = DefinitionVal { + cnst: ConstantVal { + name: g_name.clone(), + level_params: vec![], + typ: fn_type, + }, + value: g_value, + hints: ReducibilityHints::Regular(1), + safety: DefinitionSafety::Safe, + all: all.clone(), + }; + + let mut lean_env = LeanEnv::default(); + lean_env.insert(f_name.clone(), LeanConstantInfo::DefnInfo(f_def)); + lean_env.insert(g_name.clone(), LeanConstantInfo::DefnInfo(g_def)); + let lean_env = Arc::new(lean_env); + + // Compile + let stt = compile_env(&lean_env).expect("compile_env failed"); + + // Should have a mutual block + assert!(!stt.blocks.is_empty(), "Expected at least one mutual block"); + + // Decompile + let dstt = decompile_env(&stt).expect("decompile_env failed"); + + // Check both definitions roundtrip + let recovered_f = dstt.env.get(&f_name).expect("f not found"); + match &*recovered_f { + LeanConstantInfo::DefnInfo(d) => { + assert_eq!(d.cnst.name, f_name); + // The all field should contain both names + assert_eq!(d.all.len(), 2); + }, + _ => panic!("Expected DefnInfo for f"), + } + + let recovered_g = dstt.env.get(&g_name).expect("g not found"); + match &*recovered_g { + LeanConstantInfo::DefnInfo(d) => { + assert_eq!(d.cnst.name, g_name); + assert_eq!(d.all.len(), 2); + }, + _ => panic!("Expected DefnInfo for g"), + } + } + + #[test] + fn test_roundtrip_mutual_inductives() { + use crate::ix::decompile::decompile_env; + use crate::ix::env::{ + ConstantVal, ConstructorVal, Env as LeanEnv, InductiveVal, + }; + + // Create two mutually recursive inductives (simplified): + // inductive Even : Type where | zero : Even | succ : Odd → Even + // inductive Odd : Type where | succ : Even → Odd + let 
even_name = Name::str(Name::anon(), "Even".to_string()); + let odd_name = Name::str(Name::anon(), "Odd".to_string()); + let even_zero = Name::str(even_name.clone(), "zero".to_string()); + let even_succ = Name::str(even_name.clone(), "succ".to_string()); + let odd_succ = Name::str(odd_name.clone(), "succ".to_string()); + + let type0 = LeanExpr::sort(Level::succ(Level::zero())); // Type + let even_type = LeanExpr::cnst(even_name.clone(), vec![]); + let odd_type = LeanExpr::cnst(odd_name.clone(), vec![]); + + let all = vec![even_name.clone(), odd_name.clone()]; + + let even_ind = InductiveVal { + cnst: ConstantVal { + name: even_name.clone(), + level_params: vec![], + typ: type0.clone(), + }, + num_params: Nat::from(0u64), + num_indices: Nat::from(0u64), + all: all.clone(), + ctors: vec![even_zero.clone(), even_succ.clone()], + num_nested: Nat::from(0u64), + is_rec: true, // mutually recursive + is_unsafe: false, + is_reflexive: false, + }; + + let odd_ind = InductiveVal { + cnst: ConstantVal { + name: odd_name.clone(), + level_params: vec![], + typ: type0.clone(), + }, + num_params: Nat::from(0u64), + num_indices: Nat::from(0u64), + all: all.clone(), + ctors: vec![odd_succ.clone()], + num_nested: Nat::from(0u64), + is_rec: true, + is_unsafe: false, + is_reflexive: false, + }; + + // Even.zero : Even + let even_zero_ctor = ConstructorVal { + cnst: ConstantVal { + name: even_zero.clone(), + level_params: vec![], + typ: even_type.clone(), + }, + induct: even_name.clone(), + cidx: Nat::from(0u64), + num_params: Nat::from(0u64), + num_fields: Nat::from(0u64), + is_unsafe: false, + }; + + // Even.succ : Odd → Even + let even_succ_type = LeanExpr::all( + Name::anon(), + odd_type.clone(), + even_type.clone(), + crate::ix::env::BinderInfo::Default, + ); + + let even_succ_ctor = ConstructorVal { + cnst: ConstantVal { + name: even_succ.clone(), + level_params: vec![], + typ: even_succ_type, + }, + induct: even_name.clone(), + cidx: Nat::from(1u64), + num_params: 
Nat::from(0u64), + num_fields: Nat::from(1u64), + is_unsafe: false, + }; + + // Odd.succ : Even → Odd + let odd_succ_type = LeanExpr::all( + Name::anon(), + even_type.clone(), + odd_type.clone(), + crate::ix::env::BinderInfo::Default, + ); + + let odd_succ_ctor = ConstructorVal { + cnst: ConstantVal { + name: odd_succ.clone(), + level_params: vec![], + typ: odd_succ_type, + }, + induct: odd_name.clone(), + cidx: Nat::from(0u64), + num_params: Nat::from(0u64), + num_fields: Nat::from(1u64), + is_unsafe: false, + }; + + let mut lean_env = LeanEnv::default(); + lean_env.insert(even_name.clone(), LeanConstantInfo::InductInfo(even_ind)); + lean_env.insert(odd_name.clone(), LeanConstantInfo::InductInfo(odd_ind)); + lean_env + .insert(even_zero.clone(), LeanConstantInfo::CtorInfo(even_zero_ctor)); + lean_env + .insert(even_succ.clone(), LeanConstantInfo::CtorInfo(even_succ_ctor)); + lean_env + .insert(odd_succ.clone(), LeanConstantInfo::CtorInfo(odd_succ_ctor)); + let lean_env = Arc::new(lean_env); + + // Compile + let stt = compile_env(&lean_env).expect("compile_env failed"); + + // Should have at least one mutual block + assert!(!stt.blocks.is_empty(), "Expected mutual block for Even/Odd"); + + // Decompile + let dstt = decompile_env(&stt).expect("decompile_env failed"); + + // Check Even roundtrip + let recovered_even = dstt.env.get(&even_name).expect("Even not found"); + match &*recovered_even { + LeanConstantInfo::InductInfo(i) => { + assert_eq!(i.cnst.name, even_name); + assert_eq!(i.ctors.len(), 2); + assert_eq!(i.all.len(), 2); // Even and Odd in mutual block + }, + _ => panic!("Expected InductInfo for Even"), + } + + // Check Odd roundtrip + let recovered_odd = dstt.env.get(&odd_name).expect("Odd not found"); + match &*recovered_odd { + LeanConstantInfo::InductInfo(i) => { + assert_eq!(i.cnst.name, odd_name); + assert_eq!(i.ctors.len(), 1); + assert_eq!(i.all.len(), 2); + }, + _ => panic!("Expected InductInfo for Odd"), + } + + // Check all constructors exist + 
assert!(dstt.env.contains_key(&even_zero)); + assert!(dstt.env.contains_key(&even_succ)); + assert!(dstt.env.contains_key(&odd_succ)); + } + + #[test] + fn test_roundtrip_inductive_with_recursor() { + use crate::ix::decompile::decompile_env; + use crate::ix::env::{ConstantVal, InductiveVal, RecursorVal}; + + // Create Empty type with recursor (no constructors) + // inductive Empty : Type + // Empty.rec.{u} : (motive : Empty → Sort u) → (e : Empty) → motive e + let empty_name = Name::str(Name::anon(), "Empty".to_string()); + let empty_rec_name = Name::str(empty_name.clone(), "rec".to_string()); + let u = Name::str(Name::anon(), "u".to_string()); + + let type0 = LeanExpr::sort(Level::succ(Level::zero())); // Type + let empty_type = LeanExpr::cnst(empty_name.clone(), vec![]); + + let inductive = InductiveVal { + cnst: ConstantVal { + name: empty_name.clone(), + level_params: vec![], + typ: type0.clone(), + }, + num_params: Nat::from(0u64), + num_indices: Nat::from(0u64), + all: vec![empty_name.clone()], + ctors: vec![], // No constructors! 
+ num_nested: Nat::from(0u64), + is_rec: false, + is_unsafe: false, + is_reflexive: false, + }; + + // Empty.rec.{u} : (motive : Empty → Sort u) → (e : Empty) → motive e + let motive_type = LeanExpr::all( + Name::anon(), + empty_type.clone(), + LeanExpr::sort(Level::param(u.clone())), + crate::ix::env::BinderInfo::Default, + ); + let rec_type = LeanExpr::all( + Name::str(Name::anon(), "motive".to_string()), + motive_type, + LeanExpr::all( + Name::str(Name::anon(), "e".to_string()), + empty_type.clone(), + LeanExpr::app( + LeanExpr::bvar(Nat::from(1u64)), + LeanExpr::bvar(Nat::from(0u64)), + ), + crate::ix::env::BinderInfo::Default, + ), + crate::ix::env::BinderInfo::Implicit, + ); + + let recursor = RecursorVal { + cnst: ConstantVal { + name: empty_rec_name.clone(), + level_params: vec![u.clone()], + typ: rec_type, + }, + all: vec![empty_name.clone()], + num_params: Nat::from(0u64), + num_indices: Nat::from(0u64), + num_motives: Nat::from(1u64), + num_minors: Nat::from(0u64), // No minor premises for Empty + rules: vec![], // No rules since no constructors + k: true, + is_unsafe: false, + }; + + let mut lean_env = LeanEnv::default(); + lean_env + .insert(empty_name.clone(), LeanConstantInfo::InductInfo(inductive)); + lean_env + .insert(empty_rec_name.clone(), LeanConstantInfo::RecInfo(recursor)); + let lean_env = Arc::new(lean_env); + + // Compile + let stt = compile_env(&lean_env).expect("compile_env failed"); + + // Decompile + let dstt = decompile_env(&stt).expect("decompile_env failed"); + + // Check inductive roundtrip + let recovered_ind = dstt.env.get(&empty_name).expect("Empty not found"); + match &*recovered_ind { + LeanConstantInfo::InductInfo(i) => { + assert_eq!(i.cnst.name, empty_name); + assert_eq!(i.ctors.len(), 0); + }, + _ => panic!("Expected InductInfo"), + } + + // Check recursor roundtrip + let recovered_rec = + dstt.env.get(&empty_rec_name).expect("Empty.rec not found"); + match &*recovered_rec { + LeanConstantInfo::RecInfo(r) => { + 
assert_eq!(r.cnst.name, empty_rec_name); + assert_eq!(r.rules.len(), 0); + assert_eq!(r.cnst.level_params.len(), 1); + }, + _ => panic!("Expected RecInfo"), + } + } +} diff --git a/src/ix/condense.rs b/src/ix/condense.rs index a5aa39a6..ab0a6b27 100644 --- a/src/ix/condense.rs +++ b/src/ix/condense.rs @@ -1,3 +1,8 @@ +//! Computes strongly connected components (SCCs) using iterative Tarjan's algorithm. +//! +//! Produces a condensation of the reference graph: each SCC becomes a single block. +//! Used to identify mutual definition groups for the compilation pipeline. + use rustc_hash::FxHashMap; use crate::{ @@ -6,9 +11,13 @@ use crate::{ ix::graph::{NameSet, RefMap}, }; +/// The condensation of a reference graph into strongly connected components. pub struct CondensedBlocks { + /// Maps each name to the representative (low-link root) of its SCC. pub low_links: FxHashMap, + /// Maps each SCC representative to the set of names in that component. pub blocks: RefMap, + /// Maps each SCC representative to the set of names referenced outside its component. pub block_refs: RefMap, } diff --git a/src/ix/decompile.rs b/src/ix/decompile.rs index 01904719..88082135 100644 --- a/src/ix/decompile.rs +++ b/src/ix/decompile.rs @@ -1,1074 +1,1559 @@ +//! Decompilation from Ixon format back to Lean environment. +//! +//! This module decompiles alpha-invariant Ixon representations back to +//! Lean constants, expanding Share references and reattaching metadata. 
+ +#![allow(clippy::cast_possible_truncation)] +#![allow(clippy::cast_precision_loss)] +#![allow(clippy::cast_possible_wrap)] +#![allow(clippy::map_err_ignore)] +#![allow(clippy::match_same_arms)] + use crate::{ - ix::address::{Address, MetaAddress}, + ix::address::Address, ix::compile::CompileState, ix::env::{ - AxiomVal, BinderInfo, ConstantInfo, ConstantVal, ConstructorVal, - DataValue as LeanDataValue, DefinitionSafety, DefinitionVal, Env, Expr, - InductiveVal, Int, Level, Literal, Name, OpaqueVal, QuotVal, RecursorRule, - RecursorVal, SourceInfo as LeanSourceInfo, Substring as LeanSubstring, - Syntax as LeanSyntax, SyntaxPreresolved, TheoremVal, + AxiomVal, BinderInfo, ConstantInfo as LeanConstantInfo, ConstantVal, + ConstructorVal, DataValue as LeanDataValue, DefinitionSafety, + DefinitionVal, Env as LeanEnv, Expr as LeanExpr, InductiveVal, Int, Level, + Literal, Name, OpaqueVal, QuotVal, RecursorRule as LeanRecursorRule, + RecursorVal, ReducibilityHints, SourceInfo, Substring, Syntax, + SyntaxPreresolved, TheoremVal, }, ix::ixon::{ - self, Constructor, DataValue, DefKind, Definition, Inductive, Ixon, - Metadata, Metadatum, MutConst, Preresolved, Recursor, Serialize, - SourceInfo, Substring, Syntax, + DecompileError, Tag0, + constant::{ + Axiom, Constant, ConstantInfo, Constructor, DefKind, Definition, + Inductive, MutConst, Quotient, Recursor, + }, + env::Named, + expr::Expr, + metadata::{ConstantMeta, DataValue, ExprMeta, ExprMetaData, KVMap}, + univ::Univ, }, - ix::mutual::MutCtx, + ix::mutual::{MutCtx, all_to_ctx}, lean::nat::Nat, }; -use blake3::Hash; use dashmap::DashMap; -use itertools::Itertools; use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; use rustc_hash::FxHashMap; -use std::str::Utf8Error; - -#[derive(Debug)] -pub enum DecompileError { - UnknownStoreAddress, - Deserialize(String), - Utf8(Utf8Error), - BadBlock(Box<(Ixon, Ixon)>), - BadName(Box), - BadLevel(Box), - BadExprERec(Name, Box), - ConstName(Name, Name), - 
BadDef(Box<(Definition, Metadata)>), - BadInd(Box<(Inductive, Metadata)>), - BadRec(Box<(Recursor, Metadata)>), - BadCtor(Box<(Constructor, Metadata)>), - MismatchedLevels(Name, Nat, Vec
), - MismatchedCtors(Name, Vec, Vec
), - MalformedProjection(Name, Address), - ConstAddrNotDecompiled(Name, Box), - ConstAddrMismatch(Name, Box, Name), - ConstNameNotCompiled(Name, Box), - ConstNameMismatch(Name, Box<(MetaAddress, MetaAddress)>), - ConstMissingInOriginal(Name), - ConstHashMismatch(Name, Box<(Hash, Hash)>), - EnvSizeMismatch { original: usize, decompiled: usize }, - Todo, -} +use std::sync::Arc; #[derive(Default, Debug)] pub struct DecompileState { - pub names: DashMap, - pub consts: DashMap, - pub block_ctx: DashMap, - pub env: DashMap, + /// Decompiled environment + pub env: DashMap, } #[derive(Debug)] pub struct DecompileStateStats { - pub names: usize, - pub consts: usize, - pub block_ctx: usize, pub env: usize, } impl DecompileState { pub fn stats(&self) -> DecompileStateStats { - DecompileStateStats { - names: self.names.len(), - consts: self.consts.len(), - block_ctx: self.block_ctx.len(), - env: self.env.len(), - } + DecompileStateStats { env: self.env.len() } } } +/// Per-block decompilation cache. #[derive(Default, Debug)] pub struct BlockCache { + /// Mutual context for resolving Rec references pub ctx: MutCtx, - pub exprs: FxHashMap, - pub univs: FxHashMap, + /// Sharing vector for expanding Share references + pub sharing: Vec>, + /// Reference table for resolving Ref indices to addresses + pub refs: Vec
, + /// Universe table for resolving universe indices + pub univ_table: Vec>, + /// Cache for decompiled universes + pub univ_cache: FxHashMap<*const Univ, Level>, + /// Cache for decompiled expressions keyed by (Ixon pointer, arena index). + /// Same Ixon expression at same arena index → same metadata → same result. + /// Same Ixon expression at different arena index → different metadata → different cache key. + pub expr_cache: FxHashMap<(*const Expr, u64), LeanExpr>, + /// Current constant being decompiled (for error messages) + pub current_const: String, } -pub fn read_ixon( +// =========================================================================== +// Blob reading utilities +// =========================================================================== + +/// Read raw bytes from the blob store. +fn read_blob( addr: &Address, stt: &CompileState, -) -> Result { - let bytes = stt.store.get(addr).ok_or(DecompileError::UnknownStoreAddress)?; - Ixon::get(&mut bytes.as_slice()).map_err(DecompileError::Deserialize) +) -> Result, DecompileError> { + stt.env.get_blob(addr).ok_or(DecompileError::BlobNotFound(addr.clone())) } -pub fn read_nat( - addr: &Address, - stt: &CompileState, -) -> Result { - let bytes = stt.store.get(addr).ok_or(DecompileError::UnknownStoreAddress)?; +/// Read a Nat from the blob store. +fn read_nat(addr: &Address, stt: &CompileState) -> Result { + let bytes = read_blob(addr, stt)?; Ok(Nat::from_le_bytes(&bytes)) } -pub fn read_string( +/// Read a string from the blob store. +fn read_string( addr: &Address, stt: &CompileState, ) -> Result { - let bytes = stt.store.get(addr).ok_or(DecompileError::UnknownStoreAddress)?; - let str = str::from_utf8(&bytes).map_err(DecompileError::Utf8)?; - Ok(str.to_owned()) + let bytes = read_blob(addr, stt)?; + String::from_utf8(bytes).map_err(|_| DecompileError::BadBlobFormat { + addr: addr.clone(), + expected: "UTF-8 string".into(), + }) } -pub fn read_meta( +/// Read a Constant from the const store. 
+fn read_const( addr: &Address, stt: &CompileState, -) -> Result { - let bytes = stt.store.get(addr).ok_or(DecompileError::UnknownStoreAddress)?; - Metadata::get(&mut bytes.as_slice()).map_err(DecompileError::Deserialize) +) -> Result { + stt.env.get_const(addr).ok_or(DecompileError::MissingAddress(addr.clone())) } -pub fn decompile_name( - addr: &Address, - stt: &CompileState, - dstt: &DecompileState, -) -> Result { - match dstt.names.get(addr) { - Some(name) => Ok(name.clone()), - None => { - let name = match read_ixon(addr, stt)? { - Ixon::NAnon => Name::anon(), - Ixon::NStr(n, s) => { - Name::str(decompile_name(&n, stt, dstt)?, read_string(&s, stt)?) - }, - Ixon::NNum(n, s) => { - Name::num(decompile_name(&n, stt, dstt)?, read_nat(&s, stt)?) - }, - e => return Err(DecompileError::BadName(Box::new(e))), - }; - dstt.names.insert(addr.clone(), name.clone()); - Ok(name) - }, - } -} +// =========================================================================== +// DataValue and KVMap decompilation +// =========================================================================== -pub fn decompile_level( - addr: &Address, - lvls: &[Name], - cache: &mut BlockCache, +/// Decompile an Ixon DataValue (Address-based) to a Lean DataValue. +fn decompile_data_value( + dv: &DataValue, stt: &CompileState, -) -> Result { - if let Some(cached) = cache.univs.get(addr) { - return Ok(cached.clone()); - } - let level = match read_ixon(addr, stt)? 
{ - Ixon::UZero => Ok(Level::zero()), - Ixon::USucc(x) => { - let inner = decompile_level(&x, lvls, cache, stt)?; - Ok(Level::succ(inner)) +) -> Result { + match dv { + DataValue::OfString(addr) => { + let s = read_string(addr, stt)?; + Ok(LeanDataValue::OfString(s)) }, - Ixon::UMax(x, y) => { - let lx = decompile_level(&x, lvls, cache, stt)?; - let ly = decompile_level(&y, lvls, cache, stt)?; - Ok(Level::max(lx, ly)) + DataValue::OfBool(b) => Ok(LeanDataValue::OfBool(*b)), + DataValue::OfName(addr) => { + let name = decompile_name(addr, stt)?; + Ok(LeanDataValue::OfName(name)) }, - Ixon::UIMax(x, y) => { - let lx = decompile_level(&x, lvls, cache, stt)?; - let ly = decompile_level(&y, lvls, cache, stt)?; - Ok(Level::imax(lx, ly)) + DataValue::OfNat(addr) => { + let n = read_nat(addr, stt)?; + Ok(LeanDataValue::OfNat(n)) }, - Ixon::UVar(idx) => { - let idx_usize: usize = - idx.0.try_into().map_err(|_e| DecompileError::Todo)?; - let name = lvls.get(idx_usize).ok_or(DecompileError::Todo)?.clone(); - Ok(Level::param(name)) + DataValue::OfInt(addr) => { + let bytes = read_blob(addr, stt)?; + let int = deserialize_int(&bytes)?; + Ok(LeanDataValue::OfInt(int)) }, - e => Err(DecompileError::BadLevel(Box::new(e))), - }?; - cache.univs.insert(addr.clone(), level.clone()); - Ok(level) + DataValue::OfSyntax(addr) => { + let bytes = read_blob(addr, stt)?; + let syntax = deserialize_syntax(&bytes, stt)?; + Ok(LeanDataValue::OfSyntax(Box::new(syntax))) + }, + } } -fn decompile_levels( - addrs: &[Address], - lvls: &[Name], - cache: &mut BlockCache, - stt: &CompileState, -) -> Result, DecompileError> { - addrs.iter().map(|a| decompile_level(a, lvls, cache, stt)).collect() +/// Deserialize an Int from bytes (mirrors compile-side serialization). 
+fn deserialize_int(bytes: &[u8]) -> Result { + if bytes.is_empty() { + return Err(DecompileError::BadConstantFormat { + msg: "deserialize_int: empty".into(), + }); + } + match bytes[0] { + 0 => Ok(Int::OfNat(Nat::from_le_bytes(&bytes[1..]))), + 1 => Ok(Int::NegSucc(Nat::from_le_bytes(&bytes[1..]))), + _ => Err(DecompileError::BadConstantFormat { + msg: "deserialize_int: invalid tag".into(), + }), + } } -fn decompile_substring( - ss: &Substring, - stt: &CompileState, -) -> Result { - Ok(LeanSubstring { - str: read_string(&ss.str, stt)?, - start_pos: ss.start_pos.clone(), - stop_pos: ss.stop_pos.clone(), +/// Read a Tag0-encoded u64 from a byte slice, advancing the cursor. +fn read_tag0(buf: &mut &[u8]) -> Result { + Tag0::get(buf).map(|t| t.size).map_err(|_| { + DecompileError::BadConstantFormat { + msg: "read_tag0: unexpected EOF".into(), + } }) } -fn decompile_source_info( - info: &SourceInfo, - stt: &CompileState, -) -> Result { - match info { - SourceInfo::Original(l, p, t, e) => Ok(LeanSourceInfo::Original( - decompile_substring(l, stt)?, - p.clone(), - decompile_substring(t, stt)?, - e.clone(), - )), - SourceInfo::Synthetic(p, e, c) => { - Ok(LeanSourceInfo::Synthetic(p.clone(), e.clone(), *c)) - }, - SourceInfo::None => Ok(LeanSourceInfo::None), +/// Read exactly 32 bytes (Address) from a byte slice, advancing the cursor. +fn read_addr_bytes(buf: &mut &[u8]) -> Result { + if buf.len() < 32 { + return Err(DecompileError::BadConstantFormat { + msg: "read_addr: need 32 bytes".into(), + }); } + let (bytes, rest) = buf.split_at(32); + *buf = rest; + Address::from_slice(bytes).map_err(|_| DecompileError::BadConstantFormat { + msg: "read_addr: invalid".into(), + }) } -fn decompile_preresolved( - pre: &Preresolved, +/// Deserialize a Substring from bytes. 
+fn deserialize_substring( + buf: &mut &[u8], stt: &CompileState, - dstt: &DecompileState, -) -> Result { - match pre { - Preresolved::Namespace(ns) => { - Ok(SyntaxPreresolved::Namespace(decompile_name(ns, stt, dstt)?)) +) -> Result { + let str_addr = read_addr_bytes(buf)?; + let s = read_string(&str_addr, stt)?; + let start_pos = Nat::from(read_tag0(buf)?); + let stop_pos = Nat::from(read_tag0(buf)?); + Ok(Substring { str: s, start_pos, stop_pos }) +} + +/// Deserialize SourceInfo from bytes. +fn deserialize_source_info( + buf: &mut &[u8], + stt: &CompileState, +) -> Result { + if buf.is_empty() { + return Err(DecompileError::BadConstantFormat { + msg: "source_info: empty".into(), + }); + } + let tag = buf[0]; + *buf = &buf[1..]; + match tag { + 0 => { + let leading = deserialize_substring(buf, stt)?; + let leading_pos = Nat::from(read_tag0(buf)?); + let trailing = deserialize_substring(buf, stt)?; + let trailing_pos = Nat::from(read_tag0(buf)?); + Ok(SourceInfo::Original(leading, leading_pos, trailing, trailing_pos)) }, - Preresolved::Decl(n, fields) => { - let name = decompile_name(n, stt, dstt)?; - let fields: Result, _> = - fields.iter().map(|f| read_string(f, stt)).collect(); - Ok(SyntaxPreresolved::Decl(name, fields?)) + 1 => { + let start = Nat::from(read_tag0(buf)?); + let end = Nat::from(read_tag0(buf)?); + if buf.is_empty() { + return Err(DecompileError::BadConstantFormat { + msg: "source_info: missing canonical".into(), + }); + } + let canonical = buf[0] != 0; + *buf = &buf[1..]; + Ok(SourceInfo::Synthetic(start, end, canonical)) }, + 2 => Ok(SourceInfo::None), + _ => Err(DecompileError::BadConstantFormat { + msg: "source_info: invalid tag".into(), + }), } } -fn decompile_syntax( - addr: &Address, +/// Deserialize a SyntaxPreresolved from bytes. 
+fn deserialize_preresolved( + buf: &mut &[u8], stt: &CompileState, - dstt: &DecompileState, -) -> Result { - let bytes = stt.store.get(addr).ok_or(DecompileError::UnknownStoreAddress)?; - let syn = - Syntax::get(&mut bytes.as_slice()).map_err(DecompileError::Deserialize)?; - - match syn { - Syntax::Missing => Ok(LeanSyntax::Missing), - Syntax::Node(info, kind, args) => { - let info = decompile_source_info(&info, stt)?; - let kind = decompile_name(&kind, stt, dstt)?; - let args: Result, _> = - args.iter().map(|a| decompile_syntax(a, stt, dstt)).collect(); - Ok(LeanSyntax::Node(info, kind, args?)) - }, - Syntax::Atom(info, val) => { - let info = decompile_source_info(&info, stt)?; - Ok(LeanSyntax::Atom(info, read_string(&val, stt)?)) +) -> Result { + if buf.is_empty() { + return Err(DecompileError::BadConstantFormat { + msg: "preresolved: empty".into(), + }); + } + let tag = buf[0]; + *buf = &buf[1..]; + match tag { + 0 => { + let name_addr = read_addr_bytes(buf)?; + let name = decompile_name(&name_addr, stt)?; + Ok(SyntaxPreresolved::Namespace(name)) }, - Syntax::Ident(info, raw_val, val, preresolved) => { - let info = decompile_source_info(&info, stt)?; - let raw_val = decompile_substring(&raw_val, stt)?; - let val = decompile_name(&val, stt, dstt)?; - let pres: Result, _> = preresolved - .iter() - .map(|p| decompile_preresolved(p, stt, dstt)) - .collect(); - Ok(LeanSyntax::Ident(info, raw_val, val, pres?)) + 1 => { + let name_addr = read_addr_bytes(buf)?; + let name = decompile_name(&name_addr, stt)?; + let count = read_tag0(buf)? 
as usize; + let mut fields = Vec::with_capacity(count); + for _ in 0..count { + let field_addr = read_addr_bytes(buf)?; + let field = read_string(&field_addr, stt)?; + fields.push(field); + } + Ok(SyntaxPreresolved::Decl(name, fields)) }, + _ => Err(DecompileError::BadConstantFormat { + msg: "preresolved: invalid tag".into(), + }), } } -fn decompile_data_value( - dv: &DataValue, +/// Deserialize a Syntax from bytes (mirrors compile-side serialize_syntax). +fn deserialize_syntax( + bytes: &[u8], stt: &CompileState, - dstt: &DecompileState, -) -> Result { - match dv { - DataValue::OfString(addr) => { - Ok(LeanDataValue::OfString(read_string(addr, stt)?)) - }, - DataValue::OfBool(b) => Ok(LeanDataValue::OfBool(*b)), - DataValue::OfName(addr) => { - Ok(LeanDataValue::OfName(decompile_name(addr, stt, dstt)?)) +) -> Result { + let mut buf = bytes; + deserialize_syntax_inner(&mut buf, stt) +} + +/// Recursive inner deserializer for Syntax. +fn deserialize_syntax_inner( + buf: &mut &[u8], + stt: &CompileState, +) -> Result { + if buf.is_empty() { + return Err(DecompileError::BadConstantFormat { + msg: "syntax: empty".into(), + }); + } + let tag = buf[0]; + *buf = &buf[1..]; + match tag { + 0 => Ok(Syntax::Missing), + 1 => { + let info = deserialize_source_info(buf, stt)?; + let kind_addr = read_addr_bytes(buf)?; + let kind = decompile_name(&kind_addr, stt)?; + let arg_count = read_tag0(buf)? 
as usize; + let mut args = Vec::with_capacity(arg_count); + for _ in 0..arg_count { + args.push(deserialize_syntax_inner(buf, stt)?); + } + Ok(Syntax::Node(info, kind, args)) }, - DataValue::OfNat(addr) => Ok(LeanDataValue::OfNat(read_nat(addr, stt)?)), - DataValue::OfInt(addr) => { - let bytes = - stt.store.get(addr).ok_or(DecompileError::UnknownStoreAddress)?; - let int = - Int::get(&mut bytes.as_slice()).map_err(DecompileError::Deserialize)?; - Ok(LeanDataValue::OfInt(int)) + 2 => { + let info = deserialize_source_info(buf, stt)?; + let val_addr = read_addr_bytes(buf)?; + let val = read_string(&val_addr, stt)?; + Ok(Syntax::Atom(info, val)) }, - DataValue::OfSyntax(addr) => { - Ok(LeanDataValue::OfSyntax(Box::new(decompile_syntax(addr, stt, dstt)?))) + 3 => { + let info = deserialize_source_info(buf, stt)?; + let raw_val = deserialize_substring(buf, stt)?; + let val_addr = read_addr_bytes(buf)?; + let val = decompile_name(&val_addr, stt)?; + let pr_count = read_tag0(buf)? as usize; + let mut preresolved = Vec::with_capacity(pr_count); + for _ in 0..pr_count { + preresolved.push(deserialize_preresolved(buf, stt)?); + } + Ok(Syntax::Ident(info, raw_val, val, preresolved)) }, + _ => Err(DecompileError::BadConstantFormat { + msg: "syntax: invalid tag".into(), + }), } } -fn decompile_kv_map( - kvs: &[(Address, DataValue)], + +/// Decompile an Ixon KVMap (Address-based) to a Lean KVMap (Name/DataValue). +fn decompile_kvmap( + kvmap: &KVMap, stt: &CompileState, - dstt: &DecompileState, ) -> Result, DecompileError> { - let mut kv = vec![]; - for (n, v) in kvs { - let name = decompile_name(n, stt, dstt)?; - let val = decompile_data_value(v, stt, dstt)?; - kv.push((name, val)) + kvmap + .iter() + .map(|(k_addr, v)| { + let name = decompile_name(k_addr, stt)?; + let val = decompile_data_value(v, stt)?; + Ok((name, val)) + }) + .collect() +} + +/// Wrap a LeanExpr in pre-decompiled mdata layers. +/// +/// The `lean_mdata` vec stores layers outermost-first. 
+/// We iterate in reverse to wrap innermost-first: +/// given [kv_outer, kv_inner], result is mdata(kv_outer, mdata(kv_inner, expr)). +fn apply_mdata( + mut expr: LeanExpr, + lean_mdata: Vec>, +) -> LeanExpr { + for kvmap in lean_mdata.into_iter().rev() { + expr = LeanExpr::mdata(kvmap, expr); } - Ok(kv) + expr +} + +// =========================================================================== +// Name decompilation +// =========================================================================== + +/// Look up a Name by its address. +pub fn decompile_name( + addr: &Address, + stt: &CompileState, +) -> Result { + stt + .env + .names + .get(addr) + .map(|r| r.clone()) + .ok_or(DecompileError::MissingAddress(addr.clone())) } +// =========================================================================== +// Universe decompilation +// =========================================================================== + +/// Decompile an Ixon Univ to a Lean Level. +pub fn decompile_univ( + univ: &Arc, + lvl_names: &[Name], + cache: &mut BlockCache, +) -> Result { + let ptr = Arc::as_ptr(univ); + if let Some(cached) = cache.univ_cache.get(&ptr) { + return Ok(cached.clone()); + } + + let level = match univ.as_ref() { + Univ::Zero => Level::zero(), + Univ::Succ(inner) => { + let inner_level = decompile_univ(inner, lvl_names, cache)?; + Level::succ(inner_level) + }, + Univ::Max(a, b) => { + let la = decompile_univ(a, lvl_names, cache)?; + let lb = decompile_univ(b, lvl_names, cache)?; + Level::max(la, lb) + }, + Univ::IMax(a, b) => { + let la = decompile_univ(a, lvl_names, cache)?; + let lb = decompile_univ(b, lvl_names, cache)?; + Level::imax(la, lb) + }, + Univ::Var(idx) => { + let idx_usize = *idx as usize; + let name = lvl_names + .get(idx_usize) + .ok_or_else(|| DecompileError::InvalidUnivVarIndex { + idx: *idx, + max: lvl_names.len(), + constant: cache.current_const.clone(), + })? 
+ .clone(); + Level::param(name) + }, + }; + + cache.univ_cache.insert(ptr, level.clone()); + Ok(level) +} + +// =========================================================================== +// Expression decompilation +// =========================================================================== + +/// Decompile an Ixon Expr to a Lean Expr with arena-based metadata restoration. +/// +/// Traverses the arena tree following child pointers. Share references are +/// expanded with the same arena_idx (parent's child pointer already captures +/// the correct metadata subtree). Mdata arena nodes are collected and applied +/// as wrappers. pub fn decompile_expr( - addr: MetaAddress, - lvls: &[Name], + expr: &Arc, + arena: &ExprMeta, + arena_idx: u64, + lvl_names: &[Name], cache: &mut BlockCache, stt: &CompileState, - dstt: &DecompileState, -) -> Result { + _dstt: &DecompileState, +) -> Result { + // Lean mdata layers: Vec of KVMaps (outermost-first) + type LeanMdata = Vec>; + + /// Default node for out-of-bounds arena access (empty arena or invalid index). 
+ const DEFAULT_NODE: ExprMetaData = ExprMetaData::Leaf; + enum Frame { - Decompile(MetaAddress), - Mdata(Vec<(Name, LeanDataValue)>), - App, - Lam(Name, BinderInfo), - All(Name, BinderInfo), - Let(Name, bool), - Proj(Name, Nat), - Cache(MetaAddress), - } - if let Some(expr) = cache.exprs.get(&addr) { - return Ok(expr.clone()); + Decompile(Arc, u64), + BuildApp(LeanMdata), + BuildLam(Name, BinderInfo, LeanMdata), + BuildAll(Name, BinderInfo, LeanMdata), + BuildLet(Name, bool, LeanMdata), + BuildProj(Name, Nat, LeanMdata), + CacheResult(*const Expr, u64), } - let mut stack = vec![Frame::Decompile(addr)]; - let mut res = vec![]; + let mut stack: Vec = vec![Frame::Decompile(expr.clone(), arena_idx)]; + let mut results: Vec = Vec::new(); while let Some(frame) = stack.pop() { match frame { - Frame::Decompile(addr) => { - if let Some(expr) = cache.exprs.get(&addr) { - res.push(expr.clone()); + Frame::Decompile(e, idx) => { + // Expand Share transparently with the SAME arena_idx + if let Expr::Share(share_idx) = e.as_ref() { + let shared_expr = cache + .sharing + .get(*share_idx as usize) + .ok_or_else(|| DecompileError::InvalidShareIndex { + idx: *share_idx, + max: cache.sharing.len(), + constant: cache.current_const.clone(), + })? 
+ .clone(); + stack.push(Frame::Decompile(shared_expr, idx)); continue; } - let meta_ixon = read_ixon(&addr.meta, stt)?; - if let Ixon::Meta(m) = &meta_ixon - && let [Metadatum::KVMap(kv), Metadatum::Link(inner_meta)] = - m.nodes.as_slice() - { - let kv = decompile_kv_map(kv, stt, dstt)?; - stack.push(Frame::Cache(addr.clone())); - stack.push(Frame::Mdata(kv)); - stack.push(Frame::Decompile(MetaAddress { - data: addr.data.clone(), - meta: inner_meta.clone(), - })); + + // Cache check: (Ixon pointer, arena index) + let cache_key = (Arc::as_ptr(&e), idx); + if let Some(cached) = cache.expr_cache.get(&cache_key) { + results.push(cached.clone()); continue; } - let data_ixon = read_ixon(&addr.data, stt)?; - match (&data_ixon, &meta_ixon) { - (Ixon::EVar(idx), Ixon::Meta(m)) if m.nodes.is_empty() => { - let expr = Expr::bvar(idx.clone()); - cache.exprs.insert(addr, expr.clone()); - res.push(expr); + + // Follow Mdata chain in arena, collecting mdata layers + let mut current_idx = idx; + let mut mdata_layers: LeanMdata = Vec::new(); + while let ExprMetaData::Mdata { mdata, child } = + arena.nodes.get(current_idx as usize).unwrap_or(&DEFAULT_NODE) + { + for kvm in mdata { + mdata_layers.push(decompile_kvmap(kvm, stt)?); + } + current_idx = *child; + } + + let node = + arena.nodes.get(current_idx as usize).unwrap_or(&DEFAULT_NODE); + + // Push CacheResult frame + stack.push(Frame::CacheResult(Arc::as_ptr(&e), idx)); + + match (node, e.as_ref()) { + // Leaf nodes: Var, Sort, Nat, Str + (_, Expr::Var(v)) => { + let expr = apply_mdata(LeanExpr::bvar(Nat::from(*v)), mdata_layers); + results.push(expr); }, - (Ixon::ESort(u_data), Ixon::Meta(m)) if m.nodes.is_empty() => { - let level = decompile_level(u_data, lvls, cache, stt)?; - let expr = Expr::sort(level); - cache.exprs.insert(addr, expr.clone()); - res.push(expr); + + (_, Expr::Sort(univ_idx)) => { + let univ = cache + .univ_table + .get(*univ_idx as usize) + .ok_or_else(|| DecompileError::InvalidUnivIndex { + idx: 
*univ_idx, + univs_len: cache.univ_table.len(), + constant: cache.current_const.clone(), + })? + .clone(); + let level = decompile_univ(&univ, lvl_names, cache)?; + let expr = apply_mdata(LeanExpr::sort(level), mdata_layers); + results.push(expr); }, - (Ixon::ERef(_, lvl_datas), Ixon::Meta(m)) => { - match m.nodes.as_slice() { - [Metadatum::Link(name_addr), Metadatum::Link(_)] => { - let name = decompile_name(name_addr, stt, dstt)?; - let levels = decompile_levels(lvl_datas, lvls, cache, stt)?; - let expr = Expr::cnst(name, levels); - cache.exprs.insert(addr, expr.clone()); - res.push(expr); - }, - _ => return Err(DecompileError::Todo), - } + + (_, Expr::Nat(ref_idx)) => { + let addr = cache.refs.get(*ref_idx as usize).ok_or_else(|| { + DecompileError::InvalidRefIndex { + idx: *ref_idx, + refs_len: cache.refs.len(), + constant: cache.current_const.clone(), + } + })?; + let n = read_nat(addr, stt)?; + let expr = + apply_mdata(LeanExpr::lit(Literal::NatVal(n)), mdata_layers); + results.push(expr); }, - (Ixon::ERec(idx, lvl_datas), Ixon::Meta(m)) => { - match m.nodes.as_slice() { - [Metadatum::Link(name_addr)] => { - let name = decompile_name(name_addr, stt, dstt)?; - let levels = decompile_levels(lvl_datas, lvls, cache, stt)?; - match cache.ctx.get(&name) { - Some(i) if i == idx => {}, - _ => { - return Err(DecompileError::BadExprERec( - name, - Box::new(idx.clone()), - )); - }, - } - let expr = Expr::cnst(name, levels); - cache.exprs.insert(addr, expr.clone()); - res.push(expr); - }, - _ => return Err(DecompileError::Todo), - } + + (_, Expr::Str(ref_idx)) => { + let addr = cache.refs.get(*ref_idx as usize).ok_or_else(|| { + DecompileError::InvalidRefIndex { + idx: *ref_idx, + refs_len: cache.refs.len(), + constant: cache.current_const.clone(), + } + })?; + let s = read_string(addr, stt)?; + let expr = + apply_mdata(LeanExpr::lit(Literal::StrVal(s)), mdata_layers); + results.push(expr); + }, + + // Ref: resolve name from arena Ref node or fallback + ( + 
ExprMetaData::Ref { name: name_addr }, + Expr::Ref(ref_idx, univ_indices), + ) => { + let name = decompile_name(name_addr, stt).unwrap_or_else(|_| { + // Fallback: resolve from refs table + cache + .refs + .get(*ref_idx as usize) + .and_then(|addr| stt.env.get_name_by_addr(addr)) + .unwrap_or_else(Name::anon) + }); + let levels = + decompile_univ_indices(univ_indices, lvl_names, cache)?; + let expr = apply_mdata(LeanExpr::cnst(name, levels), mdata_layers); + results.push(expr); + }, + + (_, Expr::Ref(ref_idx, univ_indices)) => { + // No Ref metadata — resolve from refs table + let addr = cache.refs.get(*ref_idx as usize).ok_or_else(|| { + DecompileError::InvalidRefIndex { + idx: *ref_idx, + refs_len: cache.refs.len(), + constant: cache.current_const.clone(), + } + })?; + let name = stt + .env + .get_name_by_addr(addr) + .ok_or(DecompileError::MissingAddress(addr.clone()))?; + let levels = + decompile_univ_indices(univ_indices, lvl_names, cache)?; + let expr = apply_mdata(LeanExpr::cnst(name, levels), mdata_layers); + results.push(expr); + }, + + // Rec: resolve name from arena Ref node or fallback + ( + ExprMetaData::Ref { name: name_addr }, + Expr::Rec(rec_idx, univ_indices), + ) => { + let name = decompile_name(name_addr, stt).unwrap_or_else(|_| { + cache + .ctx + .iter() + .find(|(_, i)| i.to_u64() == Some(*rec_idx)) + .map_or_else(Name::anon, |(n, _)| n.clone()) + }); + let levels = + decompile_univ_indices(univ_indices, lvl_names, cache)?; + let expr = apply_mdata(LeanExpr::cnst(name, levels), mdata_layers); + results.push(expr); }, - (Ixon::ENat(nat_addr), Ixon::Meta(m)) if m.nodes.is_empty() => { - let n = read_nat(nat_addr, stt)?; - let expr = Expr::lit(Literal::NatVal(n)); - cache.exprs.insert(addr, expr.clone()); - res.push(expr); + + (_, Expr::Rec(rec_idx, univ_indices)) => { + let name = cache + .ctx + .iter() + .find(|(_, i)| i.to_u64() == Some(*rec_idx)) + .map(|(n, _)| n.clone()) + .ok_or_else(|| DecompileError::InvalidRecIndex { + idx: *rec_idx, + 
ctx_size: cache.ctx.len(), + constant: cache.current_const.clone(), + })?; + let levels = + decompile_univ_indices(univ_indices, lvl_names, cache)?; + let expr = apply_mdata(LeanExpr::cnst(name, levels), mdata_layers); + results.push(expr); }, - (Ixon::EStr(str_addr), Ixon::Meta(m)) if m.nodes.is_empty() => { - let s = read_string(str_addr, stt)?; - let expr = Expr::lit(Literal::StrVal(s)); - cache.exprs.insert(addr, expr.clone()); - res.push(expr); + + // App: follow arena children + (ExprMetaData::App { children }, Expr::App(f, a)) => { + stack.push(Frame::BuildApp(mdata_layers)); + stack.push(Frame::Decompile(a.clone(), children[1])); + stack.push(Frame::Decompile(f.clone(), children[0])); }, - (Ixon::EApp(f_data, a_data), Ixon::Meta(m)) => { - match m.nodes.as_slice() { - [Metadatum::Link(f_meta), Metadatum::Link(a_meta)] => { - stack.push(Frame::Cache(addr.clone())); - stack.push(Frame::App); - stack.push(Frame::Decompile(MetaAddress { - data: a_data.clone(), - meta: a_meta.clone(), - })); - stack.push(Frame::Decompile(MetaAddress { - data: f_data.clone(), - meta: f_meta.clone(), - })); - }, - _ => return Err(DecompileError::Todo), - } + + (_, Expr::App(f, a)) => { + // No App metadata — use dummy indices (Leaf fallback) + stack.push(Frame::BuildApp(mdata_layers)); + stack.push(Frame::Decompile(a.clone(), u64::MAX)); + stack.push(Frame::Decompile(f.clone(), u64::MAX)); }, - (Ixon::ELam(t_data, b_data), Ixon::Meta(m)) => { - match m.nodes.as_slice() { - [ - Metadatum::Link(n_addr), - Metadatum::Info(bi), - Metadatum::Link(t_meta), - Metadatum::Link(b_meta), - ] => { - let name = decompile_name(n_addr, stt, dstt)?; - stack.push(Frame::Cache(addr.clone())); - stack.push(Frame::Lam(name, bi.clone())); - stack.push(Frame::Decompile(MetaAddress { - data: b_data.clone(), - meta: b_meta.clone(), - })); - stack.push(Frame::Decompile(MetaAddress { - data: t_data.clone(), - meta: t_meta.clone(), - })); - }, - _ => return Err(DecompileError::Todo), - } + + // Lam: extract 
binder name/info from arena + ( + ExprMetaData::Binder { name: name_addr, info, children }, + Expr::Lam(ty, body), + ) => { + let binder_name = + decompile_name(name_addr, stt).unwrap_or_else(|_| Name::anon()); + stack.push(Frame::BuildLam( + binder_name, + info.clone(), + mdata_layers, + )); + stack.push(Frame::Decompile(body.clone(), children[1])); + stack.push(Frame::Decompile(ty.clone(), children[0])); }, - (Ixon::EAll(t_data, b_data), Ixon::Meta(m)) => { - match m.nodes.as_slice() { - [ - Metadatum::Link(n_addr), - Metadatum::Info(bi), - Metadatum::Link(t_meta), - Metadatum::Link(b_meta), - ] => { - let name = decompile_name(n_addr, stt, dstt)?; - stack.push(Frame::Cache(addr.clone())); - stack.push(Frame::All(name, bi.clone())); - stack.push(Frame::Decompile(MetaAddress { - data: b_data.clone(), - meta: b_meta.clone(), - })); - stack.push(Frame::Decompile(MetaAddress { - data: t_data.clone(), - meta: t_meta.clone(), - })); - }, - _ => return Err(DecompileError::Todo), - } + + (_, Expr::Lam(ty, body)) => { + stack.push(Frame::BuildLam( + Name::anon(), + BinderInfo::Default, + mdata_layers, + )); + stack.push(Frame::Decompile(body.clone(), u64::MAX)); + stack.push(Frame::Decompile(ty.clone(), u64::MAX)); }, - (Ixon::ELet(nd, t_data, v_data, b_data), Ixon::Meta(m)) => { - match m.nodes.as_slice() { - [ - Metadatum::Link(n_addr), - Metadatum::Link(t_meta), - Metadatum::Link(v_meta), - Metadatum::Link(b_meta), - ] => { - let name = decompile_name(n_addr, stt, dstt)?; - stack.push(Frame::Cache(addr.clone())); - stack.push(Frame::Let(name, *nd)); - stack.push(Frame::Decompile(MetaAddress { - data: b_data.clone(), - meta: b_meta.clone(), - })); - stack.push(Frame::Decompile(MetaAddress { - data: v_data.clone(), - meta: v_meta.clone(), - })); - stack.push(Frame::Decompile(MetaAddress { - data: t_data.clone(), - meta: t_meta.clone(), - })); - }, - _ => return Err(DecompileError::Todo), - } + + // All: extract binder name/info from arena + ( + ExprMetaData::Binder { 
name: name_addr, info, children }, + Expr::All(ty, body), + ) => { + let binder_name = + decompile_name(name_addr, stt).unwrap_or_else(|_| Name::anon()); + stack.push(Frame::BuildAll( + binder_name, + info.clone(), + mdata_layers, + )); + stack.push(Frame::Decompile(body.clone(), children[1])); + stack.push(Frame::Decompile(ty.clone(), children[0])); }, - (Ixon::EPrj(_, idx, s_data), Ixon::Meta(m)) => { - match m.nodes.as_slice() { - [ - Metadatum::Link(n_addr), - Metadatum::Link(_), - Metadatum::Link(s_meta), - ] => { - let name = decompile_name(n_addr, stt, dstt)?; - stack.push(Frame::Cache(addr.clone())); - stack.push(Frame::Proj(name, idx.clone())); - stack.push(Frame::Decompile(MetaAddress { - data: s_data.clone(), - meta: s_meta.clone(), - })); - }, - _ => return Err(DecompileError::Todo), - } + + (_, Expr::All(ty, body)) => { + stack.push(Frame::BuildAll( + Name::anon(), + BinderInfo::Default, + mdata_layers, + )); + stack.push(Frame::Decompile(body.clone(), u64::MAX)); + stack.push(Frame::Decompile(ty.clone(), u64::MAX)); }, - _ => return Err(DecompileError::Todo), + + // Let: extract name from arena + ( + ExprMetaData::LetBinder { name: name_addr, children }, + Expr::Let(non_dep, ty, val, body), + ) => { + let let_name = + decompile_name(name_addr, stt).unwrap_or_else(|_| Name::anon()); + stack.push(Frame::BuildLet(let_name, *non_dep, mdata_layers)); + stack.push(Frame::Decompile(body.clone(), children[2])); + stack.push(Frame::Decompile(val.clone(), children[1])); + stack.push(Frame::Decompile(ty.clone(), children[0])); + }, + + (_, Expr::Let(non_dep, ty, val, body)) => { + stack.push(Frame::BuildLet(Name::anon(), *non_dep, mdata_layers)); + stack.push(Frame::Decompile(body.clone(), u64::MAX)); + stack.push(Frame::Decompile(val.clone(), u64::MAX)); + stack.push(Frame::Decompile(ty.clone(), u64::MAX)); + }, + + // Prj: extract struct name from arena + ( + ExprMetaData::Prj { struct_name, child }, + Expr::Prj(_type_ref_idx, field_idx, struct_val), + ) => { 
+ let type_name = decompile_name(struct_name, stt)?; + stack.push(Frame::BuildProj( + type_name, + Nat::from(*field_idx), + mdata_layers, + )); + stack.push(Frame::Decompile(struct_val.clone(), *child)); + }, + + (_, Expr::Prj(type_ref_idx, field_idx, struct_val)) => { + // Fallback: look up from refs table + let addr = + cache.refs.get(*type_ref_idx as usize).ok_or_else(|| { + DecompileError::InvalidRefIndex { + idx: *type_ref_idx, + refs_len: cache.refs.len(), + constant: cache.current_const.clone(), + } + })?; + let named = stt + .env + .get_named_by_addr(addr) + .ok_or(DecompileError::MissingAddress(addr.clone()))?; + let type_name = decompile_name_from_meta(&named.meta, stt)?; + stack.push(Frame::BuildProj( + type_name, + Nat::from(*field_idx), + mdata_layers, + )); + stack.push(Frame::Decompile(struct_val.clone(), u64::MAX)); + }, + + (_, Expr::Share(_)) => unreachable!("Share handled above"), } }, - Frame::Mdata(kv) => { - let inner = res.pop().expect("Mdata missing inner"); - let expr = Expr::mdata(kv, inner); - res.push(expr); - }, - Frame::App => { - let a = res.pop().expect("App missing a"); - let f = res.pop().expect("App missing f"); - let expr = Expr::app(f, a); - res.push(expr); + + Frame::BuildApp(mdata) => { + let a = results.pop().expect("BuildApp missing arg"); + let f = results.pop().expect("BuildApp missing fun"); + results.push(apply_mdata(LeanExpr::app(f, a), mdata)); }, - Frame::Lam(name, bi) => { - let body = res.pop().expect("Lam missing body"); - let typ = res.pop().expect("Lam missing typ"); - let expr = Expr::lam(name, typ, body, bi); - res.push(expr); + + Frame::BuildLam(name, info, mdata) => { + let body = results.pop().expect("BuildLam missing body"); + let ty = results.pop().expect("BuildLam missing ty"); + results.push(apply_mdata(LeanExpr::lam(name, ty, body, info), mdata)); }, - Frame::All(name, bi) => { - let body = res.pop().expect("All missing body"); - let typ = res.pop().expect("All missing typ"); - let expr = 
Expr::all(name, typ, body, bi); - res.push(expr); + + Frame::BuildAll(name, info, mdata) => { + let body = results.pop().expect("BuildAll missing body"); + let ty = results.pop().expect("BuildAll missing ty"); + results.push(apply_mdata(LeanExpr::all(name, ty, body, info), mdata)); }, - Frame::Let(name, nd) => { - let body = res.pop().expect("Let missing body"); - let val = res.pop().expect("Let missing val"); - let typ = res.pop().expect("Let missing typ"); - let expr = Expr::letE(name, typ, val, body, nd); - res.push(expr); + + Frame::BuildLet(name, non_dep, mdata) => { + let body = results.pop().expect("BuildLet missing body"); + let val = results.pop().expect("BuildLet missing val"); + let ty = results.pop().expect("BuildLet missing ty"); + results.push(apply_mdata( + LeanExpr::letE(name, ty, val, body, non_dep), + mdata, + )); }, - Frame::Proj(name, idx) => { - let s = res.pop().expect("Proj missing s"); - let expr = Expr::proj(name, idx, s); - res.push(expr); + + Frame::BuildProj(name, idx, mdata) => { + let s = results.pop().expect("BuildProj missing struct"); + results.push(apply_mdata(LeanExpr::proj(name, idx, s), mdata)); }, - Frame::Cache(maddr) => { - if let Some(expr) = res.last() { - cache.exprs.insert(maddr, expr.clone()); + + Frame::CacheResult(e_ptr, arena_idx) => { + if let Some(result) = results.last() { + cache.expr_cache.insert((e_ptr, arena_idx), result.clone()); } }, } } - res.pop().ok_or(DecompileError::Todo) + + results + .pop() + .ok_or(DecompileError::BadConstantFormat { msg: "empty result".into() }) } -pub fn decompile_const_val( - name: &Address, - num_lvls: &Nat, - lvl_names: &[Address], - typ: MetaAddress, +/// Helper: decompile universe indices to Lean levels. 
+fn decompile_univ_indices( + univ_indices: &[u64], + lvl_names: &[Name], + cache: &mut BlockCache, +) -> Result, DecompileError> { + univ_indices + .iter() + .map(|ui| { + let univ = cache + .univ_table + .get(*ui as usize) + .ok_or_else(|| DecompileError::InvalidUnivIndex { + idx: *ui, + univs_len: cache.univ_table.len(), + constant: cache.current_const.clone(), + })? + .clone(); + decompile_univ(&univ, lvl_names, cache) + }) + .collect() +} + +/// Extract the name address from ConstantMeta. +fn get_name_addr_from_meta(meta: &ConstantMeta) -> Option<&Address> { + match meta { + ConstantMeta::Empty => None, + ConstantMeta::Def { name, .. } => Some(name), + ConstantMeta::Axio { name, .. } => Some(name), + ConstantMeta::Quot { name, .. } => Some(name), + ConstantMeta::Indc { name, .. } => Some(name), + ConstantMeta::Ctor { name, .. } => Some(name), + ConstantMeta::Rec { name, .. } => Some(name), + } +} + +/// Extract level param name addresses from ConstantMeta. +fn get_lvls_from_meta(meta: &ConstantMeta) -> &[Address] { + match meta { + ConstantMeta::Empty => &[], + ConstantMeta::Def { lvls, .. } => lvls, + ConstantMeta::Axio { lvls, .. } => lvls, + ConstantMeta::Quot { lvls, .. } => lvls, + ConstantMeta::Indc { lvls, .. } => lvls, + ConstantMeta::Ctor { lvls, .. } => lvls, + ConstantMeta::Rec { lvls, .. } => lvls, + } +} + +/// Extract arena and type_root from ConstantMeta. +fn get_arena_and_type_root(meta: &ConstantMeta) -> (&ExprMeta, u64) { + static EMPTY_ARENA: ExprMeta = ExprMeta { nodes: Vec::new() }; + match meta { + ConstantMeta::Def { arena, type_root, .. } => (arena, *type_root), + ConstantMeta::Axio { arena, type_root, .. } => (arena, *type_root), + ConstantMeta::Quot { arena, type_root, .. } => (arena, *type_root), + ConstantMeta::Indc { arena, type_root, .. } => (arena, *type_root), + ConstantMeta::Ctor { arena, type_root, .. } => (arena, *type_root), + ConstantMeta::Rec { arena, type_root, .. 
} => (arena, *type_root), + ConstantMeta::Empty => (&EMPTY_ARENA, 0), + } +} + +/// Extract the all field from ConstantMeta (original Lean all field for roundtrip). +fn get_all_from_meta(meta: &ConstantMeta) -> &[Address] { + match meta { + ConstantMeta::Def { all, .. } => all, + ConstantMeta::Indc { all, .. } => all, + ConstantMeta::Rec { all, .. } => all, + _ => &[], + } +} + +/// Extract the ctx field from ConstantMeta (MutCtx used during compilation for Rec expr decompilation). +fn get_ctx_from_meta(meta: &ConstantMeta) -> &[Address] { + match meta { + ConstantMeta::Def { ctx, .. } => ctx, + ConstantMeta::Indc { ctx, .. } => ctx, + ConstantMeta::Rec { ctx, .. } => ctx, + _ => &[], + } +} + +/// Decompile a name from ConstantMeta. +fn decompile_name_from_meta( + meta: &ConstantMeta, + stt: &CompileState, +) -> Result { + match get_name_addr_from_meta(meta) { + Some(addr) => decompile_name(addr, stt), + None => { + Err(DecompileError::BadConstantFormat { msg: "empty metadata".into() }) + }, + } +} + +/// Extract level param names from ConstantMeta. +fn decompile_level_names_from_meta( + meta: &ConstantMeta, + stt: &CompileState, +) -> Result, DecompileError> { + get_lvls_from_meta(meta).iter().map(|a| decompile_name(a, stt)).collect() +} + +// =========================================================================== +// Constant decompilation +// =========================================================================== + +/// Decompile a ConstantVal (name, level_params, type). 
+fn decompile_const_val( + typ: &Arc, + meta: &ConstantMeta, cache: &mut BlockCache, stt: &CompileState, dstt: &DecompileState, ) -> Result { - let name = decompile_name(name, stt, dstt)?; - if Nat(lvl_names.len().into()) != *num_lvls { - return Err(DecompileError::MismatchedLevels( - name.clone(), - num_lvls.clone(), - lvl_names.to_vec(), - )); - } - let level_params: Vec = - lvl_names.iter().map(|x| decompile_name(x, stt, dstt)).try_collect()?; - let typ = decompile_expr(typ, &level_params, cache, stt, dstt)?; + let name = decompile_name_from_meta(meta, stt)?; + let level_params = decompile_level_names_from_meta(meta, stt)?; + let (arena, type_root) = get_arena_and_type_root(meta); + let typ = + decompile_expr(typ, arena, type_root, &level_params, cache, stt, dstt)?; Ok(ConstantVal { name, level_params, typ }) } -pub fn decompile_ctor( - ctor: &Constructor, - meta: &Address, +/// Decompile a Definition. +fn decompile_definition( + def: &Definition, + meta: &ConstantMeta, cache: &mut BlockCache, stt: &CompileState, dstt: &DecompileState, -) -> Result { - let meta = read_meta(meta, stt)?; - match meta.nodes.as_slice() { - [ - Metadatum::Link(n), - Metadatum::Links(ls), - Metadatum::Link(tm), - Metadatum::Link(i), - ] => { - let cnst = decompile_const_val( - n, - &ctor.lvls, - ls, - MetaAddress { data: ctor.typ.clone(), meta: tm.clone() }, - cache, - stt, - dstt, - )?; - let induct = decompile_name(i, stt, dstt)?; - Ok(ConstructorVal { - cnst, - induct, - cidx: ctor.cidx.clone(), - num_params: ctor.params.clone(), - num_fields: ctor.fields.clone(), - is_unsafe: ctor.is_unsafe, - }) +) -> Result { + let name = decompile_name_from_meta(meta, stt)?; + let level_params = decompile_level_names_from_meta(meta, stt)?; + + let (arena, type_root, value_root) = match meta { + ConstantMeta::Def { arena, type_root, value_root, .. 
} => { + (arena, *type_root, *value_root) + }, + _ => { + static EMPTY: ExprMeta = ExprMeta { nodes: Vec::new() }; + (&EMPTY, 0, 0) }, - _ => Err(DecompileError::BadCtor(Box::new((ctor.clone(), meta.clone())))), + }; + + let typ = decompile_expr( + &def.typ, + arena, + type_root, + &level_params, + cache, + stt, + dstt, + )?; + let value = decompile_expr( + &def.value, + arena, + value_root, + &level_params, + cache, + stt, + dstt, + )?; + + let (hints, all) = match meta { + ConstantMeta::Def { hints, all, .. } => { + let all_names: Result, _> = + all.iter().map(|a| decompile_name(a, stt)).collect(); + (*hints, all_names?) + }, + _ => (ReducibilityHints::Opaque, vec![]), + }; + + let cnst = ConstantVal { name, level_params, typ }; + + match def.kind { + DefKind::Definition => Ok(LeanConstantInfo::DefnInfo(DefinitionVal { + cnst, + value, + hints, + safety: def.safety, + all, + })), + DefKind::Theorem => { + Ok(LeanConstantInfo::ThmInfo(TheoremVal { cnst, value, all })) + }, + DefKind::Opaque => Ok(LeanConstantInfo::OpaqueInfo(OpaqueVal { + cnst, + value, + is_unsafe: def.safety == DefinitionSafety::Unsafe, + all, + })), } } -pub fn decompile_recr_rule( - lvls: &[Name], - rule: &ixon::RecursorRule, - n: &Address, - m: &Address, +/// Decompile a Recursor. +/// Arena covers type + all rule RHS expressions with rule_roots. +fn decompile_recursor( + rec: &Recursor, + meta: &ConstantMeta, cache: &mut BlockCache, stt: &CompileState, dstt: &DecompileState, -) -> Result { - let ctor = decompile_name(n, stt, dstt)?; - let rhs = decompile_expr( - MetaAddress { data: rule.rhs.clone(), meta: m.clone() }, - lvls, +) -> Result { + let name = decompile_name_from_meta(meta, stt)?; + let level_params = decompile_level_names_from_meta(meta, stt)?; + + let (arena, type_root, rule_roots, rule_addrs, all_addrs) = match meta { + ConstantMeta::Rec { arena, type_root, rule_roots, rules, all, .. 
} => ( + arena, + *type_root, + rule_roots.as_slice(), + rules.as_slice(), + all.as_slice(), + ), + _ => { + static EMPTY: ExprMeta = ExprMeta { nodes: Vec::new() }; + (&EMPTY, 0u64, &[] as &[u64], &[] as &[Address], &[] as &[Address]) + }, + }; + + let typ = decompile_expr( + &rec.typ, + arena, + type_root, + &level_params, cache, stt, dstt, )?; - Ok(RecursorRule { ctor, n_fields: rule.fields.clone(), rhs }) + + let rule_names = rule_addrs + .iter() + .map(|a| decompile_name(a, stt)) + .collect::, _>>()?; + let all = all_addrs + .iter() + .map(|a| decompile_name(a, stt)) + .collect::, _>>() + .unwrap_or_else(|_| vec![name.clone()]); + + let mut rules = Vec::with_capacity(rec.rules.len()); + for (i, (rule, ctor_name)) in + rec.rules.iter().zip(rule_names.iter()).enumerate() + { + let rhs_root = rule_roots.get(i).copied().unwrap_or(0); + let rhs = decompile_expr( + &rule.rhs, + arena, + rhs_root, + &level_params, + cache, + stt, + dstt, + )?; + rules.push(LeanRecursorRule { + ctor: ctor_name.clone(), + n_fields: Nat::from(rule.fields), + rhs, + }); + } + + let cnst = ConstantVal { name, level_params, typ }; + + Ok(LeanConstantInfo::RecInfo(RecursorVal { + cnst, + all, + num_params: Nat::from(rec.params), + num_indices: Nat::from(rec.indices), + num_motives: Nat::from(rec.motives), + num_minors: Nat::from(rec.minors), + rules, + k: rec.k, + is_unsafe: rec.is_unsafe, + })) } -pub fn decompile_defn( - cnst_name: &Name, - def: &Definition, - meta: &Metadata, +/// Decompile a Constructor. +/// Constructor metadata is in its own ConstantMeta::Ctor (resolved from Named entries). 
+fn decompile_constructor( + ctor: &Constructor, + meta: &ConstantMeta, + induct_name: Name, cache: &mut BlockCache, stt: &CompileState, dstt: &DecompileState, -) -> Result { - match meta.nodes.as_slice() { - [ - Metadatum::Link(n), - Metadatum::Links(ls), - Metadatum::Hints(hints), - Metadatum::Link(tm), - Metadatum::Link(vm), - Metadatum::Links(all), - ] => { - let cnst = decompile_const_val( - n, - &def.lvls, - ls, - MetaAddress { data: def.typ.clone(), meta: tm.clone() }, - cache, - stt, - dstt, - )?; - if cnst.name != *cnst_name { - return Err(DecompileError::ConstName( - cnst.name.clone(), - cnst_name.clone(), - )); - } - let value = decompile_expr( - MetaAddress { data: def.value.clone(), meta: vm.clone() }, - &cnst.level_params, - cache, - stt, - dstt, - )?; - let all = - all.iter().map(|x| decompile_name(x, stt, dstt)).try_collect()?; - match def.kind { - DefKind::Definition => Ok(ConstantInfo::DefnInfo(DefinitionVal { - cnst, - value, - hints: *hints, - safety: def.safety, - all, - })), - DefKind::Theorem => { - Ok(ConstantInfo::ThmInfo(TheoremVal { cnst, value, all })) - }, - DefKind::Opaque => Ok(ConstantInfo::OpaqueInfo(OpaqueVal { - cnst, - value, - is_unsafe: def.safety == DefinitionSafety::Unsafe, - all, - })), - } - }, - _ => Err(DecompileError::BadDef(Box::new((def.clone(), meta.clone())))), - } +) -> Result { + let name = decompile_name_from_meta(meta, stt)?; + let level_params = decompile_level_names_from_meta(meta, stt)?; + + let (arena, type_root) = get_arena_and_type_root(meta); + let typ = decompile_expr( + &ctor.typ, + arena, + type_root, + &level_params, + cache, + stt, + dstt, + )?; + + let cnst = ConstantVal { name, level_params, typ }; + + Ok(ConstructorVal { + cnst, + induct: induct_name, + cidx: Nat::from(ctor.cidx), + num_params: Nat::from(ctor.params), + num_fields: Nat::from(ctor.fields), + is_unsafe: ctor.is_unsafe, + }) } -pub fn decompile_recr( - cnst_name: &Name, - recr: &Recursor, - meta: &Metadata, +/// Decompile an 
Inductive. +/// Constructor metadata is resolved from Named entries, not from CtorMeta. +fn decompile_inductive( + ind: &Inductive, + meta: &ConstantMeta, cache: &mut BlockCache, stt: &CompileState, dstt: &DecompileState, -) -> Result { - match meta.nodes.as_slice() { - [ - Metadatum::Link(n), - Metadatum::Links(ls), - Metadatum::Link(tm), - Metadatum::Map(rs), - Metadatum::Links(all), - ] => { - let cnst = decompile_const_val( - n, - &recr.lvls, - ls, - MetaAddress { data: recr.typ.clone(), meta: tm.clone() }, - cache, - stt, - dstt, - )?; - if cnst.name != *cnst_name { - return Err(DecompileError::ConstName( - cnst.name.clone(), - cnst_name.clone(), - )); - } - let all = - all.iter().map(|x| decompile_name(x, stt, dstt)).try_collect()?; - let rules: Vec = recr - .rules +) -> Result<(InductiveVal, Vec), DecompileError> { + let name = decompile_name_from_meta(meta, stt)?; + let level_params = decompile_level_names_from_meta(meta, stt)?; + + let (arena, type_root) = get_arena_and_type_root(meta); + let typ = decompile_expr( + &ind.typ, + arena, + type_root, + &level_params, + cache, + stt, + dstt, + )?; + + // Extract constructor name addresses and all from metadata + let (ctor_name_addrs, all) = match meta { + ConstantMeta::Indc { ctors, all: all_addrs, .. 
} => { + let all = all_addrs .iter() - .zip(rs) - .map(|(rd, (n, rm))| { - decompile_recr_rule(&cnst.level_params, rd, n, rm, cache, stt, dstt) - }) - .try_collect()?; - Ok(ConstantInfo::RecInfo(RecursorVal { - cnst, - all, - num_params: recr.params.clone(), - num_indices: recr.indices.clone(), - num_motives: recr.motives.clone(), - num_minors: recr.minors.clone(), - rules, - k: recr.k, - is_unsafe: recr.is_unsafe, - })) + .map(|a| decompile_name(a, stt)) + .collect::, _>>()?; + (ctors.as_slice(), all) }, - _ => Err(DecompileError::BadRec(Box::new((recr.clone(), meta.clone())))), + _ => (&[] as &[Address], vec![name.clone()]), + }; + + let mut ctors = Vec::with_capacity(ind.ctors.len()); + let mut ctor_names = Vec::new(); + + for (i, ctor) in ind.ctors.iter().enumerate() { + // Clear expr_cache: each constructor has its own arena, so cached entries + // from the inductive's arena (or a previous constructor's arena) would + // produce stale hits when arena indices coincide. + cache.expr_cache.clear(); + + // Look up constructor's Named entry for its ConstantMeta::Ctor + let ctor_meta = if let Some(addr) = ctor_name_addrs.get(i) { + if let Ok(ctor_name) = decompile_name(addr, stt) { + stt + .env + .named + .get(&ctor_name) + .map(|n| n.meta.clone()) + .unwrap_or_default() + } else { + ConstantMeta::Empty + } + } else { + ConstantMeta::Empty + }; + + let ctor_val = + decompile_constructor(ctor, &ctor_meta, name.clone(), cache, stt, dstt)?; + ctor_names.push(ctor_val.cnst.name.clone()); + ctors.push(ctor_val); } + + let cnst = ConstantVal { name, level_params, typ }; + + let ind_val = InductiveVal { + cnst, + num_params: Nat::from(ind.params), + num_indices: Nat::from(ind.indices), + all, + ctors: ctor_names, + num_nested: Nat::from(ind.nested), + is_rec: ind.recr, + is_reflexive: ind.refl, + is_unsafe: ind.is_unsafe, + }; + + Ok((ind_val, ctors)) } -pub fn decompile_mut_const( - cnst_name: &Name, - cnst: &MutConst, - meta: &Metadata, +/// Decompile an Axiom. 
+fn decompile_axiom( + ax: &Axiom, + meta: &ConstantMeta, cache: &mut BlockCache, stt: &CompileState, dstt: &DecompileState, -) -> Result { - match cnst { - MutConst::Defn(d) => decompile_defn(cnst_name, d, meta, cache, stt, dstt), - MutConst::Indc(i) => match meta.nodes.as_slice() { - [ - Metadatum::Link(n), - Metadatum::Links(ls), - Metadatum::Link(tm), - Metadatum::Links(cs), - Metadatum::Links(all), - ] => { - let cnst = decompile_const_val( - n, - &i.lvls, - ls, - MetaAddress { data: i.typ.clone(), meta: tm.clone() }, - cache, - stt, - dstt, - )?; - if cnst.name != *cnst_name { - return Err(DecompileError::ConstName( - cnst.name.clone(), - cnst_name.clone(), - )); - } - let all = - all.iter().map(|x| decompile_name(x, stt, dstt)).try_collect()?; - if i.ctors.len() != cs.len() { - return Err(DecompileError::MismatchedCtors( - cnst.name.clone(), - i.ctors.clone(), - cs.clone(), - )); - } - let ctors: Vec = i - .ctors - .iter() - .zip(cs) - .map(|(c, m)| decompile_ctor(c, m, cache, stt, dstt)) - .try_collect()?; - let ctor_names: Vec = - ctors.iter().map(|c| c.cnst.name.clone()).collect(); - for (cn, c) in ctor_names.iter().zip(ctors) { - dstt.env.insert(cn.clone(), ConstantInfo::CtorInfo(c)); - } - Ok(ConstantInfo::InductInfo(InductiveVal { - cnst, - num_params: i.params.clone(), - num_indices: i.indices.clone(), - all, - ctors: ctor_names, - num_nested: i.nested.clone(), - is_rec: i.recr, - is_reflexive: i.refl, - is_unsafe: i.is_unsafe, - })) - }, - _ => Err(DecompileError::BadInd(Box::new((i.clone(), meta.clone())))), - }, - MutConst::Recr(r) => decompile_recr(cnst_name, r, meta, cache, stt, dstt), - } +) -> Result { + let cnst = decompile_const_val(&ax.typ, meta, cache, stt, dstt)?; + Ok(LeanConstantInfo::AxiomInfo(AxiomVal { cnst, is_unsafe: ax.is_unsafe })) } -pub fn decompile_block( - addr: &MetaAddress, +/// Decompile a Quotient. 
+fn decompile_quotient( + quot: &Quotient, + meta: &ConstantMeta, cache: &mut BlockCache, stt: &CompileState, dstt: &DecompileState, +) -> Result { + let cnst = decompile_const_val(".typ, meta, cache, stt, dstt)?; + Ok(LeanConstantInfo::QuotInfo(QuotVal { cnst, kind: quot.kind })) +} + +// =========================================================================== +// Mutual block decompilation +// =========================================================================== + +/// Decompile a mutual block (Vec). +/// Decompile a single projection, given the block data and sharing. +#[allow(clippy::too_many_arguments)] +fn decompile_projection( + name: &Name, + named: &Named, + cnst: &Constant, + mutuals: &[MutConst], + block_sharing: &[Arc], + block_refs: &[Address], + block_univs: &[Arc], + stt: &CompileState, + dstt: &DecompileState, ) -> Result<(), DecompileError> { - match (read_ixon(&addr.data, stt)?, read_ixon(&addr.meta, stt)?) { - (ref d @ Ixon::Muts(ref muts), ref m @ Ixon::Meta(ref meta)) => { - match meta.nodes.as_slice() { - [ - Metadatum::Muts(muts_names), - Metadatum::Map(muts_ctx), - Metadatum::Map(muts_metas), - ] => { - if muts.len() != muts_names.len() { - Err(DecompileError::BadBlock(Box::new((d.clone(), m.clone())))) - } else { - let mut meta_map = FxHashMap::default(); - for (name, meta) in muts_metas { - let name = decompile_name(name, stt, dstt)?; - let meta = read_meta(meta, stt)?; - meta_map.insert(name, meta); - } - let mut ctx = MutCtx::default(); - for (name, idx) in muts_ctx { - let name = decompile_name(name, stt, dstt)?; - let idx = read_nat(idx, stt)?; - ctx.insert(name, idx); - } - dstt.block_ctx.insert(addr.clone(), ctx.clone()); - cache.ctx = ctx; - for (cnst, names) in muts.iter().zip(muts_names) { - for n in names { - let name = decompile_name(n, stt, dstt)?; - let meta = meta_map.get(&name).ok_or(DecompileError::Todo)?; - let info = - decompile_mut_const(&name, cnst, meta, cache, stt, dstt)?; - dstt.env.insert(name, info); - } - 
} - Ok(()) - } - }, - _ => Err(DecompileError::BadBlock(Box::new((d.clone(), m.clone())))), + // Build ctx from metadata's ctx field + let ctx_addrs = get_ctx_from_meta(&named.meta); + let ctx_names: Vec = + ctx_addrs.iter().filter_map(|a| decompile_name(a, stt).ok()).collect(); + + // Set up cache with sharing, refs, univs, and ctx + let mut cache = BlockCache { + sharing: block_sharing.to_vec(), + refs: block_refs.to_vec(), + univ_table: block_univs.to_vec(), + ctx: all_to_ctx(&ctx_names), + current_const: name.pretty(), + ..Default::default() + }; + + match &cnst.info { + ConstantInfo::DPrj(proj) => { + if let Some(MutConst::Defn(def)) = mutuals.get(proj.idx as usize) { + let info = + decompile_definition(def, &named.meta, &mut cache, stt, dstt)?; + dstt.env.insert(name.clone(), info); + } + }, + + ConstantInfo::IPrj(_proj) => { + if let Some(MutConst::Indc(ind)) = mutuals.get(_proj.idx as usize) { + let (ind_val, ctors) = + decompile_inductive(ind, &named.meta, &mut cache, stt, dstt)?; + dstt.env.insert(name.clone(), LeanConstantInfo::InductInfo(ind_val)); + for ctor in ctors { + dstt + .env + .insert(ctor.cnst.name.clone(), LeanConstantInfo::CtorInfo(ctor)); + } } }, - (d, m) => Err(DecompileError::BadBlock(Box::new((d, m)))), + + ConstantInfo::RPrj(proj) => { + if let Some(MutConst::Recr(rec)) = mutuals.get(proj.idx as usize) { + let info = decompile_recursor(rec, &named.meta, &mut cache, stt, dstt)?; + dstt.env.insert(name.clone(), info); + } + }, + + _ => {}, } + + Ok(()) } -pub fn decompile_const( - cnst_name: &Name, - addr: &MetaAddress, - cache: &mut BlockCache, +/// Decompile a single constant (non-mutual). +fn decompile_const( + name: &Name, + named: &Named, stt: &CompileState, dstt: &DecompileState, ) -> Result<(), DecompileError> { - match (read_ixon(&addr.data, stt)?, read_ixon(&addr.meta, stt)?) 
{ - (Ixon::Defn(x), Ixon::Meta(m)) => { - cache.ctx = - vec![(cnst_name.clone(), Nat(0u64.into()))].into_iter().collect(); - let info = decompile_defn(cnst_name, &x, &m, cache, stt, dstt)?; - dstt.env.insert(cnst_name.clone(), info); - dstt.consts.insert(addr.clone(), cnst_name.clone()); - Ok(()) - }, - (Ixon::Recr(x), Ixon::Meta(m)) => { - cache.ctx = - vec![(cnst_name.clone(), Nat(0u64.into()))].into_iter().collect(); - let info = decompile_recr(cnst_name, &x, &m, cache, stt, dstt)?; - dstt.env.insert(cnst_name.clone(), info); - dstt.consts.insert(addr.clone(), cnst_name.clone()); - Ok(()) - }, - (Ixon::Axio(x), Ixon::Meta(m)) => match m.nodes.as_slice() { - [Metadatum::Link(n), Metadatum::Links(ls), Metadatum::Link(tm)] => { - let cnst = decompile_const_val( - n, - &x.lvls, - ls, - MetaAddress { data: x.typ, meta: tm.clone() }, - cache, - stt, - dstt, - )?; - let info = - ConstantInfo::AxiomInfo(AxiomVal { cnst, is_unsafe: x.is_unsafe }); - dstt.env.insert(cnst_name.clone(), info); - dstt.consts.insert(addr.clone(), cnst_name.clone()); - Ok(()) - }, - _ => Err(DecompileError::Todo), + let cnst = read_const(&named.addr, stt)?; + + // Build ctx from metadata's all field + let all_addrs = get_all_from_meta(&named.meta); + let all_names: Vec = + all_addrs.iter().filter_map(|a| decompile_name(a, stt).ok()).collect(); + let ctx = all_to_ctx(&all_names); + let current_const = name.pretty(); + + match cnst { + Constant { + info: ConstantInfo::Defn(def), + ref sharing, + ref refs, + ref univs, + } => { + let mut cache = BlockCache { + sharing: sharing.clone(), + refs: refs.clone(), + univ_table: univs.clone(), + ctx: ctx.clone(), + current_const: current_const.clone(), + ..Default::default() + }; + let info = + decompile_definition(&def, &named.meta, &mut cache, stt, dstt)?; + dstt.env.insert(name.clone(), info); }, - (Ixon::Quot(x), Ixon::Meta(m)) => match m.nodes.as_slice() { - [Metadatum::Link(n), Metadatum::Links(ls), Metadatum::Link(tm)] => { - let cnst = 
decompile_const_val( - n, - &x.lvls, - ls, - MetaAddress { data: x.typ, meta: tm.clone() }, - cache, - stt, - dstt, - )?; - let info = ConstantInfo::QuotInfo(QuotVal { cnst, kind: x.kind }); - dstt.env.insert(cnst_name.clone(), info); - dstt.consts.insert(addr.clone(), cnst_name.clone()); - Ok(()) - }, - _ => Err(DecompileError::Todo), + + Constant { + info: ConstantInfo::Recr(rec), + ref sharing, + ref refs, + ref univs, + } => { + let mut cache = BlockCache { + sharing: sharing.clone(), + refs: refs.clone(), + univ_table: univs.clone(), + ctx: ctx.clone(), + current_const: current_const.clone(), + ..Default::default() + }; + let info = decompile_recursor(&rec, &named.meta, &mut cache, stt, dstt)?; + dstt.env.insert(name.clone(), info); }, - (Ixon::DPrj(x), Ixon::Meta(m)) => match m.nodes.as_slice() { - [Metadatum::Link(bm), Metadatum::Link(_)] => { - let block = MetaAddress { data: x.block, meta: bm.clone() }; - let ctx = dstt.block_ctx.get(&block).ok_or(DecompileError::Todo)?; - ctx.get(cnst_name).ok_or(DecompileError::Todo)?; - dstt.consts.insert(addr.clone(), cnst_name.clone()); - Ok(()) - }, - _ => Err(DecompileError::Todo), + + Constant { + info: ConstantInfo::Axio(ax), + ref sharing, + ref refs, + ref univs, + } => { + let mut cache = BlockCache { + sharing: sharing.clone(), + refs: refs.clone(), + univ_table: univs.clone(), + ctx: ctx.clone(), + current_const: current_const.clone(), + ..Default::default() + }; + let info = decompile_axiom(&ax, &named.meta, &mut cache, stt, dstt)?; + dstt.env.insert(name.clone(), info); }, - (Ixon::RPrj(x), Ixon::Meta(m)) => match m.nodes.as_slice() { - [Metadatum::Link(bm), Metadatum::Link(_)] => { - let block = MetaAddress { data: x.block, meta: bm.clone() }; - let ctx = dstt.block_ctx.get(&block).ok_or(DecompileError::Todo)?; - ctx.get(cnst_name).ok_or(DecompileError::Todo)?; - dstt.consts.insert(addr.clone(), cnst_name.clone()); - Ok(()) - }, - _ => Err(DecompileError::Todo), + + Constant { + info: 
ConstantInfo::Quot(quot), + ref sharing, + ref refs, + ref univs, + } => { + let mut cache = BlockCache { + sharing: sharing.clone(), + refs: refs.clone(), + univ_table: univs.clone(), + ctx, + current_const, + ..Default::default() + }; + let info = decompile_quotient(", &named.meta, &mut cache, stt, dstt)?; + dstt.env.insert(name.clone(), info); }, - (Ixon::CPrj(x), Ixon::Meta(m)) => match m.nodes.as_slice() { - [Metadatum::Link(bm), Metadatum::Link(_)] => { - let block = MetaAddress { data: x.block, meta: bm.clone() }; - let ctx = dstt.block_ctx.get(&block).ok_or(DecompileError::Todo)?; - ctx.get(cnst_name).ok_or(DecompileError::Todo)?; - dstt.consts.insert(addr.clone(), cnst_name.clone()); - Ok(()) - }, - _ => Err(DecompileError::Todo), + + Constant { info: ConstantInfo::DPrj(_), .. } + | Constant { info: ConstantInfo::IPrj(_), .. } + | Constant { info: ConstantInfo::RPrj(_), .. } + | Constant { info: ConstantInfo::CPrj(_), .. } => { + // Projections are handled by decompile_block }, - (Ixon::IPrj(x), Ixon::Meta(m)) => match m.nodes.as_slice() { - [Metadatum::Link(bm), Metadatum::Link(_)] => { - let block = MetaAddress { data: x.block, meta: bm.clone() }; - let ctx = dstt.block_ctx.get(&block).ok_or(DecompileError::Todo)?; - ctx.get(cnst_name).ok_or(DecompileError::Todo)?; - dstt.consts.insert(addr.clone(), cnst_name.clone()); - Ok(()) - }, - _ => Err(DecompileError::Todo), + + Constant { info: ConstantInfo::Muts(_), .. } => { + // Mutual blocks are handled separately }, - _ => todo!(), } + + Ok(()) } +// =========================================================================== +// Main entry point +// =========================================================================== + +/// Decompile an Ixon environment back to Lean format. 
pub fn decompile_env( stt: &CompileState, ) -> Result { let dstt = DecompileState::default(); - stt.blocks.par_iter().try_for_each(|addr| { - decompile_block(&addr, &mut BlockCache::default(), stt, &dstt) - })?; - stt.consts.par_iter().try_for_each(|entry| { - decompile_const( - entry.key(), - entry.value(), - &mut BlockCache::default(), - stt, - &dstt, - ) + + // Constructor metadata is now embedded directly in ConstantMeta::Indc, + // so no pre-indexing is needed. + + // Single pass through all named constants + stt.env.named.par_iter().try_for_each(|entry| { + let (name, named) = (entry.key(), entry.value()); + + if let Some(cnst) = stt.env.get_const(&named.addr) { + match &cnst.info { + // Direct constants - decompile immediately + ConstantInfo::Defn(_) + | ConstantInfo::Recr(_) + | ConstantInfo::Axio(_) + | ConstantInfo::Quot(_) => decompile_const(name, named, stt, &dstt), + + // Projections - get the block and decompile + ConstantInfo::DPrj(proj) => { + if let Some(Constant { + info: ConstantInfo::Muts(mutuals), + ref sharing, + ref refs, + ref univs, + }) = stt.env.get_const(&proj.block) + { + decompile_projection( + name, named, &cnst, &mutuals, sharing, refs, univs, stt, &dstt, + ) + } else { + Err(DecompileError::MissingAddress(proj.block.clone())) + } + }, + + ConstantInfo::IPrj(proj) => { + if let Some(Constant { + info: ConstantInfo::Muts(mutuals), + ref sharing, + ref refs, + ref univs, + }) = stt.env.get_const(&proj.block) + { + decompile_projection( + name, named, &cnst, &mutuals, sharing, refs, univs, stt, &dstt, + ) + } else { + Err(DecompileError::MissingAddress(proj.block.clone())) + } + }, + + ConstantInfo::RPrj(proj) => { + if let Some(Constant { + info: ConstantInfo::Muts(mutuals), + ref sharing, + ref refs, + ref univs, + }) = stt.env.get_const(&proj.block) + { + decompile_projection( + name, named, &cnst, &mutuals, sharing, refs, univs, stt, &dstt, + ) + } else { + Err(DecompileError::MissingAddress(proj.block.clone())) + } + }, + + // 
Constructor projections are handled when their parent inductive is decompiled + ConstantInfo::CPrj(_) => Ok(()), + + // Mutual blocks themselves don't need separate handling + ConstantInfo::Muts(_) => Ok(()), + } + } else { + Ok(()) + } })?; + Ok(dstt) } +/// Result of checking a decompiled environment against the original. +#[derive(Debug)] +pub struct CheckResult { + pub matches: usize, + pub mismatches: usize, + pub missing: usize, +} + +/// Check that decompiled environment matches the original. +/// Counts and logs hash mismatches (which indicate metadata loss or decompilation errors). pub fn check_decompile( - env: &Env, - cstt: &CompileState, + original: &LeanEnv, + _stt: &CompileState, dstt: &DecompileState, -) -> Result<(), DecompileError> { - cstt.consts.par_iter().try_for_each(|entry| { - let (name, addr) = (entry.key(), entry.value()); - match dstt.consts.get(addr) { - Some(n2) if name == n2.value() => Ok(()), - Some(n2) => Err(DecompileError::ConstAddrMismatch( - name.clone(), - Box::new(addr.clone()), - n2.value().clone(), - )), - None => Err(DecompileError::ConstAddrNotDecompiled( - name.clone(), - Box::new(addr.clone()), - )), - } - })?; +) -> Result { + use std::sync::atomic::{AtomicUsize, Ordering}; - dstt.consts.par_iter().try_for_each(|entry| { - let (addr, name) = (entry.key(), entry.value()); - match cstt.consts.get(name) { - Some(a2) if addr == a2.value() => Ok(()), - Some(a2) => Err(DecompileError::ConstNameMismatch( - name.clone(), - Box::new((addr.clone(), a2.value().clone())), - )), - None => Err(DecompileError::ConstNameNotCompiled( - name.clone(), - Box::new(addr.clone()), - )), - } - })?; + let mismatches = AtomicUsize::new(0); + let matches = AtomicUsize::new(0); + let missing = AtomicUsize::new(0); - if env.len() != dstt.env.len() { - return Err(DecompileError::EnvSizeMismatch { - original: env.len(), - decompiled: dstt.env.len(), - }); + if original.len() != dstt.env.len() { + eprintln!( + "check_decompile: size mismatch: 
original={}, decompiled={}", + original.len(), + dstt.env.len() + ); } dstt.env.par_iter().try_for_each(|entry| { let (name, info) = (entry.key(), entry.value()); - match env.get(name) { - Some(info2) if info.get_hash() == info2.get_hash() => Ok(()), - Some(info2) => Err(DecompileError::ConstHashMismatch( - name.clone(), - Box::new((info2.get_hash(), info.get_hash())), - )), - None => Err(DecompileError::ConstMissingInOriginal(name.clone())), + match original.get(name) { + Some(orig_info) if orig_info.get_hash() == info.get_hash() => { + matches.fetch_add(1, Ordering::Relaxed); + Ok::<(), DecompileError>(()) + }, + Some(orig_info) => { + // Hash mismatch - log the constant name and hashes + let count = mismatches.fetch_add(1, Ordering::Relaxed); + if count < 20 { + eprintln!( + "check_decompile: hash mismatch for {}: original={:?}, decompiled={:?}", + name.pretty(), + orig_info.get_hash(), + info.get_hash() + ); + } + Ok(()) + }, + None => { + missing.fetch_add(1, Ordering::Relaxed); + Ok(()) + }, } })?; - Ok(()) + let result = CheckResult { + matches: matches.load(Ordering::Relaxed), + mismatches: mismatches.load(Ordering::Relaxed), + missing: missing.load(Ordering::Relaxed), + }; + eprintln!( + "check_decompile: {} matches, {} mismatches, {} not in original", + result.matches, result.mismatches, result.missing + ); + + Ok(result) } diff --git a/src/ix/env.rs b/src/ix/env.rs index aa612de7..73749f98 100644 --- a/src/ix/env.rs +++ b/src/ix/env.rs @@ -1,3 +1,13 @@ +//! LEON (Lean Environment Objective Notation) types. +//! +//! This module defines a content-addressed representation of the Lean 4 kernel +//! type system. Every term, name, universe level, and constant is hashed with +//! Blake3 to produce a deterministic content address. +//! +//! Constructor tags (`NANON`, `NSTR`, `EVAR`, `DEFN`, etc.) are single-byte +//! discriminants prepended to the hasher input so that structurally identical +//! subtrees in different syntactic categories never collide. 
+ use blake3::Hash; use std::{ hash::{Hash as StdHash, Hasher}, @@ -7,45 +17,96 @@ use std::{ use crate::lean::nat::Nat; use rustc_hash::FxHashMap; -// LEON: Lean Environment Objective Notation -// These tags roughly correspond to Ixon tags +// -- Name tags ---------------------------------------------------------------- + +/// Tag for the anonymous (root) name. pub const NANON: u8 = 0x00; +/// Tag for a string name component. pub const NSTR: u8 = 0x01; +/// Tag for a numeric name component. pub const NNUM: u8 = 0x02; + +// -- Level tags --------------------------------------------------------------- + +/// Tag for universe level zero. pub const UZERO: u8 = 0x03; +/// Tag for universe level successor. pub const USUCC: u8 = 0x04; +/// Tag for universe level max. pub const UMAX: u8 = 0x05; +/// Tag for universe level imax. pub const UIMAX: u8 = 0x06; +/// Tag for a universe level parameter. pub const UPARAM: u8 = 0x10; +/// Tag for a universe level metavariable. pub const UMVAR: u8 = 0x70; + +// -- Expr tags ---------------------------------------------------------------- + +/// Tag for a bound variable (de Bruijn index). pub const EVAR: u8 = 0x20; +/// Tag for a sort expression. pub const ESORT: u8 = 0x80; +/// Tag for a constant reference. pub const EREF: u8 = 0x30; +/// Tag for a projection expression. pub const EPRJ: u8 = 0x50; +/// Tag for a string literal expression. pub const ESTR: u8 = 0x81; +/// Tag for a natural number literal expression. pub const ENAT: u8 = 0x82; +/// Tag for a function application. pub const EAPP: u8 = 0x83; +/// Tag for a lambda abstraction. pub const ELAM: u8 = 0x84; +/// Tag for a dependent function type (forall / Pi). pub const EALL: u8 = 0x85; +/// Tag for a let-binding. pub const ELET: u8 = 0x86; +/// Tag for a free variable. pub const EFVAR: u8 = 0x72; +/// Tag for an expression metavariable. pub const EMVAR: u8 = 0x73; +/// Tag for metadata-annotated expressions. 
pub const EMDATA: u8 = 0x74; + +// -- Constant tags ------------------------------------------------------------ + +/// Tag for a definition constant. pub const DEFN: u8 = 0xA0; +/// Tag for a recursor constant. pub const RECR: u8 = 0xA1; +/// Tag for an axiom constant. pub const AXIO: u8 = 0xA2; +/// Tag for a quotient constant. pub const QUOT: u8 = 0xA3; +/// Tag for an inductive type constant. pub const INDC: u8 = 0xA6; +/// Tag for a constructor constant. pub const CTOR: u8 = 0xC0; +/// Tag for a theorem constant. pub const THEO: u8 = 0xC1; +/// Tag for an opaque constant. pub const OPAQ: u8 = 0xC2; + +// -- Metadata tags ------------------------------------------------------------ + +/// Tag for an integer metadata value. pub const MINT: u8 = 0xF1; +/// Tag for a substring metadata value. pub const MSSTR: u8 = 0xF2; +/// Tag for source info metadata. pub const MSINFO: u8 = 0xF3; +/// Tag for syntax pre-resolution metadata. pub const MSPRE: u8 = 0xF4; +/// Tag for syntax tree metadata. pub const MSYN: u8 = 0xF5; +/// Tag for a generic data value in metadata. pub const MDVAL: u8 = 0xF6; +/// A content-addressed hierarchical name. +/// +/// Names are interned via `Arc` and compared/hashed by their Blake3 digest. #[derive(PartialEq, Eq, Debug, Clone)] pub struct Name(pub Arc); @@ -61,28 +122,38 @@ impl Ord for Name { } } +/// The underlying data for a [`Name`]. +/// +/// Each variant carries its precomputed Blake3 hash as the last field. #[derive(PartialEq, Eq, Debug)] pub enum NameData { + /// The root (empty) name. Anonymous(Hash), + /// A string component appended to a prefix name. Str(Name, String, Hash), + /// A numeric component appended to a prefix name. Num(Name, Nat, Hash), } impl Name { + /// Returns a reference to the inner [`NameData`]. pub fn as_data(&self) -> &NameData { &self.0 } - pub fn get_hash(&self) -> Hash { - match *self.0 { + /// Returns the precomputed Blake3 hash of this name. 
+ pub fn get_hash(&self) -> &Hash { + match self.0.as_ref() { NameData::Anonymous(h) | NameData::Str(.., h) | NameData::Num(.., h) => h, } } + /// Constructs the anonymous (root) name. pub fn anon() -> Self { let hash = blake3::hash(&[NANON]); Name(Arc::new(NameData::Anonymous(hash))) } + /// Constructs a name by appending a string component to `pre`. pub fn str(pre: Name, s: String) -> Self { let mut hasher = blake3::Hasher::new(); hasher.update(&[NSTR]); @@ -92,6 +163,7 @@ impl Name { Name(Arc::new(NameData::Str(pre, s, hash))) } + /// Constructs a name by appending a numeric component to `pre`. pub fn num(pre: Name, n: Nat) -> Name { let mut hasher = blake3::Hasher::new(); hasher.update(&[NNUM]); @@ -100,6 +172,7 @@ impl Name { let hash = hasher.finalize(); Name(Arc::new(NameData::Num(pre, n, hash))) } + /// Returns a dot-separated human-readable representation of this name. pub fn pretty(&self) -> String { let mut components = Vec::new(); let mut current = self; @@ -129,24 +202,38 @@ impl StdHash for Name { } } +/// A content-addressed universe level. +/// +/// Levels are interned via `Arc` and compared/hashed by their Blake3 digest. #[derive(PartialEq, Eq, Debug, Clone)] pub struct Level(pub Arc); +/// The underlying data for a [`Level`]. +/// +/// Each variant carries its precomputed Blake3 hash as the last field. #[derive(Debug, PartialEq, Eq)] pub enum LevelData { + /// Universe level 0 (Prop). Zero(Hash), + /// Successor of a universe level. Succ(Level, Hash), + /// Maximum of two universe levels. Max(Level, Level, Hash), + /// Impredicative maximum of two universe levels. Imax(Level, Level, Hash), + /// A named universe parameter. Param(Name, Hash), + /// A universe-level metavariable. Mvar(Name, Hash), } impl Level { + /// Returns a reference to the inner [`LevelData`]. pub fn as_data(&self) -> &LevelData { &self.0 } + /// Returns the precomputed Blake3 hash of this level. 
pub fn get_hash(&self) -> &Hash { match &*self.0 { LevelData::Zero(h) @@ -157,15 +244,18 @@ impl Level { | LevelData::Mvar(_, h) => h, } } + /// Constructs universe level 0. pub fn zero() -> Self { Level(Arc::new(LevelData::Zero(blake3::hash(&[UZERO])))) } + /// Constructs the successor of universe level `x`. pub fn succ(x: Level) -> Self { let mut hasher = blake3::Hasher::new(); hasher.update(&[USUCC]); hasher.update(x.get_hash().as_bytes()); Level(Arc::new(LevelData::Succ(x, hasher.finalize()))) } + /// Constructs `max x y`. pub fn max(x: Level, y: Level) -> Self { let mut hasher = blake3::Hasher::new(); hasher.update(&[UMAX]); @@ -173,6 +263,7 @@ impl Level { hasher.update(y.get_hash().as_bytes()); Level(Arc::new(LevelData::Max(x, y, hasher.finalize()))) } + /// Constructs `imax x y` (impredicative max). pub fn imax(x: Level, y: Level) -> Self { let mut hasher = blake3::Hasher::new(); hasher.update(&[UIMAX]); @@ -180,12 +271,14 @@ impl Level { hasher.update(y.get_hash().as_bytes()); Level(Arc::new(LevelData::Imax(x, y, hasher.finalize()))) } + /// Constructs a universe parameter with the given name. pub fn param(x: Name) -> Self { let mut hasher = blake3::Hasher::new(); hasher.update(&[UPARAM]); hasher.update(x.get_hash().as_bytes()); Level(Arc::new(LevelData::Param(x, hasher.finalize()))) } + /// Constructs a universe-level metavariable with the given name. pub fn mvar(x: Name) -> Self { let mut hasher = blake3::Hasher::new(); hasher.update(&[UMVAR]); @@ -200,9 +293,12 @@ impl StdHash for Level { } } +/// A literal value embedded in an expression. #[derive(Debug, PartialEq, Eq)] pub enum Literal { + /// A natural number literal. NatVal(Nat), + /// A string literal. 
StrVal(String), } @@ -213,7 +309,6 @@ impl PartialOrd for Literal { } // should match Literal.lt here https://github.com/leanprover/lean4/blob/fe21b950586cde248dae4b2a0f59d43c1f19cd87/src/Lean/Expr.lean#L34 -// TODO: test that Nat and String comparisons match impl Ord for Literal { fn cmp(&self, other: &Self) -> std::cmp::Ordering { match (self, other) { @@ -225,11 +320,16 @@ impl Ord for Literal { } } -#[derive(Debug, PartialEq, Eq, Clone)] +/// Binder annotation kind, mirroring Lean 4's `BinderInfo`. +#[derive(Debug, PartialEq, Eq, Clone, Hash)] pub enum BinderInfo { + /// Explicit binder `(x : A)`. Default, + /// Implicit binder `{x : A}`. Implicit, + /// Strict implicit binder `{{x : A}}`. StrictImplicit, + /// Instance implicit binder `[x : A]`. InstImplicit, } @@ -262,10 +362,14 @@ fn hash_int(i: &Int, hasher: &mut blake3::Hasher) { } } +/// A substring reference: a string together with start and stop byte positions. #[derive(Debug, PartialEq, Eq, Clone)] pub struct Substring { + /// The underlying string. pub str: String, + /// The start byte position (inclusive). pub start_pos: Nat, + /// The stop byte position (exclusive). pub stop_pos: Nat, } @@ -276,10 +380,14 @@ fn hash_substring(ss: &Substring, hasher: &mut blake3::Hasher) { hasher.update(&ss.stop_pos.to_le_bytes()); } +/// Source location metadata attached to syntax nodes. #[derive(Debug, PartialEq, Eq, Clone)] pub enum SourceInfo { + /// Original source with leading whitespace, leading position, trailing whitespace, trailing position. Original(Substring, Nat, Substring, Nat), + /// Synthetic source span with start position, stop position, and canonical flag. Synthetic(Nat, Nat, bool), + /// No source information available. None, } @@ -305,9 +413,12 @@ fn hash_source_info(si: &SourceInfo, hasher: &mut blake3::Hasher) { } } +/// Pre-resolved reference attached to a syntax identifier. #[derive(Debug, PartialEq, Eq, Clone)] pub enum SyntaxPreresolved { + /// A pre-resolved namespace reference. 
Namespace(Name), + /// A pre-resolved declaration reference with alias strings. Decl(Name, Vec), } @@ -332,11 +443,16 @@ fn hash_syntax_preresolved( } } +/// A Lean 4 concrete syntax tree node. #[derive(Debug, PartialEq, Eq, Clone)] pub enum Syntax { + /// Placeholder for missing syntax. Missing, + /// An interior syntax node with a kind name and child nodes. Node(SourceInfo, Name, Vec), + /// An atomic token (keyword, symbol, etc.). Atom(SourceInfo, String), + /// An identifier with optional pre-resolved references. Ident(SourceInfo, Substring, Name, Vec), } @@ -350,7 +466,7 @@ fn hash_syntax(syn: &Syntax, hasher: &mut blake3::Hasher) { hasher.update(&[1]); hash_source_info(info, hasher); hasher.update(kind.get_hash().as_bytes()); - hasher.update(&(args.len() as u64).to_le_bytes()); + hasher.update(&Nat::from(args.len() as u64).to_le_bytes()); for arg in args { hash_syntax(arg, hasher); } @@ -365,7 +481,7 @@ fn hash_syntax(syn: &Syntax, hasher: &mut blake3::Hasher) { hash_source_info(info, hasher); hash_substring(raw_val, hasher); hasher.update(val.get_hash().as_bytes()); - hasher.update(&(preresolved.len() as u64).to_le_bytes()); + hasher.update(&Nat::from(preresolved.len() as u64).to_le_bytes()); for pr in preresolved { hash_syntax_preresolved(pr, hasher); } @@ -373,13 +489,20 @@ fn hash_syntax(syn: &Syntax, hasher: &mut blake3::Hasher) { } } +/// A dynamically-typed value stored in expression metadata (`KVMap` entries). #[derive(Debug, PartialEq, Eq, Clone)] pub enum DataValue { + /// A string value. OfString(String), + /// A boolean value. OfBool(bool), + /// A name value. OfName(Name), + /// A natural number value. OfNat(Nat), + /// An integer value. OfInt(Int), + /// A syntax tree value. OfSyntax(Box), } @@ -413,30 +536,50 @@ fn hash_data_value(dv: &DataValue, hasher: &mut blake3::Hasher) { } } +/// A content-addressed Lean 4 kernel expression. +/// +/// Expressions are interned via `Arc` and compared/hashed by their Blake3 digest. 
#[derive(PartialEq, Eq, Debug, Clone)] pub struct Expr(pub Arc); +/// The underlying data for an [`Expr`]. +/// +/// Each variant carries its precomputed Blake3 hash as the last field. #[derive(Debug, PartialEq, Eq)] pub enum ExprData { + /// Bound variable (de Bruijn index). Bvar(Nat, Hash), + /// Free variable. Fvar(Name, Hash), + /// Metavariable. Mvar(Name, Hash), + /// Sort (universe). Sort(Level, Hash), + /// Reference to a named constant with universe level arguments. Const(Name, Vec, Hash), + /// Function application. App(Expr, Expr, Hash), + /// Lambda abstraction. Lam(Name, Expr, Expr, BinderInfo, Hash), + /// Dependent function type (Pi / forall). ForallE(Name, Expr, Expr, BinderInfo, Hash), + /// Let-binding (name, type, value, body, non-dep flag). LetE(Name, Expr, Expr, Expr, bool, Hash), + /// Literal value (nat or string). Lit(Literal, Hash), + /// Metadata-annotated expression with key-value pairs. Mdata(Vec<(Name, DataValue)>, Expr, Hash), + /// Projection from a structure (type name, field index, struct expr). Proj(Name, Nat, Expr, Hash), } impl Expr { + /// Returns a reference to the inner [`ExprData`]. pub fn as_data(&self) -> &ExprData { &self.0 } + /// Returns the precomputed Blake3 hash of this expression. pub fn get_hash(&self) -> &Hash { match &*self.0 { ExprData::Bvar(_, h) @@ -453,6 +596,7 @@ impl Expr { | ExprData::Proj(.., h) => h, } } + /// Constructs a bound variable expression from a de Bruijn index. pub fn bvar(x: Nat) -> Self { let mut hasher = blake3::Hasher::new(); hasher.update(&[EVAR]); @@ -460,6 +604,7 @@ impl Expr { Expr(Arc::new(ExprData::Bvar(x, hasher.finalize()))) } + /// Constructs a free variable expression. pub fn fvar(x: Name) -> Self { let mut hasher = blake3::Hasher::new(); hasher.update(&[EFVAR]); @@ -467,6 +612,7 @@ impl Expr { Expr(Arc::new(ExprData::Fvar(x, hasher.finalize()))) } + /// Constructs a metavariable expression. 
pub fn mvar(x: Name) -> Self { let mut hasher = blake3::Hasher::new(); hasher.update(&[EMVAR]); @@ -474,6 +620,7 @@ impl Expr { Expr(Arc::new(ExprData::Mvar(x, hasher.finalize()))) } + /// Constructs a sort expression from a universe level. pub fn sort(x: Level) -> Self { let mut hasher = blake3::Hasher::new(); hasher.update(&[ESORT]); @@ -481,6 +628,7 @@ impl Expr { Expr(Arc::new(ExprData::Sort(x, hasher.finalize()))) } + /// Constructs a constant reference with universe level arguments. pub fn cnst(x: Name, us: Vec) -> Self { let mut hasher = blake3::Hasher::new(); hasher.update(&[EREF]); @@ -491,6 +639,7 @@ impl Expr { Expr(Arc::new(ExprData::Const(x, us, hasher.finalize()))) } + /// Constructs a function application `f a`. pub fn app(f: Expr, a: Expr) -> Self { let mut hasher = blake3::Hasher::new(); hasher.update(&[EAPP]); @@ -499,6 +648,7 @@ impl Expr { Expr(Arc::new(ExprData::App(f, a, hasher.finalize()))) } + /// Constructs a lambda abstraction `fun (n : t) => b`. pub fn lam(n: Name, t: Expr, b: Expr, bi: BinderInfo) -> Self { let mut hasher = blake3::Hasher::new(); hasher.update(&[ELAM]); @@ -509,6 +659,7 @@ impl Expr { Expr(Arc::new(ExprData::Lam(n, t, b, bi, hasher.finalize()))) } + /// Constructs a dependent function type (forall / Pi). pub fn all(n: Name, t: Expr, b: Expr, bi: BinderInfo) -> Self { let mut hasher = blake3::Hasher::new(); hasher.update(&[EALL]); @@ -519,6 +670,7 @@ impl Expr { Expr(Arc::new(ExprData::ForallE(n, t, b, bi, hasher.finalize()))) } + /// Constructs a let-binding `let n : t := v in b`. #[allow(non_snake_case)] pub fn letE(n: Name, t: Expr, v: Expr, b: Expr, nd: bool) -> Self { let mut hasher = blake3::Hasher::new(); @@ -531,6 +683,7 @@ impl Expr { Expr(Arc::new(ExprData::LetE(n, t, v, b, nd, hasher.finalize()))) } + /// Constructs a literal expression (nat or string). 
pub fn lit(x: Literal) -> Self { let mut hasher = blake3::Hasher::new(); match &x { @@ -546,10 +699,11 @@ impl Expr { Expr(Arc::new(ExprData::Lit(x, hasher.finalize()))) } + /// Constructs a metadata-annotated expression. pub fn mdata(xs: Vec<(Name, DataValue)>, e: Expr) -> Self { let mut hasher = blake3::Hasher::new(); hasher.update(&[EMDATA]); - hasher.update(&(xs.len() as u64).to_le_bytes()); + hasher.update(&Nat::from(xs.len() as u64).to_le_bytes()); for (name, dv) in &xs { hasher.update(name.get_hash().as_bytes()); hash_data_value(dv, &mut hasher); @@ -558,6 +712,7 @@ impl Expr { Expr(Arc::new(ExprData::Mdata(xs, e, hasher.finalize()))) } + /// Constructs a projection expression (type name, field index, struct expr). pub fn proj(n: Name, i: Nat, e: Expr) -> Self { let mut hasher = blake3::Hasher::new(); hasher.update(&[EPRJ]); @@ -593,10 +748,15 @@ impl StdHash for ExprData { } } -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +/// Hints that control how aggressively the kernel unfolds a definition. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] pub enum ReducibilityHints { + /// Never unfold. + #[default] Opaque, + /// Always unfold (abbreviation). Abbrev, + /// Unfold with the given priority height. Regular(u32), } @@ -618,10 +778,14 @@ fn hash_reducibility_hints( }; } -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +/// Safety classification of a definition. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum DefinitionSafety { + /// Marked `unsafe`; no termination or totality guarantees. Unsafe, + /// Fully safe and total. Safe, + /// Partial definition; may not terminate on all inputs. Partial, } @@ -636,10 +800,14 @@ fn hash_definition_safety( }; } +/// Fields common to every constant declaration. #[derive(Debug, Clone, PartialEq, Eq)] pub struct ConstantVal { + /// The fully-qualified name of the constant. pub name: Name, + /// Universe-polymorphic level parameter names. pub level_params: Vec, + /// The type of the constant. 
pub typ: Expr, } @@ -652,37 +820,55 @@ fn hash_constant_val(cv: &ConstantVal, hasher: &mut blake3::Hasher) { hasher.update(cv.typ.get_hash().as_bytes()); } +/// An axiom declaration (no definitional body). #[derive(Debug, Clone)] pub struct AxiomVal { + /// Common constant fields. pub cnst: ConstantVal, + /// Whether this axiom is marked `unsafe`. pub is_unsafe: bool, } +/// A definition with a computable body. #[derive(Debug, Clone)] pub struct DefinitionVal { + /// Common constant fields. pub cnst: ConstantVal, + /// The definition body. pub value: Expr, + /// Reducibility hints for the kernel. pub hints: ReducibilityHints, + /// Safety classification. pub safety: DefinitionSafety, + /// Names of all constants in the same mutual block. pub all: Vec, } +/// A theorem declaration (proof-irrelevant; body is never reduced). #[derive(Debug, Clone)] pub struct TheoremVal { + /// Common constant fields. pub cnst: ConstantVal, + /// The proof term. pub value: Expr, + /// Names of all constants in the same mutual block. pub all: Vec, } +/// An opaque constant (body exists but is not unfolded by the kernel). #[derive(Debug, Clone)] pub struct OpaqueVal { + /// Common constant fields. pub cnst: ConstantVal, + /// The opaque body. pub value: Expr, + /// Whether this opaque constant is marked `unsafe`. pub is_unsafe: bool, + /// Names of all constants in the same mutual block. pub all: Vec, } -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum QuotKind { Type, Ctor, @@ -699,39 +885,63 @@ fn hash_quot_kind(kind: &QuotKind, hasher: &mut blake3::Hasher) { }; } +/// A quotient-type related constant. #[derive(Debug, Clone)] pub struct QuotVal { + /// Common constant fields. pub cnst: ConstantVal, + /// Which quotient primitive this constant represents. pub kind: QuotKind, } +/// An inductive type declaration. #[derive(Debug, Clone, PartialEq, Eq)] pub struct InductiveVal { + /// Common constant fields. 
pub cnst: ConstantVal, + /// Number of parameters. pub num_params: Nat, + /// Number of indices. pub num_indices: Nat, + /// Names of all types in the same mutual inductive block. pub all: Vec, + /// Names of the constructors for this type. pub ctors: Vec, + /// Number of nested (non-mutual) inductives. pub num_nested: Nat, + /// Whether this inductive type is recursive. pub is_rec: bool, + /// Whether this inductive type is marked `unsafe`. pub is_unsafe: bool, + /// Whether this inductive type is reflexive. pub is_reflexive: bool, } +/// A constructor of an inductive type. #[derive(Debug, Clone, PartialEq, Eq)] pub struct ConstructorVal { + /// Common constant fields. pub cnst: ConstantVal, + /// Name of the parent inductive type. pub induct: Name, + /// Constructor index within the inductive type. pub cidx: Nat, + /// Number of parameters inherited from the inductive type. pub num_params: Nat, + /// Number of fields (non-parameter arguments). pub num_fields: Nat, + /// Whether this constructor is marked `unsafe`. pub is_unsafe: bool, } +/// A single reduction rule for a recursor, mapping a constructor to its branch. #[derive(Debug, Clone, PartialEq, Eq)] pub struct RecursorRule { + /// The constructor this rule applies to. pub ctor: Name, + /// Number of fields the constructor has. pub n_fields: Nat, + /// The right-hand side expression for this branch. pub rhs: Expr, } @@ -741,32 +951,52 @@ fn hash_recursor_rule(rule: &RecursorRule, hasher: &mut blake3::Hasher) { hasher.update(rule.rhs.get_hash().as_bytes()); } +/// A recursor (eliminator) for an inductive type. #[derive(Debug, Clone, PartialEq, Eq)] pub struct RecursorVal { + /// Common constant fields. pub cnst: ConstantVal, + /// Names of all types in the same mutual inductive block. pub all: Vec, + /// Number of parameters. pub num_params: Nat, + /// Number of indices. pub num_indices: Nat, + /// Number of motive arguments. pub num_motives: Nat, + /// Number of minor premise arguments. 
pub num_minors: Nat, + /// Reduction rules, one per constructor. pub rules: Vec, + /// Whether this is a K-like recursor (proof-irrelevant elimination). pub k: bool, + /// Whether this recursor is marked `unsafe`. pub is_unsafe: bool, } +/// A top-level constant declaration in the Lean environment. #[derive(Debug, Clone)] pub enum ConstantInfo { + /// An axiom. AxiomInfo(AxiomVal), + /// A definition with a computable body. DefnInfo(DefinitionVal), + /// A theorem (proof-irrelevant). ThmInfo(TheoremVal), + /// An opaque constant. OpaqueInfo(OpaqueVal), + /// A quotient primitive. QuotInfo(QuotVal), + /// An inductive type. InductInfo(InductiveVal), + /// A constructor of an inductive type. CtorInfo(ConstructorVal), + /// A recursor (eliminator). RecInfo(RecursorVal), } impl ConstantInfo { + /// Computes the Blake3 content hash of this constant declaration. pub fn get_hash(&self) -> Hash { let mut hasher = blake3::Hasher::new(); match self { @@ -859,6 +1089,7 @@ impl ConstantInfo { hasher.finalize() } + /// Returns the name of this constant. pub fn get_name(&self) -> &Name { match self { ConstantInfo::AxiomInfo(v) => &v.cnst.name, @@ -872,6 +1103,7 @@ impl ConstantInfo { } } + /// Returns the type of this constant. pub fn get_type(&self) -> &Expr { match self { ConstantInfo::AxiomInfo(v) => &v.cnst.typ, @@ -885,6 +1117,7 @@ impl ConstantInfo { } } + /// Returns the universe level parameter names of this constant. pub fn get_level_params(&self) -> &Vec { match self { ConstantInfo::AxiomInfo(v) => &v.cnst.level_params, @@ -899,4 +1132,5 @@ impl ConstantInfo { } } +/// The Lean kernel environment: a map from names to their constant declarations. pub type Env = FxHashMap; diff --git a/src/ix/graph.rs b/src/ix/graph.rs index f2aaed55..86d211fc 100644 --- a/src/ix/graph.rs +++ b/src/ix/graph.rs @@ -1,9 +1,17 @@ +//! Builds a reference graph from a Lean environment. +//! +//! The graph tracks which constants reference which other constants, maintaining +//! 
both forward (`out_refs`) and reverse (`in_refs`) edges. This is used to +//! compute SCCs (strongly connected components) for mutual block detection. +//! Construction is parallelized via rayon. + use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; use rustc_hash::{FxHashMap, FxHashSet}; use std::collections::hash_map::Entry; use crate::ix::env::{ConstantInfo, Env, Expr, ExprData, Name}; +/// A set of [`Name`]s, used to represent the neighbors of a node in the reference graph. pub type NameSet = FxHashSet; /// Absorbs the elements of the smaller [`NameSet`] into the bigger one and returns @@ -18,10 +26,10 @@ pub fn merge_name_sets(mut a: NameSet, mut b: NameSet) -> NameSet { } } -/// A general-purpose map from names to name sets. +/// Maps each [`Name`] to the set of [`Name`]s it is associated with. pub type RefMap = FxHashMap; -/// A reference graph of names. +/// A bidirectional reference graph over [`Name`]s, storing both forward and reverse edges. /// ```ignored /// A ──> B ──> C <── D ──> E /// out_refs: [(A, [B]), (B, [C]), (C, []), (D, [C, E]), (E, [])] @@ -35,6 +43,10 @@ pub struct RefGraph { pub in_refs: RefMap, } +/// Builds a [`RefGraph`] from a Lean [`Env`] by collecting all constant references in parallel. +/// +/// For each constant, extracts the set of names it references (from types, values, constructors, +/// and recursor rules), then assembles both the forward and reverse edge maps. 
pub fn build_ref_graph(env: &Env) -> RefGraph { let mk_in_refs = |name: &Name, deps: &NameSet| -> RefMap { let mut in_refs = RefMap::from_iter([(name.clone(), NameSet::default())]); @@ -149,11 +161,300 @@ fn get_expr_references<'a>( merge_name_sets(value_name_set, body_name_set), ) }, - ExprData::Mdata(_, expr, _) | ExprData::Proj(_, _, expr, _) => { - get_expr_references(expr, cache) + ExprData::Mdata(_, expr, _) => get_expr_references(expr, cache), + ExprData::Proj(type_name, _, expr, _) => { + let mut name_set = get_expr_references(expr, cache); + name_set.insert(type_name.clone()); + name_set }, _ => NameSet::default(), }; cache.insert(expr, name_set.clone()); name_set } + +#[cfg(test)] +mod tests { + use super::*; + use crate::ix::env::*; + use crate::lean::nat::Nat; + + fn n(s: &str) -> Name { + Name::str(Name::anon(), s.to_string()) + } + + fn sort0() -> Expr { + Expr::sort(Level::zero()) + } + + fn mk_cv(name: &str) -> ConstantVal { + ConstantVal { name: n(name), level_params: vec![], typ: sort0() } + } + + #[test] + fn empty_env() { + let env = Env::default(); + let graph = build_ref_graph(&env); + assert!(graph.out_refs.is_empty()); + assert!(graph.in_refs.is_empty()); + } + + #[test] + fn axiom_no_deps() { + // Axiom A : Sort 0 — references nothing + let mut env = Env::default(); + env.insert( + n("A"), + ConstantInfo::AxiomInfo(AxiomVal { cnst: mk_cv("A"), is_unsafe: false }), + ); + let graph = build_ref_graph(&env); + assert!(graph.out_refs[&n("A")].is_empty()); + assert!(graph.in_refs[&n("A")].is_empty()); + } + + #[test] + fn defn_with_const_refs() { + // B : Sort 0, defn A : B := B + // A's type refs B, A's value refs B + let mut env = Env::default(); + env.insert( + n("B"), + ConstantInfo::AxiomInfo(AxiomVal { cnst: mk_cv("B"), is_unsafe: false }), + ); + let b_ref = Expr::cnst(n("B"), vec![]); + env.insert( + n("A"), + ConstantInfo::DefnInfo(DefinitionVal { + cnst: ConstantVal { + name: n("A"), + level_params: vec![], + typ: b_ref.clone(), + }, + 
value: b_ref, + hints: ReducibilityHints::Opaque, + safety: DefinitionSafety::Safe, + all: vec![n("A")], + }), + ); + let graph = build_ref_graph(&env); + // A references B + assert!(graph.out_refs[&n("A")].contains(&n("B"))); + // B is referenced by A + assert!(graph.in_refs[&n("B")].contains(&n("A"))); + // B references nothing + assert!(graph.out_refs[&n("B")].is_empty()); + } + + #[test] + fn inductive_includes_ctors() { + // Inductive T with constructors T.mk1, T.mk2 + let mut env = Env::default(); + env.insert( + n("T"), + ConstantInfo::InductInfo(InductiveVal { + cnst: mk_cv("T"), + num_params: Nat::from(0u64), + num_indices: Nat::from(0u64), + all: vec![n("T")], + ctors: vec![n("T.mk1"), n("T.mk2")], + num_nested: Nat::from(0u64), + is_rec: false, + is_unsafe: false, + is_reflexive: false, + }), + ); + // Add constructors to env so they can be referenced + env.insert( + n("T.mk1"), + ConstantInfo::CtorInfo(ConstructorVal { + cnst: mk_cv("T.mk1"), + induct: n("T"), + cidx: Nat::from(0u64), + num_params: Nat::from(0u64), + num_fields: Nat::from(0u64), + is_unsafe: false, + }), + ); + env.insert( + n("T.mk2"), + ConstantInfo::CtorInfo(ConstructorVal { + cnst: mk_cv("T.mk2"), + induct: n("T"), + cidx: Nat::from(1u64), + num_params: Nat::from(0u64), + num_fields: Nat::from(0u64), + is_unsafe: false, + }), + ); + + let graph = build_ref_graph(&env); + // T references T.mk1 and T.mk2 (from ctors list) + assert!(graph.out_refs[&n("T")].contains(&n("T.mk1"))); + assert!(graph.out_refs[&n("T")].contains(&n("T.mk2"))); + } + + #[test] + fn ctor_includes_induct() { + // Constructor T.mk references its parent T + let mut env = Env::default(); + env.insert( + n("T"), + ConstantInfo::AxiomInfo(AxiomVal { cnst: mk_cv("T"), is_unsafe: false }), + ); + env.insert( + n("T.mk"), + ConstantInfo::CtorInfo(ConstructorVal { + cnst: mk_cv("T.mk"), + induct: n("T"), + cidx: Nat::from(0u64), + num_params: Nat::from(0u64), + num_fields: Nat::from(0u64), + is_unsafe: false, + }), + ); 
+ let graph = build_ref_graph(&env); + assert!(graph.out_refs[&n("T.mk")].contains(&n("T"))); + } + + #[test] + fn in_refs_bidirectional() { + // A -> B, C -> B + let mut env = Env::default(); + let b_ref = Expr::cnst(n("B"), vec![]); + env.insert( + n("B"), + ConstantInfo::AxiomInfo(AxiomVal { cnst: mk_cv("B"), is_unsafe: false }), + ); + env.insert( + n("A"), + ConstantInfo::AxiomInfo(AxiomVal { + cnst: ConstantVal { + name: n("A"), + level_params: vec![], + typ: b_ref.clone(), + }, + is_unsafe: false, + }), + ); + env.insert( + n("C"), + ConstantInfo::AxiomInfo(AxiomVal { + cnst: ConstantVal { name: n("C"), level_params: vec![], typ: b_ref }, + is_unsafe: false, + }), + ); + let graph = build_ref_graph(&env); + // B's in_refs should contain both A and C + let b_in = &graph.in_refs[&n("B")]; + assert!(b_in.contains(&n("A"))); + assert!(b_in.contains(&n("C"))); + } + + #[test] + fn recursor_refs_rules() { + // Recursor T.rec with a rule for T.mk whose rhs references Q + let mut env = Env::default(); + env.insert( + n("T.mk"), + ConstantInfo::AxiomInfo(AxiomVal { + cnst: mk_cv("T.mk"), + is_unsafe: false, + }), + ); + env.insert( + n("Q"), + ConstantInfo::AxiomInfo(AxiomVal { cnst: mk_cv("Q"), is_unsafe: false }), + ); + env.insert( + n("T.rec"), + ConstantInfo::RecInfo(RecursorVal { + cnst: mk_cv("T.rec"), + all: vec![n("T")], + num_params: Nat::from(0u64), + num_indices: Nat::from(0u64), + num_motives: Nat::from(1u64), + num_minors: Nat::from(1u64), + rules: vec![RecursorRule { + ctor: n("T.mk"), + n_fields: Nat::from(0u64), + rhs: Expr::cnst(n("Q"), vec![]), + }], + k: false, + is_unsafe: false, + }), + ); + let graph = build_ref_graph(&env); + let rec_out = &graph.out_refs[&n("T.rec")]; + // References the ctor from the rule + assert!(rec_out.contains(&n("T.mk"))); + // References Q from the rule's rhs + assert!(rec_out.contains(&n("Q"))); + } + + #[test] + fn expr_references_through_app_lam_let() { + // Test that get_expr_references traverses App, Lam, LetE, 
Proj + let mut env = Env::default(); + env.insert( + n("X"), + ConstantInfo::AxiomInfo(AxiomVal { cnst: mk_cv("X"), is_unsafe: false }), + ); + env.insert( + n("Y"), + ConstantInfo::AxiomInfo(AxiomVal { cnst: mk_cv("Y"), is_unsafe: false }), + ); + env.insert( + n("Z"), + ConstantInfo::AxiomInfo(AxiomVal { cnst: mk_cv("Z"), is_unsafe: false }), + ); + // Build: fun (_ : X) => let _ : Y := #0 in Z + let x_ref = Expr::cnst(n("X"), vec![]); + let y_ref = Expr::cnst(n("Y"), vec![]); + let z_ref = Expr::cnst(n("Z"), vec![]); + let body = Expr::letE( + Name::anon(), + y_ref, + Expr::bvar(Nat::from(0u64)), + z_ref, + false, + ); + let lam = Expr::lam(Name::anon(), x_ref, body, BinderInfo::Default); + env.insert( + n("W"), + ConstantInfo::AxiomInfo(AxiomVal { + cnst: ConstantVal { name: n("W"), level_params: vec![], typ: lam }, + is_unsafe: false, + }), + ); + let graph = build_ref_graph(&env); + let w_out = &graph.out_refs[&n("W")]; + assert!(w_out.contains(&n("X"))); + assert!(w_out.contains(&n("Y"))); + assert!(w_out.contains(&n("Z"))); + } + + #[test] + fn proj_references_type_name() { + // Proj references the type name it projects from + let mut env = Env::default(); + env.insert( + n("S"), + ConstantInfo::AxiomInfo(AxiomVal { cnst: mk_cv("S"), is_unsafe: false }), + ); + let proj_expr = + Expr::proj(n("S"), Nat::from(0u64), Expr::bvar(Nat::from(0u64))); + env.insert( + n("P"), + ConstantInfo::AxiomInfo(AxiomVal { + cnst: ConstantVal { + name: n("P"), + level_params: vec![], + typ: proj_expr, + }, + is_unsafe: false, + }), + ); + let graph = build_ref_graph(&env); + assert!(graph.out_refs[&n("P")].contains(&n("S"))); + } +} diff --git a/src/ix/ground.rs b/src/ix/ground.rs index 1bf30615..008d00fd 100644 --- a/src/ix/ground.rs +++ b/src/ix/ground.rs @@ -1,3 +1,10 @@ +//! Groundedness checking for Lean environment constants. +//! +//! A constant is "grounded" if all its references resolve to known constants, all +//! 
bound variables are in scope, and no metavariables remain. Ungroundedness +//! propagates through the reference graph: if A references ungrounded B, then A +//! is also ungrounded. + use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; use rustc_hash::{FxHashMap, FxHashSet}; use std::collections::hash_map::Entry; @@ -10,16 +17,27 @@ use crate::{ lean::nat::Nat, }; +/// Reason a constant failed groundedness checking. #[derive(Debug)] pub enum GroundError<'a> { + /// A universe level parameter or metavariable is not in scope. Level(Level, Vec), + /// A referenced constant does not exist in the environment (or is itself ungrounded). Ref(Name), + /// An expression-level metavariable was encountered. MVar(Expr), + /// A free or out-of-scope bound variable was encountered. Var(Expr, usize), + /// An inductive type's constructor is missing or has the wrong kind. Indc(&'a InductiveVal, Option<&'a ConstantInfo>), + /// An invalid de Bruijn index. Idx(Nat), } +/// Checks every constant in `env` for groundedness and returns a map of all ungrounded names. +/// +/// First collects immediately ungrounded constants in parallel, then propagates +/// ungroundedness transitively through `in_refs` (the reverse reference graph). 
pub fn ground_consts<'a>( env: &'a Env, in_refs: &RefMap, @@ -204,3 +222,316 @@ fn ground_level<'a>( }, } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::ix::env::*; + use crate::ix::graph::build_ref_graph; + + fn n(s: &str) -> Name { + Name::str(Name::anon(), s.to_string()) + } + + fn sort0() -> Expr { + Expr::sort(Level::zero()) + } + + fn mk_cv(name: &str) -> ConstantVal { + ConstantVal { name: n(name), level_params: vec![], typ: sort0() } + } + + fn check(env: &Env) -> FxHashMap> { + let graph = build_ref_graph(env); + ground_consts(env, &graph.in_refs) + } + + #[test] + fn grounded_axiom() { + let mut env = Env::default(); + env.insert( + n("A"), + ConstantInfo::AxiomInfo(AxiomVal { cnst: mk_cv("A"), is_unsafe: false }), + ); + let errors = check(&env); + assert!(errors.is_empty(), "well-formed axiom should be grounded"); + } + + #[test] + fn grounded_defn_with_bvar_in_lam() { + // fun (_ : Sort 0) => #0 is grounded (bvar(0) under one binder) + let mut env = Env::default(); + let body = Expr::lam( + Name::anon(), + sort0(), + Expr::bvar(Nat::from(0u64)), + BinderInfo::Default, + ); + env.insert( + n("f"), + ConstantInfo::DefnInfo(DefinitionVal { + cnst: ConstantVal { name: n("f"), level_params: vec![], typ: sort0() }, + value: body, + hints: ReducibilityHints::Opaque, + safety: DefinitionSafety::Safe, + all: vec![n("f")], + }), + ); + assert!(check(&env).is_empty()); + } + + #[test] + fn ungrounded_missing_ref() { + // Axiom A : B, but B is not in env + let mut env = Env::default(); + env.insert( + n("A"), + ConstantInfo::AxiomInfo(AxiomVal { + cnst: ConstantVal { + name: n("A"), + level_params: vec![], + typ: Expr::cnst(n("B"), vec![]), + }, + is_unsafe: false, + }), + ); + let errors = check(&env); + assert!(errors.contains_key(&n("A"))); + assert!(matches!(errors[&n("A")], GroundError::Ref(_))); + } + + #[test] + fn ungrounded_bvar_out_of_scope() { + // Axiom A : #0 (bvar with no enclosing binder) + let mut env = Env::default(); + env.insert( + 
n("A"), + ConstantInfo::AxiomInfo(AxiomVal { + cnst: ConstantVal { + name: n("A"), + level_params: vec![], + typ: Expr::bvar(Nat::from(0u64)), + }, + is_unsafe: false, + }), + ); + let errors = check(&env); + assert!(errors.contains_key(&n("A"))); + assert!(matches!(errors[&n("A")], GroundError::Var(_, 0))); + } + + #[test] + fn ungrounded_mvar() { + // Axiom A : ?m + let mut env = Env::default(); + env.insert( + n("A"), + ConstantInfo::AxiomInfo(AxiomVal { + cnst: ConstantVal { + name: n("A"), + level_params: vec![], + typ: Expr::mvar(n("m")), + }, + is_unsafe: false, + }), + ); + let errors = check(&env); + assert!(errors.contains_key(&n("A"))); + assert!(matches!(errors[&n("A")], GroundError::MVar(_))); + } + + #[test] + fn ungrounded_level_param() { + // Axiom A : Sort (Param "u"), but "u" not in level_params + let mut env = Env::default(); + env.insert( + n("A"), + ConstantInfo::AxiomInfo(AxiomVal { + cnst: ConstantVal { + name: n("A"), + level_params: vec![], // u is not declared + typ: Expr::sort(Level::param(n("u"))), + }, + is_unsafe: false, + }), + ); + let errors = check(&env); + assert!(errors.contains_key(&n("A"))); + assert!(matches!(errors[&n("A")], GroundError::Level(_, _))); + } + + #[test] + fn grounded_level_param_when_declared() { + // Axiom A.{u} : Sort u — "u" is declared + let mut env = Env::default(); + env.insert( + n("A"), + ConstantInfo::AxiomInfo(AxiomVal { + cnst: ConstantVal { + name: n("A"), + level_params: vec![n("u")], + typ: Expr::sort(Level::param(n("u"))), + }, + is_unsafe: false, + }), + ); + assert!(check(&env).is_empty()); + } + + #[test] + fn propagation_through_in_refs() { + // B is ungrounded (refs missing C), A refs B → A should also be ungrounded + let mut env = Env::default(); + env.insert( + n("B"), + ConstantInfo::AxiomInfo(AxiomVal { + cnst: ConstantVal { + name: n("B"), + level_params: vec![], + typ: Expr::cnst(n("C"), vec![]), // C not in env + }, + is_unsafe: false, + }), + ); + env.insert( + n("A"), + 
ConstantInfo::AxiomInfo(AxiomVal { + cnst: ConstantVal { + name: n("A"), + level_params: vec![], + typ: Expr::cnst(n("B"), vec![]), + }, + is_unsafe: false, + }), + ); + let errors = check(&env); + // B is directly ungrounded + assert!(errors.contains_key(&n("B"))); + // A is transitively ungrounded via Ref(B) + assert!(errors.contains_key(&n("A"))); + assert!(matches!(errors[&n("A")], GroundError::Ref(_))); + } + + #[test] + fn inductive_missing_ctor() { + // Inductive T lists T.mk as a ctor, but T.mk is not in env + let mut env = Env::default(); + env.insert( + n("T"), + ConstantInfo::InductInfo(InductiveVal { + cnst: mk_cv("T"), + num_params: Nat::from(0u64), + num_indices: Nat::from(0u64), + all: vec![n("T")], + ctors: vec![n("T.mk")], + num_nested: Nat::from(0u64), + is_rec: false, + is_unsafe: false, + is_reflexive: false, + }), + ); + let errors = check(&env); + assert!(errors.contains_key(&n("T"))); + assert!(matches!(errors[&n("T")], GroundError::Indc(_, _))); + } + + #[test] + fn inductive_ctor_wrong_kind() { + // Inductive T lists T.mk as a ctor, but T.mk is an axiom not a ctor + let mut env = Env::default(); + env.insert( + n("T"), + ConstantInfo::InductInfo(InductiveVal { + cnst: mk_cv("T"), + num_params: Nat::from(0u64), + num_indices: Nat::from(0u64), + all: vec![n("T")], + ctors: vec![n("T.mk")], + num_nested: Nat::from(0u64), + is_rec: false, + is_unsafe: false, + is_reflexive: false, + }), + ); + env.insert( + n("T.mk"), + ConstantInfo::AxiomInfo(AxiomVal { + cnst: mk_cv("T.mk"), + is_unsafe: false, + }), + ); + let errors = check(&env); + assert!(errors.contains_key(&n("T"))); + assert!(matches!(errors[&n("T")], GroundError::Indc(_, Some(_)))); + } + + #[test] + fn binding_increments_depth() { + // fun (_ : Sort 0) => #0 is grounded (bvar under 1 binder) + // but fun (_ : Sort 0) => #1 is ungrounded (bvar escapes) + let mut env = Env::default(); + + // Grounded case + env.insert( + n("ok"), + ConstantInfo::DefnInfo(DefinitionVal { + cnst: 
ConstantVal { name: n("ok"), level_params: vec![], typ: sort0() }, + value: Expr::lam( + Name::anon(), + sort0(), + Expr::bvar(Nat::from(0u64)), + BinderInfo::Default, + ), + hints: ReducibilityHints::Opaque, + safety: DefinitionSafety::Safe, + all: vec![n("ok")], + }), + ); + + // Ungrounded case + env.insert( + n("bad"), + ConstantInfo::DefnInfo(DefinitionVal { + cnst: ConstantVal { + name: n("bad"), + level_params: vec![], + typ: sort0(), + }, + value: Expr::lam( + Name::anon(), + sort0(), + Expr::bvar(Nat::from(1u64)), // escapes the single binder + BinderInfo::Default, + ), + hints: ReducibilityHints::Opaque, + safety: DefinitionSafety::Safe, + all: vec![n("bad")], + }), + ); + + let errors = check(&env); + assert!(!errors.contains_key(&n("ok"))); + assert!(errors.contains_key(&n("bad"))); + assert!(matches!(errors[&n("bad")], GroundError::Var(_, 1))); + } + + #[test] + fn fvar_is_ungrounded() { + // Free variables should be ungrounded + let mut env = Env::default(); + env.insert( + n("A"), + ConstantInfo::AxiomInfo(AxiomVal { + cnst: ConstantVal { + name: n("A"), + level_params: vec![], + typ: Expr::fvar(n("x")), + }, + is_unsafe: false, + }), + ); + let errors = check(&env); + assert!(errors.contains_key(&n("A"))); + assert!(matches!(errors[&n("A")], GroundError::Var(_, 0))); + } +} diff --git a/src/ix/ixon.rs b/src/ix/ixon.rs index 953e6794..cda202a8 100644 --- a/src/ix/ixon.rs +++ b/src/ix/ixon.rs @@ -1,1665 +1,47 @@ -use num_bigint::BigUint; - -use crate::{ - ix::env::{ - BinderInfo, DefinitionSafety, Int, Name, QuotKind, ReducibilityHints, - }, - lean::nat::*, -}; - -use crate::ix::address::*; - -pub trait Serialize: Sized { - fn put(&self, buf: &mut Vec); - fn get(buf: &mut &[u8]) -> Result; -} - -impl Serialize for u8 { - fn put(&self, buf: &mut Vec) { - buf.push(*self) - } - - fn get(buf: &mut &[u8]) -> Result { - match buf.split_first() { - Some((&x, rest)) => { - *buf = rest; - Ok(x) - }, - None => Err("get u8 EOF".to_string()), - } - } -} - -impl 
Serialize for u16 { - fn put(&self, buf: &mut Vec) { - buf.extend_from_slice(&self.to_le_bytes()); - } - - fn get(buf: &mut &[u8]) -> Result { - match buf.split_at_checked(2) { - Some((head, rest)) => { - *buf = rest; - Ok(u16::from_le_bytes([head[0], head[1]])) - }, - None => Err("get u16 EOF".to_string()), - } - } -} - -impl Serialize for u32 { - fn put(&self, buf: &mut Vec) { - buf.extend_from_slice(&self.to_le_bytes()); - } - - fn get(buf: &mut &[u8]) -> Result { - match buf.split_at_checked(4) { - Some((head, rest)) => { - *buf = rest; - Ok(u32::from_le_bytes([head[0], head[1], head[2], head[3]])) - }, - None => Err("get u32 EOF".to_string()), - } - } -} - -impl Serialize for u64 { - fn put(&self, buf: &mut Vec) { - buf.extend_from_slice(&self.to_le_bytes()); - } - - fn get(buf: &mut &[u8]) -> Result { - match buf.split_at_checked(8) { - Some((head, rest)) => { - *buf = rest; - Ok(u64::from_le_bytes([ - head[0], head[1], head[2], head[3], head[4], head[5], head[6], - head[7], - ])) - }, - None => Err("get u64 EOF".to_string()), - } - } -} - -impl Serialize for bool { - fn put(&self, buf: &mut Vec) { - match self { - false => buf.push(0), - true => buf.push(1), - } - } - fn get(buf: &mut &[u8]) -> Result { - match buf.split_at_checked(1) { - Some((head, rest)) => { - *buf = rest; - match head[0] { - 0 => Ok(false), - 1 => Ok(true), - x => Err(format!("get bool invalid {x}")), - } - }, - None => Err("get bool EOF".to_string()), - } - } -} - -pub fn u64_byte_count(x: u64) -> u8 { - match x { - 0 => 0, - x if x < 0x0000000000000100 => 1, - x if x < 0x0000000000010000 => 2, - x if x < 0x0000000001000000 => 3, - x if x < 0x0000000100000000 => 4, - x if x < 0x0000010000000000 => 5, - x if x < 0x0001000000000000 => 6, - x if x < 0x0100000000000000 => 7, - _ => 8, - } -} - -pub fn u64_put_trimmed_le(x: u64, buf: &mut Vec) { - let n = u64_byte_count(x) as usize; - buf.extend_from_slice(&x.to_le_bytes()[..n]) -} - -pub fn u64_get_trimmed_le(len: usize, buf: &mut &[u8]) 
-> Result { - let mut res = [0u8; 8]; - if len > 8 { - return Err("get trimmed_le_64 len > 8".to_string()); - } - match buf.split_at_checked(len) { - Some((head, rest)) => { - *buf = rest; - res[..len].copy_from_slice(head); - Ok(u64::from_le_bytes(res)) - }, - None => Err(format!("get trimmed_le_u64 EOF {len} {buf:?}")), - } -} - -// F := flag, L := large-bit, X := small-field, A := large_field -// 0xFFFF_LXXX {AAAA_AAAA, ...} -// "Tag" means the whole thing -// "Head" means the first byte of the tag -// "Flag" means the first nibble of the head -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct Tag4 { - flag: u8, - size: u64, -} - -impl Tag4 { - #[allow(clippy::cast_possible_truncation)] - pub fn encode_head(&self) -> u8 { - if self.size < 8 { - (self.flag << 4) + (self.size as u8) - } else { - (self.flag << 4) + 0b1000 + (u64_byte_count(self.size) - 1) - } - } - pub fn decode_head(head: u8) -> (u8, bool, u8) { - (head >> 4, head & 0b1000 != 0, head % 0b1000) - } -} - -impl Serialize for Tag4 { - fn put(&self, buf: &mut Vec) { - self.encode_head().put(buf); - if self.size >= 8 { - u64_put_trimmed_le(self.size, buf) - } - } - fn get(buf: &mut &[u8]) -> Result { - let head = u8::get(buf)?; - let (flag, large, small) = Tag4::decode_head(head); - let size = if large { - u64_get_trimmed_le((small + 1) as usize, buf)? - } else { - small as u64 - }; - Ok(Tag4 { flag, size }) - } -} - -#[derive(Clone, Debug, PartialEq, Eq)] -pub struct ByteArray(pub Vec); - -impl ByteArray { - fn put_slice(slice: &[u8], buf: &mut Vec) { - Tag4 { flag: 0x9, size: slice.len() as u64 }.put(buf); - buf.extend_from_slice(slice); - } -} - -impl Serialize for ByteArray { - fn put(&self, buf: &mut Vec) { - Self::put_slice(&self.0, buf); - } - fn get(buf: &mut &[u8]) -> Result { - let tag = Tag4::get(buf)?; - match tag { - Tag4 { flag: 0x9, size } => { - let mut res = vec![]; - for _ in 0..size { - res.push(u8::get(buf)?) 
- } - Ok(ByteArray(res)) - }, - _ => Err("expected Tag4 0x9 for Vec".to_string()), - } - } -} - -impl Serialize for String { - fn put(&self, buf: &mut Vec) { - let bytes = self.as_bytes(); - Tag4 { flag: 0x9, size: bytes.len() as u64 }.put(buf); - buf.extend_from_slice(bytes); - } - fn get(buf: &mut &[u8]) -> Result { - let bytes = ByteArray::get(buf)?; - String::from_utf8(bytes.0).map_err(|e| format!("Invalid UTF-8: {e}")) - } -} - -impl Serialize for Nat { - fn put(&self, buf: &mut Vec) { - let bytes = self.to_le_bytes(); - Tag4 { flag: 0x9, size: bytes.len() as u64 }.put(buf); - buf.extend_from_slice(&bytes); - } - fn get(buf: &mut &[u8]) -> Result { - let bytes = ByteArray::get(buf)?; - Ok(Nat::from_le_bytes(&bytes.0)) - } -} - -impl Serialize for Int { - fn put(&self, buf: &mut Vec) { - match self { - Self::OfNat(x) => { - buf.push(0); - x.put(buf); - }, - Self::NegSucc(x) => { - buf.push(1); - x.put(buf); - }, - } - } - fn get(buf: &mut &[u8]) -> Result { - match buf.split_at_checked(1) { - Some((head, rest)) => { - *buf = rest; - match head[0] { - 0 => Ok(Self::OfNat(Nat::get(buf)?)), - 1 => Ok(Self::NegSucc(Nat::get(buf)?)), - x => Err(format!("get Int invalid {x}")), - } - }, - None => Err("get Int EOF".to_string()), - } - } -} - -impl Serialize for Vec { - fn put(&self, buf: &mut Vec) { - Nat(BigUint::from(self.len())).put(buf); - for x in self { - x.put(buf) - } - } - - fn get(buf: &mut &[u8]) -> Result { - let mut res = vec![]; - let len = Nat::get(buf)?.0; - let mut i = BigUint::from(0u32); - while i < len { - res.push(S::get(buf)?); - i += 1u32; - } - Ok(res) - } -} - -#[allow(clippy::cast_possible_truncation)] -pub fn pack_bools(bools: I) -> u8 -where - I: IntoIterator, -{ - let mut acc: u8 = 0; - for (i, b) in bools.into_iter().take(8).enumerate() { - if b { - acc |= 1u8 << (i as u32); - } - } - acc -} - -pub fn unpack_bools(n: usize, b: u8) -> Vec { - (0..8).map(|i: u32| (b & (1u8 << i)) != 0).take(n.min(8)).collect() -} - -impl Serialize for 
QuotKind { - fn put(&self, buf: &mut Vec) { - match self { - Self::Type => buf.push(0), - Self::Ctor => buf.push(1), - Self::Lift => buf.push(2), - Self::Ind => buf.push(3), - } - } - - fn get(buf: &mut &[u8]) -> Result { - match buf.split_at_checked(1) { - Some((head, rest)) => { - *buf = rest; - match head[0] { - 0 => Ok(Self::Type), - 1 => Ok(Self::Ctor), - 2 => Ok(Self::Lift), - 3 => Ok(Self::Ind), - x => Err(format!("get QuotKind invalid {x}")), - } - }, - None => Err("get QuotKind EOF".to_string()), - } - } -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] -pub enum DefKind { - Definition, - Opaque, - Theorem, -} - -impl Serialize for DefKind { - fn put(&self, buf: &mut Vec) { - match self { - Self::Definition => buf.push(0), - Self::Opaque => buf.push(1), - Self::Theorem => buf.push(2), - } - } - - fn get(buf: &mut &[u8]) -> Result { - match buf.split_at_checked(1) { - Some((head, rest)) => { - *buf = rest; - match head[0] { - 0 => Ok(Self::Definition), - 1 => Ok(Self::Opaque), - 2 => Ok(Self::Theorem), - x => Err(format!("get DefKind invalid {x}")), - } - }, - None => Err("get DefKind EOF".to_string()), - } - } -} - -impl Serialize for BinderInfo { - fn put(&self, buf: &mut Vec) { - match self { - Self::Default => buf.push(0), - Self::Implicit => buf.push(1), - Self::StrictImplicit => buf.push(2), - Self::InstImplicit => buf.push(3), - } - } - - fn get(buf: &mut &[u8]) -> Result { - match buf.split_at_checked(1) { - Some((head, rest)) => { - *buf = rest; - match head[0] { - 0 => Ok(Self::Default), - 1 => Ok(Self::Implicit), - 2 => Ok(Self::StrictImplicit), - 3 => Ok(Self::InstImplicit), - x => Err(format!("get BinderInfo invalid {x}")), - } - }, - None => Err("get BinderInfo EOF".to_string()), - } - } -} - -impl Serialize for ReducibilityHints { - fn put(&self, buf: &mut Vec) { - match self { - Self::Opaque => buf.push(0), - Self::Abbrev => buf.push(1), - Self::Regular(x) => { - buf.push(2); - x.put(buf); - }, - } - } - - fn get(buf: &mut 
&[u8]) -> Result { - match buf.split_at_checked(1) { - Some((head, rest)) => { - *buf = rest; - match head[0] { - 0 => Ok(Self::Opaque), - 1 => Ok(Self::Abbrev), - 2 => { - let x: u32 = Serialize::get(buf)?; - Ok(Self::Regular(x)) - }, - x => Err(format!("get ReducibilityHints invalid {x}")), - } - }, - None => Err("get ReducibilityHints EOF".to_string()), - } - } -} - -impl Serialize for DefinitionSafety { - fn put(&self, buf: &mut Vec) { - match self { - Self::Unsafe => buf.push(0), - Self::Safe => buf.push(1), - Self::Partial => buf.push(2), - } - } - - fn get(buf: &mut &[u8]) -> Result { - match buf.split_at_checked(1) { - Some((head, rest)) => { - *buf = rest; - match head[0] { - 0 => Ok(Self::Unsafe), - 1 => Ok(Self::Safe), - 2 => Ok(Self::Partial), - x => Err(format!("get DefSafety invalid {x}")), - } - }, - None => Err("get DefSafety EOF".to_string()), - } - } -} - -impl Serialize for (A, B) { - fn put(&self, buf: &mut Vec) { - self.0.put(buf); - self.1.put(buf); - } - - fn get(buf: &mut &[u8]) -> Result { - Ok((A::get(buf)?, B::get(buf)?)) - } -} - -impl Serialize for Address { - fn put(&self, buf: &mut Vec) { - buf.extend_from_slice(self.as_bytes()) - } - - fn get(buf: &mut &[u8]) -> Result { - match buf.split_at_checked(32) { - Some((head, rest)) => { - *buf = rest; - Address::from_slice(head) - .map_err(|_e| "try from slice error".to_string()) - }, - None => Err("get Address out of input".to_string()), - } - } -} - -impl Serialize for MetaAddress { - fn put(&self, buf: &mut Vec) { - self.data.put(buf); - self.meta.put(buf); - } - - fn get(buf: &mut &[u8]) -> Result { - let data = Address::get(buf)?; - let meta = Address::get(buf)?; - Ok(MetaAddress { data, meta }) - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct Quotient { - pub kind: QuotKind, - pub lvls: Nat, - pub typ: Address, -} - -impl Serialize for Quotient { - fn put(&self, buf: &mut Vec) { - self.kind.put(buf); - self.lvls.put(buf); - self.typ.put(buf); - } - - fn get(buf: &mut 
&[u8]) -> Result { - let kind = QuotKind::get(buf)?; - let lvls = Nat::get(buf)?; - let typ = Address::get(buf)?; - Ok(Quotient { kind, lvls, typ }) - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct Axiom { - pub is_unsafe: bool, - pub lvls: Nat, - pub typ: Address, -} - -impl Serialize for Axiom { - fn put(&self, buf: &mut Vec) { - self.is_unsafe.put(buf); - self.lvls.put(buf); - self.typ.put(buf); - } - - fn get(buf: &mut &[u8]) -> Result { - let is_unsafe = bool::get(buf)?; - let lvls = Nat::get(buf)?; - let typ = Address::get(buf)?; - Ok(Axiom { lvls, typ, is_unsafe }) - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct Definition { - pub kind: DefKind, - pub safety: DefinitionSafety, - pub lvls: Nat, - pub typ: Address, - pub value: Address, -} - -impl Serialize for Definition { - fn put(&self, buf: &mut Vec) { - self.kind.put(buf); - self.safety.put(buf); - self.lvls.put(buf); - self.typ.put(buf); - self.value.put(buf); - } - - fn get(buf: &mut &[u8]) -> Result { - let kind = DefKind::get(buf)?; - let safety = DefinitionSafety::get(buf)?; - let lvls = Nat::get(buf)?; - let typ = Address::get(buf)?; - let value = Address::get(buf)?; - Ok(Definition { kind, safety, lvls, typ, value }) - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct Constructor { - pub is_unsafe: bool, - pub lvls: Nat, - pub cidx: Nat, - pub params: Nat, - pub fields: Nat, - pub typ: Address, -} - -impl Serialize for Constructor { - fn put(&self, buf: &mut Vec) { - self.is_unsafe.put(buf); - self.lvls.put(buf); - self.cidx.put(buf); - self.params.put(buf); - self.fields.put(buf); - self.typ.put(buf); - } - - fn get(buf: &mut &[u8]) -> Result { - let is_unsafe = bool::get(buf)?; - let lvls = Nat::get(buf)?; - let cidx = Nat::get(buf)?; - let params = Nat::get(buf)?; - let fields = Nat::get(buf)?; - let typ = Address::get(buf)?; - Ok(Constructor { lvls, typ, cidx, params, fields, is_unsafe }) - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct RecursorRule 
{ - pub fields: Nat, - pub rhs: Address, -} - -impl Serialize for RecursorRule { - fn put(&self, buf: &mut Vec) { - self.fields.put(buf); - self.rhs.put(buf); - } - - fn get(buf: &mut &[u8]) -> Result { - let fields = Nat::get(buf)?; - let rhs = Address::get(buf)?; - Ok(RecursorRule { fields, rhs }) - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct Recursor { - pub k: bool, - pub is_unsafe: bool, - pub lvls: Nat, - pub params: Nat, - pub indices: Nat, - pub motives: Nat, - pub minors: Nat, - pub typ: Address, - pub rules: Vec, -} - -impl Serialize for Recursor { - fn put(&self, buf: &mut Vec) { - pack_bools(vec![self.k, self.is_unsafe]).put(buf); - self.lvls.put(buf); - self.params.put(buf); - self.indices.put(buf); - self.motives.put(buf); - self.minors.put(buf); - self.typ.put(buf); - self.rules.put(buf); - } - - fn get(buf: &mut &[u8]) -> Result { - let bools = unpack_bools(2, u8::get(buf)?); - let lvls = Nat::get(buf)?; - let params = Nat::get(buf)?; - let indices = Nat::get(buf)?; - let motives = Nat::get(buf)?; - let minors = Nat::get(buf)?; - let typ = Serialize::get(buf)?; - let rules = Serialize::get(buf)?; - Ok(Recursor { - lvls, - typ, - params, - indices, - motives, - minors, - rules, - k: bools[0], - is_unsafe: bools[1], - }) - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct Inductive { - pub recr: bool, - pub refl: bool, - pub is_unsafe: bool, - pub lvls: Nat, - pub params: Nat, - pub indices: Nat, - pub nested: Nat, - pub typ: Address, - pub ctors: Vec, -} - -impl Serialize for Inductive { - fn put(&self, buf: &mut Vec) { - pack_bools(vec![self.recr, self.refl, self.is_unsafe]).put(buf); - self.lvls.put(buf); - self.params.put(buf); - self.indices.put(buf); - self.nested.put(buf); - self.typ.put(buf); - Serialize::put(&self.ctors, buf); - } - - fn get(buf: &mut &[u8]) -> Result { - let bools = unpack_bools(3, u8::get(buf)?); - let lvls = Nat::get(buf)?; - let params = Nat::get(buf)?; - let indices = Nat::get(buf)?; - let nested 
= Nat::get(buf)?; - let typ = Address::get(buf)?; - let ctors = Serialize::get(buf)?; - Ok(Inductive { - recr: bools[0], - refl: bools[1], - is_unsafe: bools[2], - lvls, - params, - indices, - nested, - typ, - ctors, - }) - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct InductiveProj { - pub idx: Nat, - pub block: Address, -} - -impl Serialize for InductiveProj { - fn put(&self, buf: &mut Vec) { - self.idx.put(buf); - self.block.put(buf); - } - - fn get(buf: &mut &[u8]) -> Result { - let idx = Nat::get(buf)?; - let block = Address::get(buf)?; - Ok(InductiveProj { idx, block }) - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct ConstructorProj { - pub idx: Nat, - pub cidx: Nat, - pub block: Address, -} - -impl Serialize for ConstructorProj { - fn put(&self, buf: &mut Vec) { - self.idx.put(buf); - self.cidx.put(buf); - self.block.put(buf); - } - - fn get(buf: &mut &[u8]) -> Result { - let idx = Nat::get(buf)?; - let cidx = Nat::get(buf)?; - let block = Address::get(buf)?; - Ok(ConstructorProj { idx, cidx, block }) - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct RecursorProj { - pub idx: Nat, - pub block: Address, -} - -impl Serialize for RecursorProj { - fn put(&self, buf: &mut Vec) { - self.idx.put(buf); - self.block.put(buf); - } - - fn get(buf: &mut &[u8]) -> Result { - let idx = Nat::get(buf)?; - let block = Address::get(buf)?; - Ok(RecursorProj { idx, block }) - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct DefinitionProj { - pub idx: Nat, - pub block: Address, -} - -impl Serialize for DefinitionProj { - fn put(&self, buf: &mut Vec) { - self.idx.put(buf); - self.block.put(buf); - } - - fn get(buf: &mut &[u8]) -> Result { - let idx = Nat::get(buf)?; - let block = Address::get(buf)?; - Ok(DefinitionProj { idx, block }) - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct Comm { - pub secret: Address, - pub payload: Address, -} - -impl Serialize for Comm { - fn put(&self, buf: &mut Vec) { - 
self.secret.put(buf); - self.payload.put(buf); - } - - fn get(buf: &mut &[u8]) -> Result { - let secret = Address::get(buf)?; - let payload = Address::get(buf)?; - Ok(Comm { secret, payload }) - } -} -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct EvalClaim { - pub lvls: Address, - pub typ: Address, - pub input: Address, - pub output: Address, -} - -impl Serialize for EvalClaim { - fn put(&self, buf: &mut Vec) { - self.lvls.put(buf); - self.typ.put(buf); - self.input.put(buf); - self.output.put(buf); - } - - fn get(buf: &mut &[u8]) -> Result { - let lvls = Address::get(buf)?; - let typ = Address::get(buf)?; - let input = Address::get(buf)?; - let output = Address::get(buf)?; - Ok(Self { lvls, typ, input, output }) - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct CheckClaim { - pub lvls: Address, - pub typ: Address, - pub value: Address, -} - -impl Serialize for CheckClaim { - fn put(&self, buf: &mut Vec) { - self.lvls.put(buf); - self.typ.put(buf); - self.value.put(buf); - } - - fn get(buf: &mut &[u8]) -> Result { - let lvls = Address::get(buf)?; - let typ = Address::get(buf)?; - let value = Address::get(buf)?; - Ok(Self { lvls, typ, value }) - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum Claim { - Evals(EvalClaim), - Checks(CheckClaim), -} - -impl Serialize for Claim { - fn put(&self, buf: &mut Vec) { - match self { - Self::Evals(x) => { - u8::put(&0xE1, buf); - x.put(buf) - }, - Self::Checks(x) => { - u8::put(&0xE2, buf); - x.put(buf) - }, - } - } - fn get(buf: &mut &[u8]) -> Result { - match buf.split_at_checked(1) { - Some((head, rest)) => { - *buf = rest; - match head[0] { - 0xE1 => { - let x = EvalClaim::get(buf)?; - Ok(Self::Evals(x)) - }, - 0xE2 => { - let x = CheckClaim::get(buf)?; - Ok(Self::Checks(x)) - }, - x => Err(format!("get Claim invalid {x}")), - } - }, - None => Err("get Claim EOF".to_string()), - } - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct Proof { - pub claim: Claim, - pub proof: Vec, -} - -impl 
Serialize for Proof { - fn put(&self, buf: &mut Vec) { - self.claim.put(buf); - ByteArray::put_slice(&self.proof, buf); - } - - fn get(buf: &mut &[u8]) -> Result { - let claim = Claim::get(buf)?; - let ByteArray(proof) = ByteArray::get(buf)?; - Ok(Proof { claim, proof }) - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct Env { - pub env: Vec, -} - -impl Serialize for Env { - fn put(&self, buf: &mut Vec) { - self.env.put(buf) - } - - fn get(buf: &mut &[u8]) -> Result { - Ok(Env { env: Serialize::get(buf)? }) - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct Substring { - pub str: Address, - pub start_pos: Nat, - pub stop_pos: Nat, -} - -impl Serialize for Substring { - fn put(&self, buf: &mut Vec) { - self.str.put(buf); - self.start_pos.put(buf); - self.stop_pos.put(buf); - } - - fn get(buf: &mut &[u8]) -> Result { - let str = Address::get(buf)?; - let start_pos = Nat::get(buf)?; - let stop_pos = Nat::get(buf)?; - Ok(Substring { str, start_pos, stop_pos }) - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum SourceInfo { - Original(Substring, Nat, Substring, Nat), - Synthetic(Nat, Nat, bool), - None, -} - -impl Serialize for SourceInfo { - fn put(&self, buf: &mut Vec) { - match self { - Self::Original(l, p, t, e) => { - buf.push(0); - l.put(buf); - p.put(buf); - t.put(buf); - e.put(buf); - }, - Self::Synthetic(p, e, c) => { - buf.push(1); - p.put(buf); - e.put(buf); - c.put(buf); - }, - Self::None => { - buf.push(2); - }, - } - } - - fn get(buf: &mut &[u8]) -> Result { - match buf.split_at_checked(1) { - Some((head, rest)) => { - *buf = rest; - match head[0] { - 0 => Ok(Self::Original( - Substring::get(buf)?, - Nat::get(buf)?, - Substring::get(buf)?, - Nat::get(buf)?, - )), - 1 => { - Ok(Self::Synthetic(Nat::get(buf)?, Nat::get(buf)?, bool::get(buf)?)) - }, - 2 => Ok(Self::None), - x => Err(format!("get SourcInfo invalid {x}")), - } - }, - None => Err("get SourceInfo EOF".to_string()), - } - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] 
-pub enum Preresolved { - Namespace(Address), - Decl(Address, Vec
), -} - -impl Serialize for Preresolved { - fn put(&self, buf: &mut Vec) { - match self { - Self::Namespace(ns) => { - buf.push(0); - ns.put(buf); - }, - Self::Decl(n, fields) => { - buf.push(1); - n.put(buf); - fields.put(buf); - }, - } - } - - fn get(buf: &mut &[u8]) -> Result { - match buf.split_at_checked(1) { - Some((head, rest)) => { - *buf = rest; - match head[0] { - 0 => Ok(Self::Namespace(Address::get(buf)?)), - 1 => Ok(Self::Decl(Address::get(buf)?, Vec::
::get(buf)?)), - x => Err(format!("get Preresolved invalid {x}")), - } - }, - None => Err("get Preresolved EOF".to_string()), - } - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum Syntax { - Missing, - Node(SourceInfo, Address, Vec
), - Atom(SourceInfo, Address), - Ident(SourceInfo, Substring, Address, Vec), -} - -impl Serialize for Syntax { - fn put(&self, buf: &mut Vec) { - match self { - Self::Missing => { - buf.push(0); - }, - Self::Node(i, k, xs) => { - buf.push(1); - i.put(buf); - k.put(buf); - xs.put(buf); - }, - Self::Atom(i, v) => { - buf.push(2); - i.put(buf); - v.put(buf); - }, - Self::Ident(i, r, v, ps) => { - buf.push(3); - i.put(buf); - r.put(buf); - v.put(buf); - ps.put(buf); - }, - } - } - - fn get(buf: &mut &[u8]) -> Result { - match buf.split_at_checked(1) { - Some((head, rest)) => { - *buf = rest; - match head[0] { - 0 => Ok(Self::Missing), - 1 => Ok(Self::Node( - SourceInfo::get(buf)?, - Address::get(buf)?, - Vec::
::get(buf)?, - )), - 2 => Ok(Self::Atom(SourceInfo::get(buf)?, Address::get(buf)?)), - 3 => Ok(Self::Ident( - SourceInfo::get(buf)?, - Substring::get(buf)?, - Address::get(buf)?, - Vec::::get(buf)?, - )), - x => Err(format!("get Syntax invalid {x}")), - } - }, - None => Err("get Syntax EOF".to_string()), - } - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum MutConst { - Defn(Definition), - Indc(Inductive), - Recr(Recursor), -} - -impl Serialize for MutConst { - fn put(&self, buf: &mut Vec) { - match self { - Self::Defn(x) => { - buf.push(0); - x.put(buf); - }, - Self::Indc(x) => { - buf.push(1); - x.put(buf); - }, - Self::Recr(x) => { - buf.push(2); - x.put(buf); - }, - } - } - - fn get(buf: &mut &[u8]) -> Result { - match buf.split_at_checked(1) { - Some((head, rest)) => { - *buf = rest; - match head[0] { - 0 => Ok(Self::Defn(Definition::get(buf)?)), - 1 => Ok(Self::Indc(Inductive::get(buf)?)), - 2 => Ok(Self::Recr(Recursor::get(buf)?)), - x => Err(format!("get MutConst invalid {x}")), - } - }, - None => Err("get MutConst EOF".to_string()), - } - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum BuiltIn { - Obj, - Neutral, - Unreachable, -} - -impl BuiltIn { - pub fn name_of(&self) -> Name { - let s = match self { - Self::Obj => "_obj", - Self::Neutral => "_neutral", - Self::Unreachable => "_unreachable", - }; - Name::str(Name::anon(), s.to_string()) - } - pub fn from_name(name: &Name) -> Option { - if *name == BuiltIn::Obj.name_of() { - Some(BuiltIn::Obj) - } else if *name == BuiltIn::Neutral.name_of() { - Some(BuiltIn::Neutral) - } else if *name == BuiltIn::Unreachable.name_of() { - Some(BuiltIn::Unreachable) - } else { - None - } - } -} - -impl Serialize for BuiltIn { - fn put(&self, buf: &mut Vec) { - match self { - Self::Obj => buf.push(0), - Self::Neutral => buf.push(1), - Self::Unreachable => buf.push(2), - } - } - - fn get(buf: &mut &[u8]) -> Result { - match buf.split_at_checked(1) { - Some((head, rest)) => { - *buf = rest; - match head[0] 
{ - 0 => Ok(Self::Obj), - 1 => Ok(Self::Neutral), - 2 => Ok(Self::Unreachable), - x => Err(format!("get BuiltIn invalid {x}")), - } - }, - None => Err("get BuiltIn EOF".to_string()), - } - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum DataValue { - OfString(Address), - OfBool(bool), - OfName(Address), - OfNat(Address), - OfInt(Address), - OfSyntax(Address), -} - -impl Serialize for DataValue { - fn put(&self, buf: &mut Vec) { - match self { - Self::OfString(x) => { - buf.push(0); - x.put(buf); - }, - Self::OfBool(x) => { - buf.push(1); - x.put(buf); - }, - Self::OfName(x) => { - buf.push(2); - x.put(buf); - }, - Self::OfNat(x) => { - buf.push(3); - x.put(buf); - }, - Self::OfInt(x) => { - buf.push(4); - x.put(buf); - }, - Self::OfSyntax(x) => { - buf.push(5); - x.put(buf); - }, - } - } - - fn get(buf: &mut &[u8]) -> Result { - match buf.split_at_checked(1) { - Some((head, rest)) => { - *buf = rest; - match head[0] { - 0 => Ok(Self::OfString(Address::get(buf)?)), - 1 => Ok(Self::OfBool(bool::get(buf)?)), - 2 => Ok(Self::OfName(Address::get(buf)?)), - 3 => Ok(Self::OfNat(Address::get(buf)?)), - 4 => Ok(Self::OfInt(Address::get(buf)?)), - 5 => Ok(Self::OfSyntax(Address::get(buf)?)), - x => Err(format!("get DataValue invalid {x}")), - } - }, - None => Err("get DataValue EOF".to_string()), - } - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum Metadatum { - Link(Address), - Info(BinderInfo), - Hints(ReducibilityHints), - Links(Vec
), - Map(Vec<(Address, Address)>), - KVMap(Vec<(Address, DataValue)>), - Muts(Vec>), -} - -impl Serialize for Metadatum { - fn put(&self, buf: &mut Vec) { - match self { - Self::Link(x) => { - buf.push(0); - x.put(buf); - }, - Self::Info(x) => { - buf.push(1); - x.put(buf); - }, - Self::Hints(x) => { - buf.push(2); - x.put(buf); - }, - Self::Links(x) => { - buf.push(3); - x.put(buf); - }, - Self::Map(x) => { - buf.push(4); - x.put(buf); - }, - Self::KVMap(x) => { - buf.push(5); - x.put(buf); - }, - Self::Muts(x) => { - buf.push(6); - x.put(buf); - }, - } - } - - fn get(buf: &mut &[u8]) -> Result { - match buf.split_at_checked(1) { - Some((head, rest)) => { - *buf = rest; - match head[0] { - 0 => Ok(Self::Link(Address::get(buf)?)), - 1 => Ok(Self::Info(BinderInfo::get(buf)?)), - 2 => Ok(Self::Hints(ReducibilityHints::get(buf)?)), - 3 => Ok(Self::Links(Vec::
::get(buf)?)), - 4 => Ok(Self::Map(Vec::<(Address, Address)>::get(buf)?)), - 5 => Ok(Self::KVMap(Vec::<(Address, DataValue)>::get(buf)?)), - 6 => Ok(Self::Muts(Vec::>::get(buf)?)), - x => Err(format!("get Metadatum invalid {x}")), - } - }, - None => Err("get Metadatum EOF".to_string()), - } - } -} - -#[derive(Debug, Default, Clone, PartialEq, Eq)] -pub struct Metadata { - pub nodes: Vec, -} - -impl Serialize for Metadata { - fn put(&self, buf: &mut Vec) { - Tag4 { flag: 0xF, size: self.nodes.len() as u64 }.put(buf); - for n in self.nodes.iter() { - n.put(buf) - } - } - - fn get(buf: &mut &[u8]) -> Result { - let tag = Tag4::get(buf)?; - match tag { - Tag4 { flag: 0xF, size } => { - let mut nodes = vec![]; - for _ in 0..size { - nodes.push(Metadatum::get(buf)?) - } - Ok(Metadata { nodes }) - }, - x => Err(format!("get Metadata invalid {x:?}")), - } - } -} - -#[rustfmt::skip] -#[derive(Debug, Default, Clone, PartialEq, Eq)] -pub enum Ixon { - #[default] - NAnon, // 0x00, anonymous name - NStr(Address, Address), // 0x01, string name - NNum(Address, Address), // 0x02, number name - UZero, // 0x03, universe zero - USucc(Address), // 0x04, universe successor - UMax(Address, Address), // 0x05, universe max - UIMax(Address, Address), // 0x06, universe impredicative max - UVar(Nat), // 0x1X, universe variable - EVar(Nat), // 0x2X, expression variable - ERef(Address, Vec
), // 0x3X, expression reference - ERec(Nat, Vec
), // 0x4X, expression recursion - EPrj(Address, Nat, Address), // 0x5X, expression projection - ESort(Address), // 0x80, expression sort - EStr(Address), // 0x81, expression string - ENat(Address), // 0x82, expression natural - EApp(Address, Address), // 0x83, expression application - ELam(Address, Address), // 0x84, expression lambda - EAll(Address, Address), // 0x85, expression forall - ELet(bool, Address, Address, Address), // 0x86, 0x87, expression let - Blob(Vec), // 0x9X, tagged bytes - Defn(Definition), // 0xA0, definition constant - Recr(Recursor), // 0xA1, recursor constant - Axio(Axiom), // 0xA2, axiom constant - Quot(Quotient), // 0xA3, quotient constant - CPrj(ConstructorProj), // 0xA4, constructor projection - RPrj(RecursorProj), // 0xA5, recursor projection - IPrj(InductiveProj), // 0xA6, inductive projection - DPrj(DefinitionProj), // 0xA7, definition projection - Muts(Vec), // 0xBX, mutual constants - Prof(Proof), // 0xE0, zero-knowledge proof - Eval(EvalClaim), // 0xE1, evaluation claim - Chck(CheckClaim), // 0xE2, typechecking claim - Comm(Comm), // 0xE3, cryptographic commitment - Envn(Env), // 0xE4, multi-claim environment - Prim(BuiltIn), // 0xE5, compiler built-ins - Meta(Metadata), // 0xFX, metadata -} - -impl Ixon { - pub fn put_tag(flag: u8, size: u64, buf: &mut Vec) { - Tag4 { flag, size }.put(buf); - } - - pub fn puts(xs: &[S], buf: &mut Vec) { - for x in xs { - x.put(buf) - } - } - - pub fn gets( - len: u64, - buf: &mut &[u8], - ) -> Result, String> { - let mut vec = vec![]; - for _ in 0..len { - let s = S::get(buf)?; - vec.push(s); - } - Ok(vec) - } - - pub fn meta(nodes: Vec) -> Self { - Ixon::Meta(Metadata { nodes }) - } -} - -impl Serialize for Ixon { - fn put(&self, buf: &mut Vec) { - match self { - Self::NAnon => Self::put_tag(0x0, 0, buf), - Self::NStr(n, s) => { - Self::put_tag(0x0, 1, buf); - Serialize::put(n, buf); - Serialize::put(s, buf); - }, - Self::NNum(n, s) => { - Self::put_tag(0x0, 2, buf); - Serialize::put(n, buf); - 
Serialize::put(s, buf); - }, - Self::UZero => Self::put_tag(0x0, 3, buf), - Self::USucc(x) => { - Self::put_tag(0x0, 4, buf); - Serialize::put(x, buf); - }, - Self::UMax(x, y) => { - Self::put_tag(0x0, 5, buf); - Serialize::put(x, buf); - Serialize::put(y, buf); - }, - Self::UIMax(x, y) => { - Self::put_tag(0x0, 6, buf); - Serialize::put(x, buf); - Serialize::put(y, buf); - }, - Self::UVar(x) => { - let bytes = x.0.to_bytes_le(); - Self::put_tag(0x1, bytes.len() as u64, buf); - Self::puts(&bytes, buf) - }, - Self::EVar(x) => { - let bytes = x.0.to_bytes_le(); - Self::put_tag(0x2, bytes.len() as u64, buf); - Self::puts(&bytes, buf) - }, - Self::ERef(a, ls) => { - Self::put_tag(0x3, ls.len() as u64, buf); - a.put(buf); - Self::puts(ls, buf) - }, - Self::ERec(i, ls) => { - Self::put_tag(0x4, ls.len() as u64, buf); - i.put(buf); - Self::puts(ls, buf) - }, - Self::EPrj(t, n, x) => { - let bytes = n.0.to_bytes_le(); - Self::put_tag(0x5, bytes.len() as u64, buf); - t.put(buf); - Self::puts(&bytes, buf); - x.put(buf); - }, - Self::ESort(u) => { - Self::put_tag(0x8, 0, buf); - u.put(buf); - }, - Self::EStr(s) => { - Self::put_tag(0x8, 1, buf); - s.put(buf); - }, - Self::ENat(n) => { - Self::put_tag(0x8, 2, buf); - n.put(buf); - }, - Self::EApp(f, a) => { - Self::put_tag(0x8, 3, buf); - f.put(buf); - a.put(buf); - }, - Self::ELam(t, b) => { - Self::put_tag(0x8, 4, buf); - t.put(buf); - b.put(buf); - }, - Self::EAll(t, b) => { - Self::put_tag(0x8, 5, buf); - t.put(buf); - b.put(buf); - }, - Self::ELet(nd, t, d, b) => { - if *nd { - Self::put_tag(0x8, 6, buf); - } else { - Self::put_tag(0x8, 7, buf); - } - t.put(buf); - d.put(buf); - b.put(buf); - }, - Self::Blob(xs) => { - Self::put_tag(0x9, xs.len() as u64, buf); - Self::puts(xs, buf); - }, - Self::Defn(x) => { - Self::put_tag(0xA, 0, buf); - x.put(buf); - }, - Self::Recr(x) => { - Self::put_tag(0xA, 1, buf); - x.put(buf); - }, - Self::Axio(x) => { - Self::put_tag(0xA, 2, buf); - x.put(buf); - }, - Self::Quot(x) => { - 
Self::put_tag(0xA, 3, buf); - x.put(buf); - }, - Self::CPrj(x) => { - Self::put_tag(0xA, 4, buf); - x.put(buf); - }, - Self::RPrj(x) => { - Self::put_tag(0xA, 5, buf); - x.put(buf); - }, - Self::IPrj(x) => { - Self::put_tag(0xA, 6, buf); - x.put(buf); - }, - Self::DPrj(x) => { - Self::put_tag(0xA, 7, buf); - x.put(buf); - }, - Self::Muts(xs) => { - Self::put_tag(0xB, xs.len() as u64, buf); - Self::puts(xs, buf); - }, - Self::Prof(x) => { - Self::put_tag(0xE, 0, buf); - x.put(buf); - }, - Self::Eval(x) => { - Self::put_tag(0xE, 1, buf); - x.put(buf); - }, - Self::Chck(x) => { - Self::put_tag(0xE, 2, buf); - x.put(buf); - }, - Self::Comm(x) => { - Self::put_tag(0xE, 3, buf); - x.put(buf); - }, - Self::Envn(x) => { - Self::put_tag(0xE, 4, buf); - x.put(buf); - }, - Self::Prim(x) => { - Self::put_tag(0xE, 5, buf); - x.put(buf); - }, - Self::Meta(x) => x.put(buf), - } - } - fn get(buf: &mut &[u8]) -> Result { - let tag = Tag4::get(buf)?; - match tag { - Tag4 { flag: 0x0, size: 0 } => Ok(Self::NAnon), - Tag4 { flag: 0x0, size: 1 } => { - Ok(Self::NStr(Address::get(buf)?, Address::get(buf)?)) - }, - Tag4 { flag: 0x0, size: 2 } => { - Ok(Self::NNum(Address::get(buf)?, Address::get(buf)?)) - }, - Tag4 { flag: 0x0, size: 3 } => Ok(Self::UZero), - Tag4 { flag: 0x0, size: 4 } => Ok(Self::USucc(Address::get(buf)?)), - Tag4 { flag: 0x0, size: 5 } => { - Ok(Self::UMax(Address::get(buf)?, Address::get(buf)?)) - }, - Tag4 { flag: 0x0, size: 6 } => { - Ok(Self::UIMax(Address::get(buf)?, Address::get(buf)?)) - }, - Tag4 { flag: 0x1, size } => { - let bytes: Vec = Self::gets(size, buf)?; - Ok(Self::UVar(Nat::from_le_bytes(&bytes))) - }, - Tag4 { flag: 0x2, size } => { - let bytes: Vec = Self::gets(size, buf)?; - Ok(Self::EVar(Nat::from_le_bytes(&bytes))) - }, - Tag4 { flag: 0x3, size } => { - Ok(Self::ERef(Address::get(buf)?, Self::gets(size, buf)?)) - }, - Tag4 { flag: 0x4, size } => { - Ok(Self::ERec(Nat::get(buf)?, Self::gets(size, buf)?)) - }, - Tag4 { flag: 0x5, size } => 
Ok(Self::EPrj( - Address::get(buf)?, - Nat::from_le_bytes(&Self::gets(size, buf)?), - Address::get(buf)?, - )), - Tag4 { flag: 0x8, size: 0 } => Ok(Self::ESort(Address::get(buf)?)), - Tag4 { flag: 0x8, size: 1 } => Ok(Self::EStr(Address::get(buf)?)), - Tag4 { flag: 0x8, size: 2 } => Ok(Self::ENat(Address::get(buf)?)), - Tag4 { flag: 0x8, size: 3 } => { - Ok(Self::EApp(Address::get(buf)?, Address::get(buf)?)) - }, - Tag4 { flag: 0x8, size: 4 } => { - Ok(Self::ELam(Address::get(buf)?, Address::get(buf)?)) - }, - Tag4 { flag: 0x8, size: 5 } => { - Ok(Self::EAll(Address::get(buf)?, Address::get(buf)?)) - }, - Tag4 { flag: 0x8, size: 6 } => Ok(Self::ELet( - true, - Address::get(buf)?, - Address::get(buf)?, - Address::get(buf)?, - )), - Tag4 { flag: 0x8, size: 7 } => Ok(Self::ELet( - false, - Address::get(buf)?, - Address::get(buf)?, - Address::get(buf)?, - )), - Tag4 { flag: 0x9, size } => { - let bytes: Vec = Self::gets(size, buf)?; - Ok(Self::Blob(bytes)) - }, - Tag4 { flag: 0xA, size: 0 } => Ok(Self::Defn(Serialize::get(buf)?)), - Tag4 { flag: 0xA, size: 1 } => Ok(Self::Recr(Serialize::get(buf)?)), - Tag4 { flag: 0xA, size: 2 } => Ok(Self::Axio(Serialize::get(buf)?)), - Tag4 { flag: 0xA, size: 3 } => Ok(Self::Quot(Serialize::get(buf)?)), - Tag4 { flag: 0xA, size: 4 } => Ok(Self::CPrj(Serialize::get(buf)?)), - Tag4 { flag: 0xA, size: 5 } => Ok(Self::RPrj(Serialize::get(buf)?)), - Tag4 { flag: 0xA, size: 6 } => Ok(Self::IPrj(Serialize::get(buf)?)), - Tag4 { flag: 0xA, size: 7 } => Ok(Self::DPrj(Serialize::get(buf)?)), - Tag4 { flag: 0xB, size } => { - let xs: Vec = Self::gets(size, buf)?; - Ok(Self::Muts(xs)) - }, - Tag4 { flag: 0xE, size: 0 } => Ok(Self::Prof(Serialize::get(buf)?)), - Tag4 { flag: 0xE, size: 1 } => Ok(Self::Eval(Serialize::get(buf)?)), - Tag4 { flag: 0xE, size: 2 } => Ok(Self::Chck(Serialize::get(buf)?)), - Tag4 { flag: 0xE, size: 3 } => Ok(Self::Comm(Serialize::get(buf)?)), - Tag4 { flag: 0xE, size: 4 } => Ok(Self::Envn(Serialize::get(buf)?)), - Tag4 
{ flag: 0xE, size: 5 } => Ok(Self::Prim(Serialize::get(buf)?)), - Tag4 { flag: 0xF, size } => { - let nodes: Vec = Self::gets(size, buf)?; - Ok(Self::Meta(Metadata { nodes })) - }, - x => Err(format!("get Ixon invalid {x:?}")), - } - } -} +//! Ixon: Content-addressed serialization format for Lean kernel types. +//! +//! This module provides: +//! - Alpha-invariant representations of Lean expressions and constants +//! - Compact tag-based serialization (Tag4 for exprs, Tag2 for univs, Tag0 for ints) +//! - Content-addressed storage with sharing support +//! - Cryptographic commitments for ZK proofs + +pub mod comm; +pub mod constant; +pub mod env; +pub mod error; +pub mod expr; +pub mod metadata; +pub mod proof; +pub mod serialize; +pub mod sharing; +pub mod tag; +pub mod univ; + +// Re-export main types +pub use comm::Comm; +pub use constant::{ + Axiom, Constant, ConstantInfo, Constructor, ConstructorProj, DefKind, + Definition, DefinitionProj, Inductive, InductiveProj, MutConst, Quotient, + Recursor, RecursorProj, RecursorRule, +}; +pub use env::{Env, Named}; +pub use error::{CompileError, DecompileError, SerializeError}; +pub use expr::Expr; +pub use metadata::{ + ConstantMeta, DataValue, ExprMeta, ExprMetaData, KVMap, NameIndex, + NameReverseIndex, +}; +pub use proof::{ + CheckClaim, Claim, EvalClaim, Proof, RevealClaim, RevealConstantInfo, + RevealConstructorInfo, RevealMutConstInfo, RevealRecursorRule, +}; +pub use tag::{Tag0, Tag2, Tag4}; +pub use univ::Univ; +/// Shared test utilities for ixon modules. 
#[cfg(test)] pub mod tests { - use super::*; use quickcheck::{Arbitrary, Gen}; use std::ops::Range; @@ -1672,758 +54,425 @@ pub mod tests { } } - pub fn gen_vec(g: &mut Gen, size: usize, mut f: F) -> Vec - where - F: FnMut(&mut Gen) -> A, - { - let len = gen_range(g, 0..size); - let mut vec = Vec::with_capacity(len); - for _ in 0..len { - vec.push(f(g)); - } - vec - } - #[test] - fn unit_u64_trimmed() { - fn test(input: u64, expected: &Vec) -> bool { - let mut tmp = Vec::new(); - let n = u64_byte_count(input); - u64_put_trimmed_le(input, &mut tmp); - if tmp != *expected { - return false; - } - match u64_get_trimmed_le(n as usize, &mut tmp.as_slice()) { - Ok(out) => input == out, - Err(e) => { - println!("err: {e}"); - false - }, - } - } - assert!(test(0x0, &vec![])); - assert!(test(0x01, &vec![0x01])); - assert!(test(0x0000000000000100, &vec![0x00, 0x01])); - assert!(test(0x0000000000010000, &vec![0x00, 0x00, 0x01])); - assert!(test(0x0000000001000000, &vec![0x00, 0x00, 0x00, 0x01])); - assert!(test(0x0000000100000000, &vec![0x00, 0x00, 0x00, 0x00, 0x01])); - assert!(test( - 0x0000010000000000, - &vec![0x00, 0x00, 0x00, 0x00, 0x00, 0x01] - )); - assert!(test( - 0x0001000000000000, - &vec![0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01] - )); - assert!(test( - 0x0100000000000000, - &vec![0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01] - )); - assert!(test( - 0x0102030405060708, - &vec![0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01] - )); - assert!(test( - 0x57712D6CE2965701, - &vec![0x01, 0x57, 0x96, 0xE2, 0x6C, 0x2D, 0x71, 0x57] - )); - } - - #[quickcheck] - fn prop_u64_trimmed_le_readback(x: u64) -> bool { - let mut buf = Vec::new(); - let n = u64_byte_count(x); - u64_put_trimmed_le(x, &mut buf); - match u64_get_trimmed_le(n as usize, &mut buf.as_slice()) { - Ok(y) => x == y, - Err(e) => { - println!("err: {e}"); - false - }, - } - } - - #[allow(clippy::needless_pass_by_value)] - fn serialize_readback(x: S) -> bool { - let mut buf = Vec::new(); - Serialize::put(&x, 
&mut buf); - match S::get(&mut buf.as_slice()) { - Ok(y) => x == y, - Err(e) => { - println!("err: {e}"); - false - }, - } - } - - #[quickcheck] - fn prop_u8_readback(x: u8) -> bool { - serialize_readback(x) - } - #[quickcheck] - fn prop_u16_readback(x: u16) -> bool { - serialize_readback(x) - } - #[quickcheck] - fn prop_u32_readback(x: u32) -> bool { - serialize_readback(x) - } - #[quickcheck] - fn prop_u64_readback(x: u64) -> bool { - serialize_readback(x) - } - #[quickcheck] - fn prop_bool_readback(x: bool) -> bool { - serialize_readback(x) - } - - impl Arbitrary for Tag4 { - fn arbitrary(g: &mut Gen) -> Self { - let flag = u8::arbitrary(g) % 16; - Tag4 { flag, size: u64::arbitrary(g) } - } - } - - #[quickcheck] - fn prop_tag4_readback(x: Tag4) -> bool { - serialize_readback(x) - } - - impl Arbitrary for ByteArray { - fn arbitrary(g: &mut Gen) -> Self { - ByteArray(gen_vec(g, 12, u8::arbitrary)) - } - } - - #[quickcheck] - fn prop_bytearray_readback(x: ByteArray) -> bool { - serialize_readback(x) - } - - #[quickcheck] - fn prop_string_readback(x: String) -> bool { - serialize_readback(x) - } - - impl Arbitrary for Nat { - fn arbitrary(g: &mut Gen) -> Self { - Nat::from_le_bytes(&gen_vec(g, 12, u8::arbitrary)) - } - } - - #[quickcheck] - fn prop_nat_readback(x: Nat) -> bool { - serialize_readback(x) - } - - impl Arbitrary for Int { - fn arbitrary(g: &mut Gen) -> Self { - match u8::arbitrary(g) % 2 { - 0 => Int::OfNat(Nat::arbitrary(g)), - 1 => Int::NegSucc(Nat::arbitrary(g)), - _ => unreachable!(), - } - } - } - - #[quickcheck] - fn prop_int_readback(x: Int) -> bool { - serialize_readback(x) - } - - #[quickcheck] - fn prop_vec_bool_readback(x: Vec) -> bool { - serialize_readback(x) - } - - #[quickcheck] - fn prop_pack_bool_readback(x: Vec) -> bool { - let mut bools = x; - bools.truncate(8); - bools == unpack_bools(bools.len(), pack_bools(bools.clone())) - } - - impl Arbitrary for QuotKind { - fn arbitrary(g: &mut Gen) -> Self { - match u8::arbitrary(g) % 4 { - 0 
=> Self::Type, - 1 => Self::Ctor, - 2 => Self::Lift, - 3 => Self::Ind, - _ => unreachable!(), - } - } - } - - #[quickcheck] - fn prop_quotkind_readback(x: QuotKind) -> bool { - serialize_readback(x) - } - - impl Arbitrary for DefKind { - fn arbitrary(g: &mut Gen) -> Self { - match u8::arbitrary(g) % 3 { - 0 => Self::Definition, - 1 => Self::Opaque, - 2 => Self::Theorem, - _ => unreachable!(), - } - } - } - - #[quickcheck] - fn prop_defkind_readback(x: DefKind) -> bool { - serialize_readback(x) - } - - impl Arbitrary for BinderInfo { - fn arbitrary(g: &mut Gen) -> Self { - match u8::arbitrary(g) % 4 { - 0 => Self::Default, - 1 => Self::Implicit, - 2 => Self::StrictImplicit, - 3 => Self::InstImplicit, - _ => unreachable!(), - } - } - } - - #[quickcheck] - fn prop_binderinfo_readback(x: BinderInfo) -> bool { - serialize_readback(x) - } - - impl Arbitrary for ReducibilityHints { - fn arbitrary(g: &mut Gen) -> Self { - match u8::arbitrary(g) % 3 { - 0 => Self::Opaque, - 1 => Self::Abbrev, - 2 => Self::Regular(u32::arbitrary(g)), - _ => unreachable!(), - } - } - } - - #[quickcheck] - fn prop_reducibilityhints_readback(x: ReducibilityHints) -> bool { - serialize_readback(x) - } - - impl Arbitrary for DefinitionSafety { - fn arbitrary(g: &mut Gen) -> Self { - match u8::arbitrary(g) % 3 { - 0 => Self::Unsafe, - 1 => Self::Safe, - 2 => Self::Partial, - _ => unreachable!(), - } - } - } - - #[quickcheck] - fn prop_defsafety_readback(x: DefinitionSafety) -> bool { - serialize_readback(x) - } - - #[quickcheck] - fn prop_address_readback(x: Address) -> bool { - serialize_readback(x) - } - #[quickcheck] - fn prop_metaaddress_readback(x: MetaAddress) -> bool { - serialize_readback(x) - } - - impl Arbitrary for Quotient { - fn arbitrary(g: &mut Gen) -> Self { - Self { - lvls: Nat::arbitrary(g), - typ: Address::arbitrary(g), - kind: QuotKind::arbitrary(g), - } - } - } - - #[quickcheck] - fn prop_quotient_readback(x: Quotient) -> bool { - serialize_readback(x) - } - - impl Arbitrary 
for Axiom { - fn arbitrary(g: &mut Gen) -> Self { - Self { - lvls: Nat::arbitrary(g), - typ: Address::arbitrary(g), - is_unsafe: bool::arbitrary(g), - } - } - } - - #[quickcheck] - fn prop_axiom_readback(x: Axiom) -> bool { - serialize_readback(x) - } - - impl Arbitrary for Definition { - fn arbitrary(g: &mut Gen) -> Self { - Self { - kind: DefKind::arbitrary(g), - safety: DefinitionSafety::arbitrary(g), - lvls: Nat::arbitrary(g), - typ: Address::arbitrary(g), - value: Address::arbitrary(g), - } - } - } - - #[quickcheck] - fn prop_definition_readback(x: Definition) -> bool { - serialize_readback(x) - } - - impl Arbitrary for Constructor { - fn arbitrary(g: &mut Gen) -> Self { - Self { - lvls: Nat::arbitrary(g), - typ: Address::arbitrary(g), - cidx: Nat::arbitrary(g), - params: Nat::arbitrary(g), - fields: Nat::arbitrary(g), - is_unsafe: bool::arbitrary(g), - } - } - } - - #[quickcheck] - fn prop_constructor_readback(x: Constructor) -> bool { - serialize_readback(x) - } - - impl Arbitrary for RecursorRule { - fn arbitrary(g: &mut Gen) -> Self { - Self { fields: Nat::arbitrary(g), rhs: Address::arbitrary(g) } - } - } - - #[quickcheck] - fn prop_recursorrule_readback(x: RecursorRule) -> bool { - serialize_readback(x) - } - - impl Arbitrary for Recursor { - fn arbitrary(g: &mut Gen) -> Self { - let x = gen_range(g, 0..9); - let mut rules = vec![]; - for _ in 0..x { - rules.push(RecursorRule::arbitrary(g)); - } - Self { - lvls: Nat::arbitrary(g), - typ: Address::arbitrary(g), - params: Nat::arbitrary(g), - indices: Nat::arbitrary(g), - motives: Nat::arbitrary(g), - minors: Nat::arbitrary(g), - rules, - k: bool::arbitrary(g), - is_unsafe: bool::arbitrary(g), - } - } - } - - #[quickcheck] - fn prop_recursor_readback(x: Recursor) -> bool { - serialize_readback(x) - } - - impl Arbitrary for Inductive { - fn arbitrary(g: &mut Gen) -> Self { - let x = gen_range(g, 0..9); - let mut ctors = vec![]; - for _ in 0..x { - ctors.push(Constructor::arbitrary(g)); - } - Self { - lvls: 
Nat::arbitrary(g), - typ: Address::arbitrary(g), - params: Nat::arbitrary(g), - indices: Nat::arbitrary(g), - ctors, - nested: Nat::arbitrary(g), - recr: bool::arbitrary(g), - refl: bool::arbitrary(g), - is_unsafe: bool::arbitrary(g), + pub fn next_case(g: &mut Gen, gens: &[(usize, A)]) -> A { + let sum: usize = gens.iter().map(|x| x.0).sum(); + let mut weight: usize = gen_range(g, 1..(sum + 1)); + for (n, case) in gens { + if *n == 0 { + continue; } - } - } - - #[quickcheck] - fn prop_inductive_readback(x: Inductive) -> bool { - serialize_readback(x) - } - - impl Arbitrary for InductiveProj { - fn arbitrary(g: &mut Gen) -> Self { - Self { block: Address::arbitrary(g), idx: Nat::arbitrary(g) } - } - } - - #[quickcheck] - fn prop_inductiveproj_readback(x: InductiveProj) -> bool { - serialize_readback(x) - } - - impl Arbitrary for ConstructorProj { - fn arbitrary(g: &mut Gen) -> Self { - Self { - block: Address::arbitrary(g), - idx: Nat::arbitrary(g), - cidx: Nat::arbitrary(g), + match weight.checked_sub(*n) { + None | Some(0) => return *case, + _ => weight -= *n, } } + gens.last().unwrap().1 } - #[quickcheck] - fn prop_constructorproj_readback(x: ConstructorProj) -> bool { - serialize_readback(x) + pub fn gen_vec(g: &mut Gen, size: usize, mut f: F) -> Vec + where + F: FnMut(&mut Gen) -> A, + { + let len = gen_range(g, 0..size); + (0..len).map(|_| f(g)).collect() } +} - impl Arbitrary for RecursorProj { - fn arbitrary(g: &mut Gen) -> Self { - Self { block: Address::arbitrary(g), idx: Nat::arbitrary(g) } - } - } +/// Tests verifying the byte-level examples in docs/Ixon.md are correct. 
+#[cfg(test)] +mod doc_examples { + use super::*; + use crate::ix::address::Address; - #[quickcheck] - fn prop_recursorproj_readback(x: RecursorProj) -> bool { - serialize_readback(x) - } + // ========================================================================= + // Tag4 examples (docs section "Tag4 (4-bit flag)") + // ========================================================================= - impl Arbitrary for DefinitionProj { - fn arbitrary(g: &mut Gen) -> Self { - Self { block: Address::arbitrary(g), idx: Nat::arbitrary(g) } - } + #[test] + fn tag4_small_value() { + // Tag4 { flag: 0x1, size: 5 } + // Header: 0b0001_0_101 = 0x15 + let tag = Tag4::new(0x1, 5); + let mut buf = Vec::new(); + tag.put(&mut buf); + assert_eq!(buf, vec![0x15], "Tag4 {{ flag: 1, size: 5 }} should be 0x15"); } - #[quickcheck] - fn prop_definitionproj_readback(x: DefinitionProj) -> bool { - serialize_readback(x) + #[test] + fn tag4_large_value() { + // Tag4 { flag: 0x2, size: 256 } + // Header: 0b0010_1_001 = 0x29 (large=1, 2 bytes follow) + // Bytes: 0x00 0x01 (256 in little-endian) + let tag = Tag4::new(0x2, 256); + let mut buf = Vec::new(); + tag.put(&mut buf); + assert_eq!( + buf, + vec![0x29, 0x00, 0x01], + "Tag4 {{ flag: 2, size: 256 }} should be [0x29, 0x00, 0x01]" + ); } - impl Arbitrary for Comm { - fn arbitrary(g: &mut Gen) -> Self { - Self { secret: Address::arbitrary(g), payload: Address::arbitrary(g) } - } - } + // ========================================================================= + // Tag2 examples (docs section "Tag2 (2-bit flag)") + // ========================================================================= - #[quickcheck] - fn prop_comm_readback(x: Comm) -> bool { - serialize_readback(x) + #[test] + fn tag2_small_value() { + // Tag2 { flag: 0, size: 15 } + // Header: 0b00_0_01111 = 0x0F + let tag = Tag2::new(0, 15); + let mut buf = Vec::new(); + tag.put(&mut buf); + assert_eq!(buf, vec![0x0F], "Tag2 {{ flag: 0, size: 15 }} should be 0x0F"); } - impl 
Arbitrary for EvalClaim { - fn arbitrary(g: &mut Gen) -> Self { - Self { - lvls: Address::arbitrary(g), - typ: Address::arbitrary(g), - input: Address::arbitrary(g), - output: Address::arbitrary(g), - } - } + #[test] + fn tag2_large_value() { + // Tag2 { flag: 3, size: 100 } + // 100 doesn't fit in 5 bits, needs 1 byte to encode + // Header: 0b11_1_00000 = 0xE0 (flag=3, large=1, byte_count-1=0) + // Bytes: 0x64 (100) + let tag = Tag2::new(3, 100); + let mut buf = Vec::new(); + tag.put(&mut buf); + assert_eq!( + buf, + vec![0xE0, 0x64], + "Tag2 {{ flag: 3, size: 100 }} should be [0xE0, 0x64]" + ); } - #[quickcheck] - fn prop_evalclaim_readback(x: EvalClaim) -> bool { - serialize_readback(x) - } + // ========================================================================= + // Tag0 examples (docs section "Tag0 (no flag)") + // ========================================================================= - impl Arbitrary for CheckClaim { - fn arbitrary(g: &mut Gen) -> Self { - Self { - lvls: Address::arbitrary(g), - typ: Address::arbitrary(g), - value: Address::arbitrary(g), - } - } + #[test] + fn tag0_small_value() { + // Tag0 { size: 42 } + // Header: 0b0_0101010 = 0x2A + let tag = Tag0::new(42); + let mut buf = Vec::new(); + tag.put(&mut buf); + assert_eq!(buf, vec![0x2A], "Tag0 {{ size: 42 }} should be 0x2A"); } - #[quickcheck] - fn prop_checkclaim_readback(x: CheckClaim) -> bool { - serialize_readback(x) + #[test] + fn tag0_large_value() { + // Tag0 { size: 1000 } + // 1000 = 0x3E8, needs 2 bytes to encode + // Header: 0b1_0000001 = 0x81 (large=1, byte_count-1=1) + // Bytes: 0xE8 0x03 (1000 in little-endian) + let tag = Tag0::new(1000); + let mut buf = Vec::new(); + tag.put(&mut buf); + assert_eq!( + buf, + vec![0x81, 0xE8, 0x03], + "Tag0 {{ size: 1000 }} should be [0x81, 0xE8, 0x03]" + ); } - impl Arbitrary for Claim { - fn arbitrary(g: &mut Gen) -> Self { - let x = gen_range(g, 0..1); - match x { - 0 => Self::Evals(EvalClaim::arbitrary(g)), - _ => 
Self::Checks(CheckClaim::arbitrary(g)), - } - } - } + // ========================================================================= + // Universe examples (docs section "Universes") + // ========================================================================= - #[quickcheck] - fn prop_claim_readback(x: Claim) -> bool { - serialize_readback(x) + #[test] + fn univ_zero() { + // Univ::Zero -> Tag2 { flag: 0, size: 0 } -> 0x00 + let mut buf = Vec::new(); + univ::put_univ(&Univ::zero(), &mut buf); + assert_eq!(buf, vec![0x00], "Univ::Zero should be 0x00"); } - impl Arbitrary for Proof { - fn arbitrary(g: &mut Gen) -> Self { - let x = gen_range(g, 0..32); - let mut bytes = vec![]; - for _ in 0..x { - bytes.push(u8::arbitrary(g)); - } - Proof { claim: Claim::arbitrary(g), proof: bytes } - } + #[test] + fn univ_succ_zero() { + // Univ::Succ(Zero) uses telescope compression: + // Tag2 { flag: 0, size: 1 } (succ_count=1) + base (Zero) + // = 0b00_0_00001 = 0x01, then Zero = 0x00 + let mut buf = Vec::new(); + univ::put_univ(&Univ::succ(Univ::zero()), &mut buf); + assert_eq!( + buf, + vec![0x01, 0x00], + "Univ::Succ(Zero) should be [0x01, 0x00]" + ); } - #[quickcheck] - fn prop_proof_readback(x: Proof) -> bool { - serialize_readback(x) + #[test] + fn univ_var_1() { + // Univ::Var(1) -> Tag2 { flag: 3, size: 1 } + // = 0b11_0_00001 = 0xC1 + let mut buf = Vec::new(); + univ::put_univ(&Univ::var(1), &mut buf); + assert_eq!(buf, vec![0xC1], "Univ::Var(1) should be 0xC1"); } - impl Arbitrary for Env { - fn arbitrary(g: &mut Gen) -> Self { - let x = gen_range(g, 0..32); - let mut env = vec![]; - for _ in 0..x { - env.push(MetaAddress::arbitrary(g)); - } - Env { env } - } + #[test] + fn univ_max_zero_var1() { + // Univ::Max(Zero, Var(1)) -> Tag2 { flag: 1, size: 0 } + Zero + Var(1) + // = 0b01_0_00000 = 0x40, then 0x00 (Zero), then 0xC1 (Var(1)) + let mut buf = Vec::new(); + univ::put_univ(&Univ::max(Univ::zero(), Univ::var(1)), &mut buf); + assert_eq!( + buf, + vec![0x40, 0x00, 
0xC1], + "Univ::Max(Zero, Var(1)) should be [0x40, 0x00, 0xC1]" + ); } - #[quickcheck] - fn prop_env_readback(x: Env) -> bool { - serialize_readback(x) - } + // ========================================================================= + // Expression examples (docs section "Expression Examples") + // ========================================================================= - impl Arbitrary for Substring { - fn arbitrary(g: &mut Gen) -> Self { - Substring { - str: Address::arbitrary(g), - start_pos: Nat::arbitrary(g), - stop_pos: Nat::arbitrary(g), - } - } + #[test] + fn expr_var_0() { + // Expr::Var(0) -> Tag4 { flag: 0x1, size: 0 } -> 0x10 + let mut buf = Vec::new(); + serialize::put_expr(&Expr::Var(0), &mut buf); + assert_eq!(buf, vec![0x10], "Expr::Var(0) should be 0x10"); } - #[quickcheck] - fn prop_substring_readback(x: Substring) -> bool { - serialize_readback(x) + #[test] + fn expr_sort_0() { + // Expr::Sort(0) -> Tag4 { flag: 0x0, size: 0 } -> 0x00 + let mut buf = Vec::new(); + serialize::put_expr(&Expr::Sort(0), &mut buf); + assert_eq!(buf, vec![0x00], "Expr::Sort(0) should be 0x00"); } - impl Arbitrary for SourceInfo { - fn arbitrary(g: &mut Gen) -> Self { - match u8::arbitrary(g) % 3 { - 0 => Self::Original( - Substring::arbitrary(g), - Nat::arbitrary(g), - Substring::arbitrary(g), - Nat::arbitrary(g), - ), - 1 => Self::Synthetic( - Nat::arbitrary(g), - Nat::arbitrary(g), - bool::arbitrary(g), - ), - 2 => Self::None, - _ => unreachable!(), - } - } + #[test] + fn expr_ref_no_univs() { + // Expr::Ref(0, []) -> Tag4 { flag: 0x2, size: 0 } + idx(0) + // = 0x20 + 0x00 + let mut buf = Vec::new(); + serialize::put_expr(&Expr::Ref(0, vec![]), &mut buf); + assert_eq!( + buf, + vec![0x20, 0x00], + "Expr::Ref(0, []) should be [0x20, 0x00]" + ); } - #[quickcheck] - fn prop_sourceinfo_readback(x: SourceInfo) -> bool { - serialize_readback(x) + #[test] + fn expr_share_5() { + // Expr::Share(5) -> Tag4 { flag: 0xB, size: 5 } -> 0xB5 + let mut buf = Vec::new(); + 
serialize::put_expr(&Expr::Share(5), &mut buf); + assert_eq!(buf, vec![0xB5], "Expr::Share(5) should be 0xB5"); } - impl Arbitrary for Preresolved { - fn arbitrary(g: &mut Gen) -> Self { - match u8::arbitrary(g) % 2 { - 0 => Self::Namespace(Address::arbitrary(g)), - 1 => { - Self::Decl(Address::arbitrary(g), gen_vec(g, 12, Address::arbitrary)) - }, - _ => unreachable!(), - } - } + #[test] + fn expr_app_telescope() { + // App(App(App(f, a), b), c) with f=Var(3), a=Var(2), b=Var(1), c=Var(0) + // -> Tag4 { flag: 0x7, size: 3 } + f + a + b + c + // = 0x73 + 0x13 + 0x12 + 0x11 + 0x10 + let expr = Expr::app( + Expr::app(Expr::app(Expr::var(3), Expr::var(2)), Expr::var(1)), + Expr::var(0), + ); + let mut buf = Vec::new(); + serialize::put_expr(&expr, &mut buf); + assert_eq!( + buf, + vec![0x73, 0x13, 0x12, 0x11, 0x10], + "App telescope should be [0x73, 0x13, 0x12, 0x11, 0x10]" + ); } - #[quickcheck] - fn prop_preresolved_readback(x: Preresolved) -> bool { - serialize_readback(x) + #[test] + fn expr_lam_telescope() { + // Lam(t1, Lam(t2, Lam(t3, body))) with all types Sort(0) and body Var(0) + // -> Tag4 { flag: 0x8, size: 3 } + t1 + t2 + t3 + body + // = 0x83 + 0x00 + 0x00 + 0x00 + 0x10 + let ty = Expr::sort(0); + let expr = Expr::lam( + ty.clone(), + Expr::lam(ty.clone(), Expr::lam(ty.clone(), Expr::var(0))), + ); + let mut buf = Vec::new(); + serialize::put_expr(&expr, &mut buf); + assert_eq!( + buf, + vec![0x83, 0x00, 0x00, 0x00, 0x10], + "Lam telescope should be [0x83, 0x00, 0x00, 0x00, 0x10]" + ); } - impl Arbitrary for Syntax { - fn arbitrary(g: &mut Gen) -> Self { - match u8::arbitrary(g) % 4 { - 0 => Self::Missing, - 1 => Self::Node( - SourceInfo::arbitrary(g), - Address::arbitrary(g), - gen_vec(g, 12, Address::arbitrary), - ), - 2 => Self::Atom(SourceInfo::arbitrary(g), Address::arbitrary(g)), - 3 => Self::Ident( - SourceInfo::arbitrary(g), - Substring::arbitrary(g), - Address::arbitrary(g), - gen_vec(g, 12, Preresolved::arbitrary), - ), - _ => unreachable!(), - 
} - } - } + // ========================================================================= + // Claim/Proof examples (docs section "Proofs") + // ========================================================================= - #[quickcheck] - fn prop_syntax_readback(x: Syntax) -> bool { - serialize_readback(x) + #[test] + fn eval_claim_tag() { + // EvalClaim -> Tag4 { flag: 0xE, size: 4 } -> 0xE4 + let claim = Claim::Evals(EvalClaim { + input: Address::hash(b"input"), + output: Address::hash(b"output"), + }); + let mut buf = Vec::new(); + claim.put(&mut buf); + assert_eq!(buf[0], 0xE4, "EvalClaim should start with 0xE4"); + assert_eq!(buf.len(), 1 + 64, "EvalClaim should be 1 + 2*32 = 65 bytes"); } - impl Arbitrary for MutConst { - fn arbitrary(g: &mut Gen) -> Self { - match u8::arbitrary(g) % 3 { - 0 => Self::Defn(Definition::arbitrary(g)), - 1 => Self::Indc(Inductive::arbitrary(g)), - 2 => Self::Recr(Recursor::arbitrary(g)), - _ => unreachable!(), - } - } + #[test] + fn eval_proof_tag() { + // EvalProof -> Tag4 { flag: 0xE, size: 2 } -> 0xE2 + let proof = Proof::new( + Claim::Evals(EvalClaim { + input: Address::hash(b"input"), + output: Address::hash(b"output"), + }), + vec![1, 2, 3, 4], + ); + let mut buf = Vec::new(); + proof.put(&mut buf); + assert_eq!(buf[0], 0xE2, "EvalProof should start with 0xE2"); + // 1 (tag) + 64 (addresses) + 1 (len=4) + 4 (proof bytes) = 70 + assert_eq!(buf.len(), 70, "EvalProof with 4 bytes should be 70 bytes"); + assert_eq!(buf[65], 0x04, "proof.len should be 0x04"); + assert_eq!(&buf[66..70], &[1, 2, 3, 4], "proof bytes should be [1,2,3,4]"); } - #[quickcheck] - fn prop_mutconst_readback(x: MutConst) -> bool { - serialize_readback(x) + #[test] + fn check_claim_tag() { + // CheckClaim -> Tag4 { flag: 0xE, size: 3 } -> 0xE3 + let claim = Claim::Checks(CheckClaim { value: Address::hash(b"value") }); + let mut buf = Vec::new(); + claim.put(&mut buf); + assert_eq!(buf[0], 0xE3, "CheckClaim should start with 0xE3"); + assert_eq!(buf.len(), 1 + 
32, "CheckClaim should be 1 + 1*32 = 33 bytes"); } - impl Arbitrary for BuiltIn { - fn arbitrary(g: &mut Gen) -> Self { - match u8::arbitrary(g) % 3 { - 0 => Self::Obj, - 1 => Self::Neutral, - 2 => Self::Unreachable, - _ => unreachable!(), - } - } + #[test] + fn check_proof_tag() { + // CheckProof -> Tag4 { flag: 0xE, size: 1 } -> 0xE1 + let proof = Proof::new( + Claim::Checks(CheckClaim { value: Address::hash(b"value") }), + vec![5, 6, 7], + ); + let mut buf = Vec::new(); + proof.put(&mut buf); + assert_eq!(buf[0], 0xE1, "CheckProof should start with 0xE1"); } - #[quickcheck] - fn prop_builtin_readback(x: BuiltIn) -> bool { - serialize_readback(x) - } + // ========================================================================= + // Definition packed byte example (docs "Comprehensive Worked Example") + // ========================================================================= - impl Arbitrary for DataValue { - fn arbitrary(g: &mut Gen) -> Self { - match u8::arbitrary(g) % 6 { - 0 => Self::OfString(Address::arbitrary(g)), - 1 => Self::OfBool(bool::arbitrary(g)), - 2 => Self::OfName(Address::arbitrary(g)), - 3 => Self::OfNat(Address::arbitrary(g)), - 4 => Self::OfInt(Address::arbitrary(g)), - 5 => Self::OfSyntax(Address::arbitrary(g)), - _ => unreachable!(), - } - } + #[test] + fn definition_packed_kind_safety() { + // DefKind::Definition = 0, DefinitionSafety::Safe = 1 + // Packed: (0 << 2) | 1 = 0x01 + use crate::ix::env::DefinitionSafety; + use constant::{DefKind, Definition}; + + let def = Definition { + kind: DefKind::Definition, + safety: DefinitionSafety::Safe, + lvls: 0, + typ: Expr::sort(0), + value: Expr::var(0), + }; + let mut buf = Vec::new(); + def.put(&mut buf); + assert_eq!(buf[0], 0x01, "Definition(Safe) packed byte should be 0x01"); } - #[quickcheck] - fn prop_datavalue_readback(x: DataValue) -> bool { - serialize_readback(x) + #[test] + fn definition_opaque_unsafe() { + // DefKind::Opaque = 1, DefinitionSafety::Unsafe = 0 + // Packed: (1 << 2) | 
0 = 0x04 + use crate::ix::env::DefinitionSafety; + use constant::{DefKind, Definition}; + + let def = Definition { + kind: DefKind::Opaque, + safety: DefinitionSafety::Unsafe, + lvls: 0, + typ: Expr::sort(0), + value: Expr::var(0), + }; + let mut buf = Vec::new(); + def.put(&mut buf); + assert_eq!(buf[0], 0x04, "Opaque(Unsafe) packed byte should be 0x04"); } - impl Arbitrary for Metadatum { - fn arbitrary(g: &mut Gen) -> Self { - match u8::arbitrary(g) % 7 { - 0 => Self::Link(Address::arbitrary(g)), - 1 => Self::Info(BinderInfo::arbitrary(g)), - 2 => Self::Hints(ReducibilityHints::arbitrary(g)), - 3 => Self::Links(gen_vec(g, 12, Address::arbitrary)), - 4 => Self::Map(gen_vec(g, 12, |g| { - (Address::arbitrary(g), Address::arbitrary(g)) - })), - 5 => Self::KVMap(gen_vec(g, 12, |g| { - (Address::arbitrary(g), DataValue::arbitrary(g)) - })), - 6 => Self::Muts(gen_vec(g, 12, |g| gen_vec(g, 12, Address::arbitrary))), - _ => unreachable!(), - } - } + #[test] + fn definition_theorem_partial() { + // DefKind::Theorem = 2, DefinitionSafety::Partial = 2 + // Packed: (2 << 2) | 2 = 0x0A + use crate::ix::env::DefinitionSafety; + use constant::{DefKind, Definition}; + + let def = Definition { + kind: DefKind::Theorem, + safety: DefinitionSafety::Partial, + lvls: 0, + typ: Expr::sort(0), + value: Expr::var(0), + }; + let mut buf = Vec::new(); + def.put(&mut buf); + assert_eq!(buf[0], 0x0A, "Theorem(Partial) packed byte should be 0x0A"); } - #[quickcheck] - fn prop_metadatum_readback(x: Metadatum) -> bool { - serialize_readback(x) - } + // ========================================================================= + // Constant tag examples + // ========================================================================= - impl Arbitrary for Metadata { - fn arbitrary(g: &mut Gen) -> Self { - Metadata { nodes: gen_vec(g, 12, Metadatum::arbitrary) } - } + #[test] + fn constant_defn_tag() { + // Constant with Defn -> Tag4 { flag: 0xD, size: 0 } -> 0xD0 + use 
crate::ix::env::DefinitionSafety; + use constant::{Constant, ConstantInfo, DefKind, Definition}; + + let constant = Constant::new(ConstantInfo::Defn(Definition { + kind: DefKind::Definition, + safety: DefinitionSafety::Safe, + lvls: 0, + typ: Expr::sort(0), + value: Expr::var(0), + })); + let mut buf = Vec::new(); + constant.put(&mut buf); + assert_eq!(buf[0], 0xD0, "Constant(Defn) should start with 0xD0"); } - #[quickcheck] - fn prop_metadata_readback(x: Metadata) -> bool { - serialize_readback(x) + #[test] + fn constant_muts_tag() { + // Muts with 3 entries -> Tag4 { flag: 0xC, size: 3 } -> 0xC3 + use crate::ix::env::DefinitionSafety; + use constant::{Constant, ConstantInfo, DefKind, Definition, MutConst}; + + let def = Definition { + kind: DefKind::Definition, + safety: DefinitionSafety::Safe, + lvls: 0, + typ: Expr::sort(0), + value: Expr::var(0), + }; + let constant = Constant::new(ConstantInfo::Muts(vec![ + MutConst::Defn(def.clone()), + MutConst::Defn(def.clone()), + MutConst::Defn(def), + ])); + let mut buf = Vec::new(); + constant.put(&mut buf); + assert_eq!(buf[0], 0xC3, "Muts with 3 entries should start with 0xC3"); } - impl Arbitrary for Ixon { - fn arbitrary(g: &mut Gen) -> Self { - match u8::arbitrary(g) % 36 { - 0 => Self::NAnon, - 1 => Self::NStr(Address::arbitrary(g), Address::arbitrary(g)), - 2 => Self::NNum(Address::arbitrary(g), Address::arbitrary(g)), - 3 => Self::UZero, - 4 => Self::USucc(Address::arbitrary(g)), - 5 => Self::UMax(Address::arbitrary(g), Address::arbitrary(g)), - 6 => Self::UIMax(Address::arbitrary(g), Address::arbitrary(g)), - 7 => Self::UVar(Nat::arbitrary(g)), - 8 => Self::EVar(Nat::arbitrary(g)), - 9 => { - Self::ERef(Address::arbitrary(g), gen_vec(g, 12, Address::arbitrary)) - }, - 10 => Self::ERec(Nat::arbitrary(g), gen_vec(g, 12, Address::arbitrary)), - 11 => Self::EPrj( - Address::arbitrary(g), - Nat::arbitrary(g), - Address::arbitrary(g), - ), - 12 => Self::ESort(Address::arbitrary(g)), - 13 => 
Self::EStr(Address::arbitrary(g)), - 14 => Self::ENat(Address::arbitrary(g)), - 15 => Self::EApp(Address::arbitrary(g), Address::arbitrary(g)), - 16 => Self::ELam(Address::arbitrary(g), Address::arbitrary(g)), - 17 => Self::EAll(Address::arbitrary(g), Address::arbitrary(g)), - 18 => Self::ELet( - bool::arbitrary(g), - Address::arbitrary(g), - Address::arbitrary(g), - Address::arbitrary(g), - ), - 19 => Self::Blob(gen_vec(g, 12, u8::arbitrary)), - 20 => Self::Defn(Definition::arbitrary(g)), - 21 => Self::Recr(Recursor::arbitrary(g)), - 22 => Self::Axio(Axiom::arbitrary(g)), - 23 => Self::Quot(Quotient::arbitrary(g)), - 24 => Self::CPrj(ConstructorProj::arbitrary(g)), - 25 => Self::RPrj(RecursorProj::arbitrary(g)), - 26 => Self::IPrj(InductiveProj::arbitrary(g)), - 27 => Self::DPrj(DefinitionProj::arbitrary(g)), - 28 => Self::Muts(gen_vec(g, 12, MutConst::arbitrary)), - 29 => Self::Prof(Proof::arbitrary(g)), - 30 => Self::Eval(EvalClaim::arbitrary(g)), - 31 => Self::Chck(CheckClaim::arbitrary(g)), - 32 => Self::Comm(Comm::arbitrary(g)), - 33 => Self::Envn(Env::arbitrary(g)), - 34 => Self::Prim(BuiltIn::arbitrary(g)), - 35 => Self::Meta(Metadata::arbitrary(g)), - _ => unreachable!(), - } - } - } + // ========================================================================= + // Environment tag + // ========================================================================= - #[quickcheck] - fn prop_ixon_readback(x: Ixon) -> bool { - serialize_readback(x) + #[test] + fn env_tag() { + // Env -> Tag4 { flag: 0xE, size: 0 } -> 0xE0 + let env = Env::new(); + let mut buf = Vec::new(); + env.put(&mut buf).unwrap(); + assert_eq!(buf[0], 0xE0, "Env should start with 0xE0 (flag=E, variant=0)"); } } diff --git a/src/ix/ixon/comm.rs b/src/ix/ixon/comm.rs new file mode 100644 index 00000000..408a9c74 --- /dev/null +++ b/src/ix/ixon/comm.rs @@ -0,0 +1,147 @@ +//! Cryptographic commitments. 
+ +#![allow(clippy::map_err_ignore)] +#![allow(clippy::needless_pass_by_value)] + +use crate::ix::address::Address; + +use super::tag::Tag4; + +/// Tag4 variant for Commitment (flag=0xE, size=5). +pub const VARIANT: u64 = 5; + +/// A cryptographic commitment. +/// +/// The commitment is computed as `blake3(Tag4{0xE,5} || secret || payload)` where: +/// - `secret` is the address of a random blinding factor (stored in blobs) +/// - `payload` is the address of the committed constant +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub struct Comm { + /// Address of the blinding secret (in blobs map) + pub secret: Address, + /// Address of the committed constant + pub payload: Address, +} + +impl Comm { + pub fn new(secret: Address, payload: Address) -> Self { + Comm { secret, payload } + } + + /// Serialize without tag header (for use within Env section serialization). + pub fn put(&self, buf: &mut Vec) { + buf.extend_from_slice(self.secret.as_bytes()); + buf.extend_from_slice(self.payload.as_bytes()); + } + + /// Deserialize without tag header (for use within Env section serialization). + pub fn get(buf: &mut &[u8]) -> Result { + if buf.len() < 64 { + return Err(format!("Comm::get: need 64 bytes, have {}", buf.len())); + } + let (secret_bytes, rest) = buf.split_at(32); + let (payload_bytes, rest) = rest.split_at(32); + *buf = rest; + + let secret = Address::from_slice(secret_bytes) + .map_err(|_| "Comm::get: invalid secret")?; + let payload = Address::from_slice(payload_bytes) + .map_err(|_| "Comm::get: invalid payload")?; + + Ok(Comm { secret, payload }) + } + + /// Serialize with Tag4{0xE, 5} header. + pub fn put_tagged(&self, buf: &mut Vec) { + Tag4::new(0xE, VARIANT).put(buf); + self.put(buf); + } + + /// Deserialize with Tag4{0xE, 5} header. 
+ pub fn get_tagged(buf: &mut &[u8]) -> Result { + let tag = Tag4::get(buf)?; + if tag.flag != 0xE || tag.size != VARIANT { + return Err(format!( + "Comm::get_tagged: expected Tag4{{0xE, 5}}, got Tag4{{{}, {}}}", + tag.flag, tag.size + )); + } + Self::get(buf) + } + + /// Serialize with tag and compute content address: `blake3(0xE5 + secret + payload)`. + pub fn commit(&self) -> Address { + let mut buf = Vec::new(); + self.put_tagged(&mut buf); + Address::hash(&buf) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use quickcheck::Arbitrary; + + impl Arbitrary for Comm { + fn arbitrary(g: &mut quickcheck::Gen) -> Self { + Comm::new(Address::arbitrary(g), Address::arbitrary(g)) + } + } + + fn comm_roundtrip(c: &Comm) -> bool { + let mut buf = Vec::new(); + c.put(&mut buf); + match Comm::get(&mut buf.as_slice()) { + Ok(c2) => c == &c2, + Err(_) => false, + } + } + + fn comm_tagged_roundtrip(c: &Comm) -> bool { + let mut buf = Vec::new(); + c.put_tagged(&mut buf); + match Comm::get_tagged(&mut buf.as_slice()) { + Ok(c2) => c == &c2, + Err(_) => false, + } + } + + #[quickcheck] + fn prop_comm_roundtrip(c: Comm) -> bool { + comm_roundtrip(&c) + } + + #[quickcheck] + fn prop_comm_tagged_roundtrip(c: Comm) -> bool { + comm_tagged_roundtrip(&c) + } + + #[test] + fn test_comm_roundtrip() { + let comm = Comm::new(Address::hash(b"secret"), Address::hash(b"payload")); + assert!(comm_roundtrip(&comm)); + } + + #[test] + fn test_comm_tagged_roundtrip() { + let comm = Comm::new(Address::hash(b"secret"), Address::hash(b"payload")); + assert!(comm_tagged_roundtrip(&comm)); + } + + #[test] + fn test_comm_tagged_tag_byte() { + let comm = Comm::new(Address::hash(b"a"), Address::hash(b"b")); + let mut buf = Vec::new(); + comm.put_tagged(&mut buf); + assert_eq!(buf[0], 0xE5, "Comm tagged should start with 0xE5"); + } + + #[test] + fn test_comm_commit() { + let comm = Comm::new(Address::hash(b"secret"), Address::hash(b"payload")); + let addr = comm.commit(); + // Commit should be 
deterministic + let addr2 = comm.commit(); + assert_eq!(addr, addr2); + } +} diff --git a/src/ix/ixon/constant.rs b/src/ix/ixon/constant.rs new file mode 100644 index 00000000..a7979dde --- /dev/null +++ b/src/ix/ixon/constant.rs @@ -0,0 +1,456 @@ +//! Constants in the Ixon format. +//! +//! These are alpha-invariant representations of Lean constants. +//! Metadata (names, binder info) is stored separately in the names map. +//! +//! The sharing vector is stored at the Constant level, shared across +//! all expressions in the constant (including mutual block members). + +#![allow(clippy::needless_pass_by_value)] + +use std::sync::Arc; + +use crate::ix::address::Address; +use crate::ix::env::{DefinitionSafety, QuotKind}; + +use super::expr::Expr; +use super::univ::Univ; + +/// Definition kind (definition, opaque, or theorem). +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum DefKind { + Definition, + Opaque, + Theorem, +} + +/// A definition constant. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Definition { + pub kind: DefKind, + pub safety: DefinitionSafety, + /// Number of universe parameters + pub lvls: u64, + /// Type expression + pub typ: Arc, + /// Value expression + pub value: Arc, +} + +/// A recursor rule (computation rule). +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct RecursorRule { + /// Number of fields in this constructor + pub fields: u64, + /// Right-hand side expression + pub rhs: Arc, +} + +/// A recursor constant. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Recursor { + /// K-like recursor (eliminates into Prop) + pub k: bool, + pub is_unsafe: bool, + /// Number of universe parameters + pub lvls: u64, + /// Number of parameters + pub params: u64, + /// Number of indices + pub indices: u64, + /// Number of motives + pub motives: u64, + /// Number of minor premises + pub minors: u64, + /// Type expression + pub typ: Arc, + /// Computation rules + pub rules: Vec, +} + +/// An axiom constant. 
+#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Axiom { + pub is_unsafe: bool, + /// Number of universe parameters + pub lvls: u64, + /// Type expression + pub typ: Arc, +} + +/// A quotient constant. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Quotient { + pub kind: QuotKind, + /// Number of universe parameters + pub lvls: u64, + /// Type expression + pub typ: Arc, +} + +/// A constructor within an inductive type. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Constructor { + pub is_unsafe: bool, + /// Number of universe parameters + pub lvls: u64, + /// Constructor index + pub cidx: u64, + /// Number of parameters + pub params: u64, + /// Number of fields + pub fields: u64, + /// Type expression + pub typ: Arc, +} + +/// An inductive type. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Inductive { + /// Has recursive occurrences + pub recr: bool, + /// Is reflexive + pub refl: bool, + pub is_unsafe: bool, + /// Number of universe parameters + pub lvls: u64, + /// Number of parameters + pub params: u64, + /// Number of indices + pub indices: u64, + /// Nested inductive depth + pub nested: u64, + /// Type expression + pub typ: Arc, + /// Constructors + pub ctors: Vec, +} + +/// Projection into a mutual block for an inductive type. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct InductiveProj { + /// Index within the mutual block + pub idx: u64, + /// Address of the mutual block + pub block: Address, +} + +/// Projection into a mutual block for a constructor. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct ConstructorProj { + /// Inductive index within the mutual block + pub idx: u64, + /// Constructor index within the inductive + pub cidx: u64, + /// Address of the mutual block + pub block: Address, +} + +/// Projection into a mutual block for a recursor. 
+#[derive(Clone, Debug, PartialEq, Eq)] +pub struct RecursorProj { + /// Index within the mutual block + pub idx: u64, + /// Address of the mutual block + pub block: Address, +} + +/// Projection into a mutual block for a definition. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct DefinitionProj { + /// Index within the mutual block + pub idx: u64, + /// Address of the mutual block + pub block: Address, +} + +/// A constant within a mutual block. +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum MutConst { + Defn(Definition), + Indc(Inductive), + Recr(Recursor), +} + +/// The variant/payload of a constant (alpha-invariant, no metadata). +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum ConstantInfo { + Defn(Definition), + Recr(Recursor), + Axio(Axiom), + Quot(Quotient), + CPrj(ConstructorProj), + RPrj(RecursorProj), + IPrj(InductiveProj), + DPrj(DefinitionProj), + Muts(Vec), +} + +impl ConstantInfo { + // Constant variant indices (used as Tag4 size field) + // These are 0-7, fitting in 3 bits for single-byte Tag4 + // Note: Muts uses a separate flag (0xC), not a variant here + pub const CONST_DEFN: u64 = 0; + pub const CONST_RECR: u64 = 1; + pub const CONST_AXIO: u64 = 2; + pub const CONST_QUOT: u64 = 3; + pub const CONST_CPRJ: u64 = 4; + pub const CONST_RPRJ: u64 = 5; + pub const CONST_IPRJ: u64 = 6; + pub const CONST_DPRJ: u64 = 7; + + /// Returns the variant index (used as Tag4 size field) + /// Returns None for Muts (which uses its own flag) + pub fn variant(&self) -> Option { + match self { + Self::Defn(_) => Some(Self::CONST_DEFN), + Self::Recr(_) => Some(Self::CONST_RECR), + Self::Axio(_) => Some(Self::CONST_AXIO), + Self::Quot(_) => Some(Self::CONST_QUOT), + Self::CPrj(_) => Some(Self::CONST_CPRJ), + Self::RPrj(_) => Some(Self::CONST_RPRJ), + Self::IPrj(_) => Some(Self::CONST_IPRJ), + Self::DPrj(_) => Some(Self::CONST_DPRJ), + Self::Muts(_) => None, // Uses FLAG_MUTS, not a variant + } + } +} + +/// A top-level constant with its sharing, refs, and univs 
vectors. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Constant { + /// The constant payload + pub info: ConstantInfo, + /// Shared subexpressions referenced by Expr::Share(idx) + pub sharing: Vec>, + /// Reference table: addresses referenced by Expr::Ref(idx, _), Expr::Prj, Expr::Str, Expr::Nat + pub refs: Vec
, + /// Universe table: universes referenced by Expr::Sort(idx), Expr::Ref(_, univs), Expr::Rec(_, univs) + pub univs: Vec>, +} + +impl Constant { + /// Tag4 flag used for non-Muts constants (variant in size field, always 1 byte) + pub const FLAG: u8 = 0xD; + /// Tag4 flag used for Muts constants (entry count in size field) + pub const FLAG_MUTS: u8 = 0xC; + + /// Create a new constant with no sharing, refs, or univs + pub fn new(info: ConstantInfo) -> Self { + Constant { info, sharing: Vec::new(), refs: Vec::new(), univs: Vec::new() } + } + + /// Create a new constant with sharing, refs, and univs + pub fn with_tables( + info: ConstantInfo, + sharing: Vec>, + refs: Vec
, + univs: Vec>, + ) -> Self { + Constant { info, sharing, refs, univs } + } +} + +#[cfg(test)] +pub mod tests { + use super::*; + use crate::ix::env::{DefinitionSafety, QuotKind}; + use crate::ix::ixon::expr::tests::arbitrary_expr; + use crate::ix::ixon::tests::gen_range; + use quickcheck::{Arbitrary, Gen}; + + impl Arbitrary for DefKind { + fn arbitrary(g: &mut Gen) -> Self { + match u8::arbitrary(g) % 3 { + 0 => DefKind::Definition, + 1 => DefKind::Opaque, + _ => DefKind::Theorem, + } + } + } + + impl Arbitrary for DefinitionSafety { + fn arbitrary(g: &mut Gen) -> Self { + match u8::arbitrary(g) % 3 { + 0 => DefinitionSafety::Unsafe, + 1 => DefinitionSafety::Safe, + _ => DefinitionSafety::Partial, + } + } + } + + impl Arbitrary for QuotKind { + fn arbitrary(g: &mut Gen) -> Self { + match u8::arbitrary(g) % 4 { + 0 => QuotKind::Type, + 1 => QuotKind::Ctor, + 2 => QuotKind::Lift, + _ => QuotKind::Ind, + } + } + } + + pub fn gen_sharing(g: &mut Gen) -> Vec> { + (0..gen_range(g, 0..4)).map(|_| arbitrary_expr(g)).collect() + } + + pub fn gen_refs(g: &mut Gen) -> Vec
{ + (0..gen_range(g, 0..4)).map(|_| Address::arbitrary(g)).collect() + } + + pub fn gen_univs(g: &mut Gen) -> Vec> { + use crate::ix::ixon::univ::tests::arbitrary_univ; + (0..gen_range(g, 0..4)).map(|_| arbitrary_univ(g)).collect() + } + + pub fn gen_definition(g: &mut Gen) -> Definition { + Definition { + kind: DefKind::arbitrary(g), + safety: DefinitionSafety::arbitrary(g), + lvls: u64::arbitrary(g) % 10, + typ: arbitrary_expr(g), + value: arbitrary_expr(g), + } + } + + fn gen_recursor_rule(g: &mut Gen) -> RecursorRule { + RecursorRule { fields: u64::arbitrary(g) % 10, rhs: arbitrary_expr(g) } + } + + pub fn gen_recursor(g: &mut Gen) -> Recursor { + Recursor { + k: bool::arbitrary(g), + is_unsafe: bool::arbitrary(g), + lvls: u64::arbitrary(g) % 10, + params: u64::arbitrary(g) % 10, + indices: u64::arbitrary(g) % 5, + motives: u64::arbitrary(g) % 3, + minors: u64::arbitrary(g) % 10, + typ: arbitrary_expr(g), + rules: (0..gen_range(g, 0..5)).map(|_| gen_recursor_rule(g)).collect(), + } + } + + pub fn gen_axiom(g: &mut Gen) -> Axiom { + Axiom { + is_unsafe: bool::arbitrary(g), + lvls: u64::arbitrary(g) % 10, + typ: arbitrary_expr(g), + } + } + + pub fn gen_quotient(g: &mut Gen) -> Quotient { + Quotient { + kind: QuotKind::arbitrary(g), + lvls: u64::arbitrary(g) % 10, + typ: arbitrary_expr(g), + } + } + + fn gen_constructor(g: &mut Gen) -> Constructor { + Constructor { + is_unsafe: bool::arbitrary(g), + lvls: u64::arbitrary(g) % 10, + cidx: u64::arbitrary(g) % 10, + params: u64::arbitrary(g) % 10, + fields: u64::arbitrary(g) % 10, + typ: arbitrary_expr(g), + } + } + + pub fn gen_inductive(g: &mut Gen) -> Inductive { + Inductive { + recr: bool::arbitrary(g), + refl: bool::arbitrary(g), + is_unsafe: bool::arbitrary(g), + lvls: u64::arbitrary(g) % 10, + params: u64::arbitrary(g) % 10, + indices: u64::arbitrary(g) % 5, + nested: u64::arbitrary(g) % 3, + typ: arbitrary_expr(g), + ctors: (0..gen_range(g, 0..4)).map(|_| gen_constructor(g)).collect(), + } + } + + fn 
gen_mut_const(g: &mut Gen) -> MutConst { + match u8::arbitrary(g) % 3 { + 0 => MutConst::Defn(gen_definition(g)), + 1 => MutConst::Indc(gen_inductive(g)), + _ => MutConst::Recr(gen_recursor(g)), + } + } + + fn gen_constant_info(g: &mut Gen) -> ConstantInfo { + match u8::arbitrary(g) % 9 { + 0 => ConstantInfo::Defn(gen_definition(g)), + 1 => ConstantInfo::Recr(gen_recursor(g)), + 2 => ConstantInfo::Axio(gen_axiom(g)), + 3 => ConstantInfo::Quot(gen_quotient(g)), + 4 => ConstantInfo::CPrj(ConstructorProj { + idx: u64::arbitrary(g) % 10, + cidx: u64::arbitrary(g) % 10, + block: Address::arbitrary(g), + }), + 5 => ConstantInfo::RPrj(RecursorProj { + idx: u64::arbitrary(g) % 10, + block: Address::arbitrary(g), + }), + 6 => ConstantInfo::IPrj(InductiveProj { + idx: u64::arbitrary(g) % 10, + block: Address::arbitrary(g), + }), + 7 => ConstantInfo::DPrj(DefinitionProj { + idx: u64::arbitrary(g) % 10, + block: Address::arbitrary(g), + }), + _ => ConstantInfo::Muts( + (0..gen_range(g, 1..4)).map(|_| gen_mut_const(g)).collect(), + ), + } + } + + pub fn gen_constant(g: &mut Gen) -> Constant { + Constant { + info: gen_constant_info(g), + sharing: gen_sharing(g), + refs: gen_refs(g), + univs: gen_univs(g), + } + } + + #[derive(Clone, Debug)] + struct ArbitraryConstant(Constant); + + impl Arbitrary for ArbitraryConstant { + fn arbitrary(g: &mut Gen) -> Self { + ArbitraryConstant(gen_constant(g)) + } + } + + fn constant_roundtrip(c: &Constant) -> bool { + let mut buf = Vec::new(); + c.put(&mut buf); + match Constant::get(&mut buf.as_slice()) { + Ok(c2) => c == &c2, + Err(err) => { + eprintln!("constant_roundtrip error: {err}"); + false + }, + } + } + + #[quickcheck] + fn prop_constant_roundtrip(c: ArbitraryConstant) -> bool { + constant_roundtrip(&c.0) + } + + #[test] + fn constant_tag4_serialization() { + let defn = gen_definition(&mut Gen::new(10)); + let cnst = Constant::new(ConstantInfo::Defn(defn)); + let mut buf = Vec::new(); + cnst.put(&mut buf); + assert_eq!(buf[0] >> 4, 
Constant::FLAG, "Constant should use flag 0xD"); + assert!(constant_roundtrip(&cnst)); + } +} diff --git a/src/ix/ixon/env.rs b/src/ix/ixon/env.rs new file mode 100644 index 00000000..b13ce571 --- /dev/null +++ b/src/ix/ixon/env.rs @@ -0,0 +1,342 @@ +//! Environment for storing Ixon data. + +use dashmap::DashMap; + +use crate::ix::address::Address; +use crate::ix::env::Name; + +use super::comm::Comm; +use super::constant::Constant; +use super::metadata::ConstantMeta; + +/// A named constant with metadata. +#[derive(Clone, Debug)] +pub struct Named { + /// Address of the constant (in consts map) + pub addr: Address, + /// Typed metadata for this constant (includes mutual context in `all` field) + pub meta: ConstantMeta, +} + +impl Named { + pub fn new(addr: Address, meta: ConstantMeta) -> Self { + Named { addr, meta } + } + + pub fn with_addr(addr: Address) -> Self { + Named { addr, meta: ConstantMeta::default() } + } +} + +/// The Ixon environment. +/// +/// Contains five maps: +/// - `consts`: Alpha-invariant constants indexed by content hash +/// - `named`: Named references with metadata and mutual context +/// - `blobs`: Raw data (strings, nats, files) +/// - `names`: Hash-consed Lean.Name components (Address -> Name) +/// - `comms`: Cryptographic commitments (secrets) +/// - `addr_to_name`: Reverse index from constant address to name (for O(1) lookup) +#[derive(Debug, Default)] +pub struct Env { + /// Alpha-invariant constants: Address -> Constant + pub consts: DashMap, + /// Named references: Name -> (constant address, metadata, ctx) + pub named: DashMap, + /// Raw data blobs: Address -> bytes + pub blobs: DashMap>, + /// Hash-consed Lean.Name components: Address -> Name + pub names: DashMap, + /// Cryptographic commitments: commitment Address -> Comm + pub comms: DashMap, + /// Reverse index: constant Address -> Name (for fast lookup during decompile) + pub addr_to_name: DashMap, +} + +impl Env { + pub fn new() -> Self { + Env { + consts: DashMap::new(), + 
named: DashMap::new(), + blobs: DashMap::new(), + names: DashMap::new(), + comms: DashMap::new(), + addr_to_name: DashMap::new(), + } + } + + /// Store a blob and return its content address. + pub fn store_blob(&self, bytes: Vec) -> Address { + let addr = Address::hash(&bytes); + self.blobs.insert(addr.clone(), bytes); + addr + } + + /// Get a blob by address. + pub fn get_blob(&self, addr: &Address) -> Option> { + self.blobs.get(addr).map(|r| r.clone()) + } + + /// Store a constant and return its content address. + /// Note: The actual hashing/serialization is done elsewhere. + pub fn store_const(&self, addr: Address, constant: Constant) { + self.consts.insert(addr, constant); + } + + /// Get a constant by address. + pub fn get_const(&self, addr: &Address) -> Option { + self.consts.get(addr).map(|r| r.clone()) + } + + /// Register a named constant. + pub fn register_name(&self, name: Name, named: Named) { + // Also insert into reverse index for O(1) lookup by address + self.addr_to_name.insert(named.addr.clone(), name.clone()); + self.named.insert(name, named); + } + + /// Look up a name. + pub fn lookup_name(&self, name: &Name) -> Option { + self.named.get(name).map(|r| r.clone()) + } + + /// Look up name by constant address (O(1) using reverse index). + pub fn get_name_by_addr(&self, addr: &Address) -> Option { + self.addr_to_name.get(addr).map(|r| r.clone()) + } + + /// Look up named entry by constant address (O(1) using reverse index). + pub fn get_named_by_addr(&self, addr: &Address) -> Option { + self.get_name_by_addr(addr).and_then(|name| self.lookup_name(&name)) + } + + /// Store a hash-consed name component. + pub fn store_name(&self, addr: Address, name: Name) { + self.names.insert(addr, name); + } + + /// Get a name by address. + pub fn get_name(&self, addr: &Address) -> Option { + self.names.get(addr).map(|r| r.clone()) + } + + /// Store a commitment. 
+ pub fn store_comm(&self, addr: Address, comm: Comm) { + self.comms.insert(addr, comm); + } + + /// Get a commitment by address. + pub fn get_comm(&self, addr: &Address) -> Option { + self.comms.get(addr).map(|r| r.clone()) + } + + /// Number of constants. + pub fn const_count(&self) -> usize { + self.consts.len() + } + + /// Number of named entries. + pub fn named_count(&self) -> usize { + self.named.len() + } + + /// Number of hash-consed name components. + pub fn name_count(&self) -> usize { + self.names.len() + } + + /// Number of blobs. + pub fn blob_count(&self) -> usize { + self.blobs.len() + } + + /// Number of commitments. + pub fn comm_count(&self) -> usize { + self.comms.len() + } +} + +impl Clone for Env { + fn clone(&self) -> Self { + let consts = DashMap::new(); + for entry in self.consts.iter() { + consts.insert(entry.key().clone(), entry.value().clone()); + } + + let named = DashMap::new(); + for entry in self.named.iter() { + named.insert(entry.key().clone(), entry.value().clone()); + } + + let blobs = DashMap::new(); + for entry in self.blobs.iter() { + blobs.insert(entry.key().clone(), entry.value().clone()); + } + + let names = DashMap::new(); + for entry in self.names.iter() { + names.insert(entry.key().clone(), entry.value().clone()); + } + + let comms = DashMap::new(); + for entry in self.comms.iter() { + comms.insert(entry.key().clone(), entry.value().clone()); + } + + let addr_to_name = DashMap::new(); + for entry in self.addr_to_name.iter() { + addr_to_name.insert(entry.key().clone(), entry.value().clone()); + } + + Env { consts, named, blobs, names, comms, addr_to_name } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::ix::env::Name; + use crate::ix::ixon::constant::{Axiom, Constant, ConstantInfo}; + use crate::ix::ixon::expr::Expr; + use std::sync::Arc; + + fn n(s: &str) -> Name { + Name::str(Name::anon(), s.to_string()) + } + + fn dummy_constant() -> Constant { + Constant::new(ConstantInfo::Axio(Axiom { + is_unsafe: 
false, + lvls: 0, + typ: Arc::new(Expr::Sort(0)), + })) + } + + #[test] + fn store_and_get_blob() { + let env = Env::new(); + let data = vec![1, 2, 3, 4, 5]; + let addr = env.store_blob(data.clone()); + assert_eq!(env.get_blob(&addr), Some(data)); + // Same content produces same address + let addr2 = env.store_blob(vec![1, 2, 3, 4, 5]); + assert_eq!(addr, addr2); + } + + #[test] + fn store_and_get_const() { + let env = Env::new(); + let constant = dummy_constant(); + let addr = Address::hash(b"test-constant"); + env.store_const(addr.clone(), constant.clone()); + let got = env.get_const(&addr).unwrap(); + assert_eq!(got, constant); + } + + #[test] + fn register_and_lookup_name() { + let env = Env::new(); + let name = n("MyConst"); + let addr = Address::hash(b"my-const-addr"); + let named = Named::with_addr(addr.clone()); + env.register_name(name.clone(), named.clone()); + let got = env.lookup_name(&name).unwrap(); + assert_eq!(got.addr, addr); + } + + #[test] + fn get_name_by_addr_reverse_index() { + let env = Env::new(); + let name = n("Reverse"); + let addr = Address::hash(b"reverse-addr"); + let named = Named::with_addr(addr.clone()); + env.register_name(name.clone(), named); + let got_name = env.get_name_by_addr(&addr).unwrap(); + assert_eq!(got_name, name); + } + + #[test] + fn get_named_by_addr_resolves_through_reverse_index() { + let env = Env::new(); + let name = n("Through"); + let addr = Address::hash(b"through-addr"); + let named = Named::with_addr(addr.clone()); + env.register_name(name.clone(), named); + let got = env.get_named_by_addr(&addr).unwrap(); + assert_eq!(got.addr, addr); + } + + #[test] + fn store_and_get_name_component() { + let env = Env::new(); + let name = n("Component"); + let addr = Address::hash(b"name-component"); + env.store_name(addr.clone(), name.clone()); + assert_eq!(env.get_name(&addr), Some(name)); + } + + #[test] + fn store_and_get_comm() { + let env = Env::new(); + let secret = Address::hash(b"secret"); + let payload = 
Address::hash(b"payload"); + let comm = Comm::new(secret.clone(), payload.clone()); + let comm_addr = Address::hash(b"comm-addr"); + env.store_comm(comm_addr.clone(), comm.clone()); + let got = env.get_comm(&comm_addr).unwrap(); + assert_eq!(got, comm); + } + + #[test] + fn counts() { + let env = Env::new(); + assert_eq!(env.const_count(), 0); + assert_eq!(env.named_count(), 0); + assert_eq!(env.blob_count(), 0); + assert_eq!(env.name_count(), 0); + assert_eq!(env.comm_count(), 0); + + env.store_blob(vec![1]); + assert_eq!(env.blob_count(), 1); + + env.store_const(Address::hash(b"c1"), dummy_constant()); + assert_eq!(env.const_count(), 1); + + env.register_name(n("x"), Named::with_addr(Address::hash(b"x"))); + assert_eq!(env.named_count(), 1); + + env.store_name(Address::hash(b"n1"), n("n")); + assert_eq!(env.name_count(), 1); + + env.store_comm( + Address::hash(b"cm"), + Comm::new(Address::hash(b"s"), Address::hash(b"p")), + ); + assert_eq!(env.comm_count(), 1); + } + + #[test] + fn missing_keys_return_none() { + let env = Env::new(); + let missing = Address::hash(b"nonexistent"); + assert!(env.get_blob(&missing).is_none()); + assert!(env.get_const(&missing).is_none()); + assert!(env.lookup_name(&n("missing")).is_none()); + assert!(env.get_name_by_addr(&missing).is_none()); + assert!(env.get_named_by_addr(&missing).is_none()); + assert!(env.get_name(&missing).is_none()); + assert!(env.get_comm(&missing).is_none()); + } + + #[test] + fn blob_content_addressing() { + let env = Env::new(); + let addr1 = env.store_blob(vec![1, 2, 3]); + let addr2 = env.store_blob(vec![4, 5, 6]); + // Different content produces different addresses + assert_ne!(addr1, addr2); + // Same content produces same address + let addr3 = env.store_blob(vec![1, 2, 3]); + assert_eq!(addr1, addr3); + } +} diff --git a/src/ix/ixon/error.rs b/src/ix/ixon/error.rs new file mode 100644 index 00000000..1ee93b43 --- /dev/null +++ b/src/ix/ixon/error.rs @@ -0,0 +1,194 @@ +//! 
Custom error types for Ixon serialization and compilation. + +use crate::ix::address::Address; + +/// Errors during serialization/deserialization. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum SerializeError { + /// Unexpected end of buffer + UnexpectedEof { expected: String }, + /// Invalid tag byte + InvalidTag { tag: u8, context: String }, + /// Invalid flag in tag + InvalidFlag { flag: u8, context: String }, + /// Invalid variant discriminant + InvalidVariant { variant: u64, context: String }, + /// Invalid boolean value + InvalidBool { value: u8 }, + /// Address parsing error + AddressError, + /// Invalid Share index + InvalidShareIndex { idx: u64, max: usize }, +} + +impl std::fmt::Display for SerializeError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::UnexpectedEof { expected } => { + write!(f, "unexpected EOF, expected {expected}") + }, + Self::InvalidTag { tag, context } => { + write!(f, "invalid tag 0x{tag:02X} in {context}") + }, + Self::InvalidFlag { flag, context } => { + write!(f, "invalid flag {flag} in {context}") + }, + Self::InvalidVariant { variant, context } => { + write!(f, "invalid variant {variant} in {context}") + }, + Self::InvalidBool { value } => write!(f, "invalid bool value {value}"), + Self::AddressError => write!(f, "address parsing error"), + Self::InvalidShareIndex { idx, max } => { + write!(f, "invalid Share index {idx}, max is {max}") + }, + } + } +} + +impl std::error::Error for SerializeError {} + +/// Errors during compilation (Lean → Ixon). +/// +/// Variant order matches Lean constructor tags (0–5). 
+#[derive(Debug, Clone, PartialEq, Eq)] +pub enum CompileError { + /// Referenced constant not found (tag 0) + MissingConstant { name: String }, + /// Address not found in store (tag 1) + MissingAddress(Address), + /// Invalid mutual block structure (tag 2) + InvalidMutualBlock { reason: String }, + /// Unsupported expression variant (tag 3) + UnsupportedExpr { desc: String }, + /// Unknown universe parameter (tag 4) + UnknownUnivParam { curr: String, param: String }, + /// Serialization error during compilation (tag 5) + Serialize(SerializeError), +} + +impl std::fmt::Display for CompileError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::MissingConstant { name } => write!(f, "missing constant: {name}"), + Self::MissingAddress(addr) => write!(f, "missing address: {addr:?}"), + Self::InvalidMutualBlock { reason } => { + write!(f, "invalid mutual block: {reason}") + }, + Self::UnsupportedExpr { desc } => { + write!(f, "unsupported expression: {desc}") + }, + Self::UnknownUnivParam { curr, param } => { + write!(f, "unknown universe parameter: compiling {curr}, param {param}") + }, + Self::Serialize(e) => write!(f, "serialization error: {e}"), + } + } +} + +impl std::error::Error for CompileError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + Self::Serialize(e) => Some(e), + _ => None, + } + } +} + +impl From for CompileError { + fn from(e: SerializeError) -> Self { + Self::Serialize(e) + } +} + +/// Errors during decompilation (Ixon → Lean). +/// +/// Variant order matches Lean constructor tags (0–10). 
+#[derive(Debug, Clone, PartialEq, Eq)] +pub enum DecompileError { + /// Invalid Ref(idx) reference - refs table too small (tag 0) + InvalidRefIndex { idx: u64, refs_len: usize, constant: String }, + /// Invalid universe index - univs table too small (tag 1) + InvalidUnivIndex { idx: u64, univs_len: usize, constant: String }, + /// Invalid Share(idx) reference - sharing vector too small (tag 2) + InvalidShareIndex { idx: u64, max: usize, constant: String }, + /// Invalid Rec(idx) reference - mutual context doesn't have this index (tag 3) + InvalidRecIndex { idx: u64, ctx_size: usize, constant: String }, + /// Invalid Univ::Var(idx) reference - level names too small (tag 4) + InvalidUnivVarIndex { idx: u64, max: usize, constant: String }, + /// Address not found in store (tag 5) + MissingAddress(Address), + /// Metadata not found for address (tag 6) + MissingMetadata(Address), + /// Blob not found at address (tag 7) + BlobNotFound(Address), + /// Bad blob format at address (tag 8) + BadBlobFormat { addr: Address, expected: String }, + /// Bad constant format (tag 9) + BadConstantFormat { msg: String }, + /// Serialization error during decompilation (tag 10) + Serialize(SerializeError), +} + +impl std::fmt::Display for DecompileError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::InvalidRefIndex { idx, refs_len, constant } => { + write!( + f, + "invalid Ref({idx}) in '{constant}': refs table has {refs_len} entries" + ) + }, + Self::InvalidUnivIndex { idx, univs_len, constant } => { + write!( + f, + "invalid univ index {idx} in '{constant}': univs table has {univs_len} entries" + ) + }, + Self::InvalidShareIndex { idx, max, constant } => { + write!( + f, + "invalid Share({idx}) in '{constant}': sharing vector has {max} entries" + ) + }, + Self::InvalidRecIndex { idx, ctx_size, constant } => { + write!( + f, + "invalid Rec({idx}) in '{constant}': mutual context has {ctx_size} entries" + ) + }, + Self::InvalidUnivVarIndex 
{ idx, max, constant } => { + write!( + f, + "invalid Univ::Var({idx}) in '{constant}': only {max} level params" + ) + }, + Self::MissingAddress(addr) => write!(f, "missing address: {addr:?}"), + Self::MissingMetadata(addr) => { + write!(f, "missing metadata for: {addr:?}") + }, + Self::BlobNotFound(addr) => write!(f, "blob not found at: {addr:?}"), + Self::BadBlobFormat { addr, expected } => { + write!(f, "bad blob format at {addr:?}, expected {expected}") + }, + Self::BadConstantFormat { msg } => { + write!(f, "bad constant format: {msg}") + }, + Self::Serialize(e) => write!(f, "serialization error: {e}"), + } + } +} + +impl std::error::Error for DecompileError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + Self::Serialize(e) => Some(e), + _ => None, + } + } +} + +impl From for DecompileError { + fn from(e: SerializeError) -> Self { + Self::Serialize(e) + } +} diff --git a/src/ix/ixon/expr.rs b/src/ix/ixon/expr.rs new file mode 100644 index 00000000..5a6f6267 --- /dev/null +++ b/src/ix/ixon/expr.rs @@ -0,0 +1,436 @@ +//! Expressions in the Ixon format. + +#![allow(clippy::cast_possible_truncation)] +#![allow(clippy::needless_pass_by_value)] + +use std::sync::Arc; + +/// Expression in the Ixon format. +/// +/// This is the alpha-invariant representation of Lean expressions. +/// Names are stripped, binder info is stored in metadata. +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub enum Expr { + /// Sort/Type at a universe level (index into Constant.univs table) + Sort(u64), + /// De Bruijn variable + Var(u64), + /// Reference to a top-level constant with universe arguments. + /// First u64 is index into Constant.refs, Vec are indices into Constant.univs. + Ref(u64, Vec), + /// Mutual recursion reference (index within block) with universe arguments. + /// First u64 is rec index, Vec are indices into Constant.univs. 
+ Rec(u64, Vec), + /// Projection: (struct_type_ref_idx, field_index, struct_value) + /// The first u64 is an index into Constant.refs table for the struct type. + Prj(u64, u64, Arc), + /// String literal - index into Constant.refs table (address points to blob) + Str(u64), + /// Natural number literal - index into Constant.refs table (address points to blob) + Nat(u64), + /// Application: (function, argument) + App(Arc, Arc), + /// Lambda: (binder_type, body) + Lam(Arc, Arc), + /// Forall/Pi: (binder_type, body) + All(Arc, Arc), + /// Let: (non_dep, type, value, body) + Let(bool, Arc, Arc, Arc), + /// Reference to shared subexpression in MutualBlock.sharing[idx] + Share(u64), +} + +impl Expr { + // Tag4 flags for expression variants (0x0-0xB) + pub const FLAG_SORT: u8 = 0x0; + pub const FLAG_VAR: u8 = 0x1; + pub const FLAG_REF: u8 = 0x2; + pub const FLAG_REC: u8 = 0x3; + pub const FLAG_PRJ: u8 = 0x4; + pub const FLAG_STR: u8 = 0x5; + pub const FLAG_NAT: u8 = 0x6; + pub const FLAG_APP: u8 = 0x7; + pub const FLAG_LAM: u8 = 0x8; + pub const FLAG_ALL: u8 = 0x9; + pub const FLAG_LET: u8 = 0xA; // size=0 for dep, size=1 for non_dep + pub const FLAG_SHARE: u8 = 0xB; + + pub fn sort(univ_idx: u64) -> Arc { + Arc::new(Expr::Sort(univ_idx)) + } + + pub fn var(idx: u64) -> Arc { + Arc::new(Expr::Var(idx)) + } + + pub fn reference(ref_idx: u64, univ_indices: Vec) -> Arc { + Arc::new(Expr::Ref(ref_idx, univ_indices)) + } + + pub fn rec(rec_idx: u64, univ_indices: Vec) -> Arc { + Arc::new(Expr::Rec(rec_idx, univ_indices)) + } + + pub fn prj(type_ref_idx: u64, field_idx: u64, val: Arc) -> Arc { + Arc::new(Expr::Prj(type_ref_idx, field_idx, val)) + } + + pub fn str(ref_idx: u64) -> Arc { + Arc::new(Expr::Str(ref_idx)) + } + + pub fn nat(ref_idx: u64) -> Arc { + Arc::new(Expr::Nat(ref_idx)) + } + + pub fn app(f: Arc, a: Arc) -> Arc { + Arc::new(Expr::App(f, a)) + } + + pub fn lam(ty: Arc, body: Arc) -> Arc { + Arc::new(Expr::Lam(ty, body)) + } + + pub fn all(ty: Arc, body: Arc) -> 
Arc { + Arc::new(Expr::All(ty, body)) + } + + pub fn let_( + non_dep: bool, + ty: Arc, + val: Arc, + body: Arc, + ) -> Arc { + Arc::new(Expr::Let(non_dep, ty, val, body)) + } + + pub fn share(idx: u64) -> Arc { + Arc::new(Expr::Share(idx)) + } + + /// Count nested applications for telescope compression. + pub fn app_telescope_count(&self) -> u64 { + let mut count = 0u64; + let mut curr = self; + while let Expr::App(f, _) = curr { + count += 1; + curr = f.as_ref(); + } + count + } + + /// Count nested lambdas for telescope compression. + pub fn lam_telescope_count(&self) -> u64 { + let mut count = 0u64; + let mut curr = self; + while let Expr::Lam(_, body) = curr { + count += 1; + curr = body.as_ref(); + } + count + } + + /// Count nested foralls for telescope compression. + pub fn all_telescope_count(&self) -> u64 { + let mut count = 0u64; + let mut curr = self; + while let Expr::All(_, body) = curr { + count += 1; + curr = body.as_ref(); + } + count + } +} + +#[cfg(test)] +pub mod tests { + use super::*; + use crate::ix::ixon::constant::Constant; + use crate::ix::ixon::serialize::{get_expr, put_expr}; + use crate::ix::ixon::tests::gen_range; + use quickcheck::{Arbitrary, Gen}; + use std::ptr; + + #[derive(Clone, Copy)] + enum Case { + Var, + Share, + Str, + Nat, + Sort, + Ref, + Rec, + App, + Lam, + All, + Prj, + Let, + } + + /// Generate an arbitrary Expr using pointer-tree technique (no stack overflow) + pub fn arbitrary_expr(g: &mut Gen) -> Arc { + use crate::ix::ixon::tests::next_case; + + let mut root = Expr::Var(0); + let mut stack = vec![&mut root as *mut Expr]; + + while let Some(ptr) = stack.pop() { + let gens = [ + (100, Case::Var), + (80, Case::Share), + (60, Case::Str), + (60, Case::Nat), + (40, Case::Sort), + (40, Case::Ref), + (40, Case::Rec), + (30, Case::App), + (30, Case::Lam), + (30, Case::All), + (20, Case::Prj), + (10, Case::Let), + ]; + + match next_case(g, &gens) { + Case::Var => unsafe { + ptr::write(ptr, Expr::Var(gen_range(g, 0..16) as 
u64)); + }, + Case::Share => unsafe { + ptr::write(ptr, Expr::Share(gen_range(g, 0..16) as u64)); + }, + Case::Str => unsafe { + ptr::write(ptr, Expr::Str(gen_range(g, 0..16) as u64)); + }, + Case::Nat => unsafe { + ptr::write(ptr, Expr::Nat(gen_range(g, 0..16) as u64)); + }, + Case::Sort => unsafe { + ptr::write(ptr, Expr::Sort(gen_range(g, 0..16) as u64)); + }, + Case::Ref => { + let univ_indices: Vec<_> = (0..gen_range(g, 0..4)) + .map(|_| gen_range(g, 0..16) as u64) + .collect(); + unsafe { + ptr::write( + ptr, + Expr::Ref(gen_range(g, 0..16) as u64, univ_indices), + ); + } + }, + Case::Rec => { + let univ_indices: Vec<_> = (0..gen_range(g, 0..4)) + .map(|_| gen_range(g, 0..16) as u64) + .collect(); + unsafe { + ptr::write(ptr, Expr::Rec(gen_range(g, 0..8) as u64, univ_indices)); + } + }, + Case::App => { + let mut f = Arc::new(Expr::Var(0)); + let mut a = Arc::new(Expr::Var(0)); + let (f_ptr, a_ptr) = ( + Arc::get_mut(&mut f).unwrap() as *mut Expr, + Arc::get_mut(&mut a).unwrap() as *mut Expr, + ); + unsafe { + ptr::write(ptr, Expr::App(f, a)); + } + stack.push(a_ptr); + stack.push(f_ptr); + }, + Case::Lam => { + let mut ty = Arc::new(Expr::Var(0)); + let mut body = Arc::new(Expr::Var(0)); + let (ty_ptr, body_ptr) = ( + Arc::get_mut(&mut ty).unwrap() as *mut Expr, + Arc::get_mut(&mut body).unwrap() as *mut Expr, + ); + unsafe { + ptr::write(ptr, Expr::Lam(ty, body)); + } + stack.push(body_ptr); + stack.push(ty_ptr); + }, + Case::All => { + let mut ty = Arc::new(Expr::Var(0)); + let mut body = Arc::new(Expr::Var(0)); + let (ty_ptr, body_ptr) = ( + Arc::get_mut(&mut ty).unwrap() as *mut Expr, + Arc::get_mut(&mut body).unwrap() as *mut Expr, + ); + unsafe { + ptr::write(ptr, Expr::All(ty, body)); + } + stack.push(body_ptr); + stack.push(ty_ptr); + }, + Case::Prj => { + let mut val = Arc::new(Expr::Var(0)); + let val_ptr = Arc::get_mut(&mut val).unwrap() as *mut Expr; + let type_ref_idx = gen_range(g, 0..16) as u64; + let field_idx = gen_range(g, 0..8) as u64; + 
unsafe { + ptr::write(ptr, Expr::Prj(type_ref_idx, field_idx, val)); + } + stack.push(val_ptr); + }, + Case::Let => { + let mut ty = Arc::new(Expr::Var(0)); + let mut val = Arc::new(Expr::Var(0)); + let mut body = Arc::new(Expr::Var(0)); + let (ty_ptr, val_ptr, body_ptr) = ( + Arc::get_mut(&mut ty).unwrap() as *mut Expr, + Arc::get_mut(&mut val).unwrap() as *mut Expr, + Arc::get_mut(&mut body).unwrap() as *mut Expr, + ); + unsafe { + ptr::write(ptr, Expr::Let(bool::arbitrary(g), ty, val, body)); + } + stack.push(body_ptr); + stack.push(val_ptr); + stack.push(ty_ptr); + }, + } + } + Arc::new(root) + } + + #[derive(Clone, Debug)] + struct ArbitraryExpr(Arc); + + impl Arbitrary for ArbitraryExpr { + fn arbitrary(g: &mut Gen) -> Self { + ArbitraryExpr(arbitrary_expr(g)) + } + } + + fn expr_roundtrip(e: &Expr) -> bool { + let mut buf = Vec::new(); + put_expr(e, &mut buf); + match get_expr(&mut buf.as_slice()) { + Ok(e2) => e == e2.as_ref(), + Err(err) => { + eprintln!("expr_roundtrip error: {err}"); + false + }, + } + } + + #[quickcheck] + fn prop_expr_roundtrip(e: ArbitraryExpr) -> bool { + expr_roundtrip(&e.0) + } + + #[test] + fn test_nested_app_telescope() { + let e = Expr::app( + Expr::app(Expr::app(Expr::var(0), Expr::var(1)), Expr::var(2)), + Expr::var(3), + ); + assert!(expr_roundtrip(&e)); + } + + #[test] + fn test_nested_lam_telescope() { + let ty = Expr::sort(0); + let e = + Expr::lam(ty.clone(), Expr::lam(ty.clone(), Expr::lam(ty, Expr::var(0)))); + assert!(expr_roundtrip(&e)); + } + + #[test] + fn test_nested_all_telescope() { + let ty = Expr::sort(0); + let e = Expr::all( + ty.clone(), + Expr::all(ty.clone(), Expr::all(ty, Expr::sort(0))), + ); + assert!(expr_roundtrip(&e)); + } + + #[test] + fn ser_de_expr_var() { + for idx in [0u64, 1, 7, 8, 100, 1000] { + assert!(expr_roundtrip(&Expr::Var(idx))); + } + } + + #[test] + fn ser_de_expr_sort() { + for idx in [0u64, 1, 7, 8, 100, 1000] { + assert!(expr_roundtrip(&Expr::Sort(idx))); + } + } + + #[test] + fn 
ser_de_expr_str_nat() { + for idx in [0u64, 1, 7, 8, 100, 1000] { + assert!(expr_roundtrip(&Expr::Str(idx))); + assert!(expr_roundtrip(&Expr::Nat(idx))); + } + } + + #[test] + fn ser_de_expr_share() { + for idx in [0u64, 1, 7, 8, 100] { + assert!(expr_roundtrip(&Expr::Share(idx))); + } + } + + #[test] + fn ser_de_expr_lam_telescope_size() { + let ty = Expr::var(1); + let expr = + Expr::lam(ty.clone(), Expr::lam(ty.clone(), Expr::lam(ty, Expr::var(0)))); + let mut buf = Vec::new(); + put_expr(expr.as_ref(), &mut buf); + assert_eq!(buf.len(), 5); + assert!(expr_roundtrip(&expr)); + } + + #[test] + fn ser_de_expr_app_telescope_size() { + let expr = Expr::app( + Expr::app(Expr::app(Expr::var(3), Expr::var(2)), Expr::var(1)), + Expr::var(0), + ); + let mut buf = Vec::new(); + put_expr(expr.as_ref(), &mut buf); + assert_eq!(buf.len(), 5); + assert!(expr_roundtrip(&expr)); + } + + #[test] + fn telescope_lam_byte_boundaries() { + for (n, tag_bytes) in + [(1u64, 1), (7, 1), (8, 2), (255, 2), (256, 3), (500, 3)] + { + let ty = Expr::var(1); + let mut expr: Arc = Expr::var(0); + for _ in 0..n { + expr = Expr::lam(ty.clone(), expr); + } + let mut buf = Vec::new(); + put_expr(expr.as_ref(), &mut buf); + assert_eq!(buf.len(), tag_bytes + (n as usize) + 1); + assert!(expr_roundtrip(&expr)); + } + } + + #[test] + fn expr_and_constant_flags_unique() { + assert_eq!(Expr::FLAG_SORT, 0x0); + assert_eq!(Expr::FLAG_SHARE, 0xB); + assert_eq!(Constant::FLAG_MUTS, 0xC); + assert_eq!(Constant::FLAG, 0xD); + // Expression flags are 0x0-0xB, Constant flags are 0xC-0xD + assert!( + ![0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB] + .contains(&Constant::FLAG) + ); + assert!( + ![0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB] + .contains(&Constant::FLAG_MUTS) + ); + } +} diff --git a/src/ix/ixon/metadata.rs b/src/ix/ixon/metadata.rs new file mode 100644 index 00000000..280c09fb --- /dev/null +++ b/src/ix/ixon/metadata.rs @@ -0,0 +1,846 @@ +//! 
Arena-based metadata for preserving Lean source information. +//! +//! Metadata types use Address internally, but serialize with u64 indices +//! into a global name index for space efficiency. +//! +//! The arena stores metadata as a tree of ExprMetaData nodes, allocated +//! bottom-up (children before parents). Each ConstantMeta variant stores +//! an ExprMeta arena plus root indices for each expression position. + +#![allow(clippy::cast_possible_truncation)] + +use std::collections::HashMap; + +use crate::ix::address::Address; +use crate::ix::env::{BinderInfo, ReducibilityHints}; + +use super::tag::Tag0; + +// =========================================================================== +// Types (use Address internally) +// =========================================================================== + +/// Key-value map for Lean.Expr.mdata +pub type KVMap = Vec<(Address, DataValue)>; + +/// Arena node for per-expression metadata. +/// +/// Nodes are allocated bottom-up (children before parents) in the arena. +/// Arena indices are u64 values pointing into `ExprMeta.nodes`. +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub enum ExprMetaData { + /// Leaf node: Var, Sort, Nat, Str (no metadata) + Leaf, + /// Application: children = [fun, arg] + App { children: [u64; 2] }, + /// Lambda/ForAll binder: children = [type, body] + Binder { name: Address, info: BinderInfo, children: [u64; 2] }, + /// Let binder: children = [type, value, body] + LetBinder { name: Address, children: [u64; 3] }, + /// Const reference (Ref or Rec): leaf in the arena + Ref { name: Address }, + /// Projection: child = struct value + Prj { struct_name: Address, child: u64 }, + /// Mdata wrapper: always a separate node, never absorbed into Binder/Ref/Prj + Mdata { mdata: Vec, child: u64 }, +} + +/// Arena for expression metadata within a single constant. +/// +/// Nodes are appended bottom-up. 
Arena indices are stable because the arena +/// is append-only and never reset during a constant's compilation. +#[derive(Clone, Debug, Default, PartialEq, Eq)] +pub struct ExprMeta { + pub nodes: Vec, +} + +impl ExprMeta { + /// Allocate a new node in the arena, returning its index. + pub fn alloc(&mut self, node: ExprMetaData) -> u64 { + let idx = self.nodes.len() as u64; + self.nodes.push(node); + idx + } +} + +/// Per-constant metadata with arena-based expression metadata. +/// +/// Each variant stores an ExprMeta arena covering all expressions in +/// that constant, plus root indices pointing into the arena for each +/// expression position (type, value, rule RHS, etc.). +#[derive(Clone, Debug, PartialEq, Eq, Default)] +pub enum ConstantMeta { + #[default] + Empty, + Def { + name: Address, + lvls: Vec
, + hints: ReducibilityHints, + all: Vec
, + ctx: Vec
, + arena: ExprMeta, + type_root: u64, + value_root: u64, + }, + Axio { + name: Address, + lvls: Vec
, + arena: ExprMeta, + type_root: u64, + }, + Quot { + name: Address, + lvls: Vec
, + arena: ExprMeta, + type_root: u64, + }, + Indc { + name: Address, + lvls: Vec
, + ctors: Vec
, + all: Vec
, + ctx: Vec
, + arena: ExprMeta, + type_root: u64, + }, + Ctor { + name: Address, + lvls: Vec
, + induct: Address, + arena: ExprMeta, + type_root: u64, + }, + Rec { + name: Address, + lvls: Vec
, + rules: Vec
, + all: Vec
, + ctx: Vec
, + arena: ExprMeta, + type_root: u64, + rule_roots: Vec, + }, +} + +/// Data values for KVMap metadata. +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub enum DataValue { + OfString(Address), + OfBool(bool), + OfName(Address), + OfNat(Address), + OfInt(Address), + OfSyntax(Address), +} + +// =========================================================================== +// Serialization helpers +// =========================================================================== + +fn put_u8(x: u8, buf: &mut Vec) { + buf.push(x); +} + +fn get_u8(buf: &mut &[u8]) -> Result { + match buf.split_first() { + Some((&x, rest)) => { + *buf = rest; + Ok(x) + }, + None => Err("get_u8: EOF".to_string()), + } +} + +fn put_bool(x: bool, buf: &mut Vec) { + buf.push(if x { 1 } else { 0 }); +} + +fn get_bool(buf: &mut &[u8]) -> Result { + match get_u8(buf)? { + 0 => Ok(false), + 1 => Ok(true), + x => Err(format!("get_bool: invalid {x}")), + } +} + +/// Serialize a raw 32-byte address (for blob addresses not in the name index). +fn put_address_raw(addr: &Address, buf: &mut Vec) { + buf.extend_from_slice(addr.as_bytes()); +} + +/// Deserialize a raw 32-byte address. 
+fn get_address_raw(buf: &mut &[u8]) -> Result { + if buf.len() < 32 { + return Err(format!("get_address_raw: need 32 bytes, have {}", buf.len())); + } + let (bytes, rest) = buf.split_at(32); + *buf = rest; + Address::from_slice(bytes) + .map_err(|_e| "get_address_raw: invalid".to_string()) +} + +fn put_u64(x: u64, buf: &mut Vec) { + Tag0::new(x).put(buf); +} + +fn get_u64(buf: &mut &[u8]) -> Result { + Ok(Tag0::get(buf)?.size) +} + +fn put_vec_len(len: usize, buf: &mut Vec) { + Tag0::new(len as u64).put(buf); +} + +fn get_vec_len(buf: &mut &[u8]) -> Result { + Ok(Tag0::get(buf)?.size as usize) +} + +// =========================================================================== +// BinderInfo and ReducibilityHints serialization +// =========================================================================== + +impl BinderInfo { + pub fn put(&self, buf: &mut Vec) { + match self { + Self::Default => put_u8(0, buf), + Self::Implicit => put_u8(1, buf), + Self::StrictImplicit => put_u8(2, buf), + Self::InstImplicit => put_u8(3, buf), + } + } + + pub fn get_ser(buf: &mut &[u8]) -> Result { + match get_u8(buf)? { + 0 => Ok(Self::Default), + 1 => Ok(Self::Implicit), + 2 => Ok(Self::StrictImplicit), + 3 => Ok(Self::InstImplicit), + x => Err(format!("BinderInfo::get: invalid {x}")), + } + } +} + +impl ReducibilityHints { + pub fn put(&self, buf: &mut Vec) { + match self { + Self::Opaque => put_u8(0, buf), + Self::Abbrev => put_u8(1, buf), + Self::Regular(x) => { + put_u8(2, buf); + Tag0::new(u64::from(*x)).put(buf); + }, + } + } + + pub fn get_ser(buf: &mut &[u8]) -> Result { + match get_u8(buf)? 
{ + 0 => Ok(Self::Opaque), + 1 => Ok(Self::Abbrev), + 2 => { + let tag = Tag0::get(buf)?; + Ok(Self::Regular(tag.size as u32)) + }, + x => Err(format!("ReducibilityHints::get: invalid {x}")), + } + } +} + +// =========================================================================== +// Indexed serialization (Address -> u64 index) +// =========================================================================== + +/// Name index for serialization: Address -> u64 +pub type NameIndex = HashMap; + +/// Reverse name index for deserialization: position -> Address +pub type NameReverseIndex = Vec
; + +fn put_idx( + addr: &Address, + idx: &NameIndex, + buf: &mut Vec, +) -> Result<(), String> { + let i = idx.get(addr).copied().ok_or_else(|| { + format!( + "put_idx: address {:?} not in name index (index has {} entries)", + addr, + idx.len() + ) + })?; + put_u64(i, buf); + Ok(()) +} + +fn get_idx(buf: &mut &[u8], rev: &NameReverseIndex) -> Result { + let i = get_u64(buf)? as usize; + rev + .get(i) + .cloned() + .ok_or_else(|| format!("invalid name index {i}, max {}", rev.len())) +} + +fn put_idx_vec( + addrs: &[Address], + idx: &NameIndex, + buf: &mut Vec, +) -> Result<(), String> { + put_vec_len(addrs.len(), buf); + for a in addrs { + put_idx(a, idx, buf)?; + } + Ok(()) +} + +fn get_idx_vec( + buf: &mut &[u8], + rev: &NameReverseIndex, +) -> Result, String> { + let len = get_vec_len(buf)?; + let mut v = Vec::with_capacity(len); + for _ in 0..len { + v.push(get_idx(buf, rev)?); + } + Ok(v) +} + +// =========================================================================== +// DataValue indexed serialization +// =========================================================================== + +impl DataValue { + pub fn put_indexed( + &self, + idx: &NameIndex, + buf: &mut Vec, + ) -> Result<(), String> { + match self { + // OfString, OfNat, OfInt, OfSyntax hold blob addresses (not in name index) + Self::OfString(a) => { + put_u8(0, buf); + put_address_raw(a, buf); + }, + Self::OfBool(b) => { + put_u8(1, buf); + put_bool(*b, buf); + }, + // OfName holds a name address (in name index) + Self::OfName(a) => { + put_u8(2, buf); + put_idx(a, idx, buf)?; + }, + Self::OfNat(a) => { + put_u8(3, buf); + put_address_raw(a, buf); + }, + Self::OfInt(a) => { + put_u8(4, buf); + put_address_raw(a, buf); + }, + Self::OfSyntax(a) => { + put_u8(5, buf); + put_address_raw(a, buf); + }, + } + Ok(()) + } + + pub fn get_indexed( + buf: &mut &[u8], + rev: &NameReverseIndex, + ) -> Result { + match get_u8(buf)? 
{ + 0 => Ok(Self::OfString(get_address_raw(buf)?)), + 1 => Ok(Self::OfBool(get_bool(buf)?)), + 2 => Ok(Self::OfName(get_idx(buf, rev)?)), + 3 => Ok(Self::OfNat(get_address_raw(buf)?)), + 4 => Ok(Self::OfInt(get_address_raw(buf)?)), + 5 => Ok(Self::OfSyntax(get_address_raw(buf)?)), + x => Err(format!("DataValue::get: invalid tag {x}")), + } + } +} + +// =========================================================================== +// KVMap and mdata indexed serialization +// =========================================================================== + +fn put_kvmap_indexed( + kvmap: &KVMap, + idx: &NameIndex, + buf: &mut Vec, +) -> Result<(), String> { + put_vec_len(kvmap.len(), buf); + for (k, v) in kvmap { + put_idx(k, idx, buf)?; + v.put_indexed(idx, buf)?; + } + Ok(()) +} + +fn get_kvmap_indexed( + buf: &mut &[u8], + rev: &NameReverseIndex, +) -> Result { + let len = get_vec_len(buf)?; + let mut kvmap = Vec::with_capacity(len); + for _ in 0..len { + kvmap.push((get_idx(buf, rev)?, DataValue::get_indexed(buf, rev)?)); + } + Ok(kvmap) +} + +fn put_mdata_stack_indexed( + mdata: &[KVMap], + idx: &NameIndex, + buf: &mut Vec, +) -> Result<(), String> { + put_vec_len(mdata.len(), buf); + for kv in mdata { + put_kvmap_indexed(kv, idx, buf)?; + } + Ok(()) +} + +fn get_mdata_stack_indexed( + buf: &mut &[u8], + rev: &NameReverseIndex, +) -> Result, String> { + let len = get_vec_len(buf)?; + let mut mdata = Vec::with_capacity(len); + for _ in 0..len { + mdata.push(get_kvmap_indexed(buf, rev)?); + } + Ok(mdata) +} + +// =========================================================================== +// ExprMetaData indexed serialization +// =========================================================================== + +impl ExprMetaData { + // Tag 0: Leaf (no payload) + // Tag 1: App { children: [u32, u32] } + // Tags 2-5: Binder with BinderInfo packed into tag (2 + variant) + // Tag 6: LetBinder { name_idx, children: [u32, u32, u32] } + // Tag 7: Ref { name_idx } + // Tag 8: Prj { 
struct_name_idx, child: u32 } + // Tag 9: Mdata { kvmap_count, kvmaps..., child: u32 } + + pub fn put_indexed( + &self, + idx: &NameIndex, + buf: &mut Vec, + ) -> Result<(), String> { + match self { + Self::Leaf => put_u8(0, buf), + Self::App { children } => { + put_u8(1, buf); + put_u64(children[0], buf); + put_u64(children[1], buf); + }, + Self::Binder { name, info, children } => { + let tag = 2 + + match info { + BinderInfo::Default => 0u8, + BinderInfo::Implicit => 1, + BinderInfo::StrictImplicit => 2, + BinderInfo::InstImplicit => 3, + }; + put_u8(tag, buf); + put_idx(name, idx, buf)?; + put_u64(children[0], buf); + put_u64(children[1], buf); + }, + Self::LetBinder { name, children } => { + put_u8(6, buf); + put_idx(name, idx, buf)?; + put_u64(children[0], buf); + put_u64(children[1], buf); + put_u64(children[2], buf); + }, + Self::Ref { name } => { + put_u8(7, buf); + put_idx(name, idx, buf)?; + }, + Self::Prj { struct_name, child } => { + put_u8(8, buf); + put_idx(struct_name, idx, buf)?; + put_u64(*child, buf); + }, + Self::Mdata { mdata, child } => { + put_u8(9, buf); + put_mdata_stack_indexed(mdata, idx, buf)?; + put_u64(*child, buf); + }, + } + Ok(()) + } + + pub fn get_indexed( + buf: &mut &[u8], + rev: &NameReverseIndex, + ) -> Result { + match get_u8(buf)? 
{ + 0 => Ok(Self::Leaf), + 1 => { + let c0 = get_u64(buf)?; + let c1 = get_u64(buf)?; + Ok(Self::App { children: [c0, c1] }) + }, + tag @ 2..=5 => { + let info = match tag { + 2 => BinderInfo::Default, + 3 => BinderInfo::Implicit, + 4 => BinderInfo::StrictImplicit, + 5 => BinderInfo::InstImplicit, + _ => unreachable!(), + }; + let name = get_idx(buf, rev)?; + let c0 = get_u64(buf)?; + let c1 = get_u64(buf)?; + Ok(Self::Binder { name, info, children: [c0, c1] }) + }, + 6 => { + let name = get_idx(buf, rev)?; + let c0 = get_u64(buf)?; + let c1 = get_u64(buf)?; + let c2 = get_u64(buf)?; + Ok(Self::LetBinder { name, children: [c0, c1, c2] }) + }, + 7 => { + let name = get_idx(buf, rev)?; + Ok(Self::Ref { name }) + }, + 8 => { + let struct_name = get_idx(buf, rev)?; + let child = get_u64(buf)?; + Ok(Self::Prj { struct_name, child }) + }, + 9 => { + let mdata = get_mdata_stack_indexed(buf, rev)?; + let child = get_u64(buf)?; + Ok(Self::Mdata { mdata, child }) + }, + x => Err(format!("ExprMetaData::get: invalid tag {x}")), + } + } +} + +// =========================================================================== +// ExprMeta (arena) indexed serialization +// =========================================================================== + +impl ExprMeta { + pub fn put_indexed( + &self, + idx: &NameIndex, + buf: &mut Vec, + ) -> Result<(), String> { + put_vec_len(self.nodes.len(), buf); + for node in &self.nodes { + node.put_indexed(idx, buf)?; + } + Ok(()) + } + + pub fn get_indexed( + buf: &mut &[u8], + rev: &NameReverseIndex, + ) -> Result { + let len = get_vec_len(buf)?; + let mut nodes = Vec::with_capacity(len); + for _ in 0..len { + nodes.push(ExprMetaData::get_indexed(buf, rev)?); + } + Ok(ExprMeta { nodes }) + } +} + +fn put_u64_vec(v: &[u64], buf: &mut Vec) { + put_vec_len(v.len(), buf); + for &x in v { + put_u64(x, buf); + } +} + +fn get_u64_vec(buf: &mut &[u8]) -> Result, String> { + let len = get_vec_len(buf)?; + let mut v = Vec::with_capacity(len); + for _ in 
0..len { + v.push(get_u64(buf)?); + } + Ok(v) +} + +// =========================================================================== +// ConstantMeta indexed serialization +// =========================================================================== + +impl ConstantMeta { + pub fn put_indexed( + &self, + idx: &NameIndex, + buf: &mut Vec, + ) -> Result<(), String> { + match self { + Self::Empty => put_u8(255, buf), + Self::Def { + name, + lvls, + hints, + all, + ctx, + arena, + type_root, + value_root, + } => { + put_u8(0, buf); + put_idx(name, idx, buf)?; + put_idx_vec(lvls, idx, buf)?; + hints.put(buf); + put_idx_vec(all, idx, buf)?; + put_idx_vec(ctx, idx, buf)?; + arena.put_indexed(idx, buf)?; + put_u64(*type_root, buf); + put_u64(*value_root, buf); + }, + Self::Axio { name, lvls, arena, type_root } => { + put_u8(1, buf); + put_idx(name, idx, buf)?; + put_idx_vec(lvls, idx, buf)?; + arena.put_indexed(idx, buf)?; + put_u64(*type_root, buf); + }, + Self::Quot { name, lvls, arena, type_root } => { + put_u8(2, buf); + put_idx(name, idx, buf)?; + put_idx_vec(lvls, idx, buf)?; + arena.put_indexed(idx, buf)?; + put_u64(*type_root, buf); + }, + Self::Indc { name, lvls, ctors, all, ctx, arena, type_root } => { + put_u8(3, buf); + put_idx(name, idx, buf)?; + put_idx_vec(lvls, idx, buf)?; + put_idx_vec(ctors, idx, buf)?; + put_idx_vec(all, idx, buf)?; + put_idx_vec(ctx, idx, buf)?; + arena.put_indexed(idx, buf)?; + put_u64(*type_root, buf); + }, + Self::Ctor { name, lvls, induct, arena, type_root } => { + put_u8(4, buf); + put_idx(name, idx, buf)?; + put_idx_vec(lvls, idx, buf)?; + put_idx(induct, idx, buf)?; + arena.put_indexed(idx, buf)?; + put_u64(*type_root, buf); + }, + Self::Rec { + name, + lvls, + rules, + all, + ctx, + arena, + type_root, + rule_roots, + } => { + put_u8(5, buf); + put_idx(name, idx, buf)?; + put_idx_vec(lvls, idx, buf)?; + put_idx_vec(rules, idx, buf)?; + put_idx_vec(all, idx, buf)?; + put_idx_vec(ctx, idx, buf)?; + arena.put_indexed(idx, buf)?; + 
put_u64(*type_root, buf); + put_u64_vec(rule_roots, buf); + }, + } + Ok(()) + } + + pub fn get_indexed( + buf: &mut &[u8], + rev: &NameReverseIndex, + ) -> Result { + match get_u8(buf)? { + 255 => Ok(Self::Empty), + 0 => Ok(Self::Def { + name: get_idx(buf, rev)?, + lvls: get_idx_vec(buf, rev)?, + hints: ReducibilityHints::get_ser(buf)?, + all: get_idx_vec(buf, rev)?, + ctx: get_idx_vec(buf, rev)?, + arena: ExprMeta::get_indexed(buf, rev)?, + type_root: get_u64(buf)?, + value_root: get_u64(buf)?, + }), + 1 => Ok(Self::Axio { + name: get_idx(buf, rev)?, + lvls: get_idx_vec(buf, rev)?, + arena: ExprMeta::get_indexed(buf, rev)?, + type_root: get_u64(buf)?, + }), + 2 => Ok(Self::Quot { + name: get_idx(buf, rev)?, + lvls: get_idx_vec(buf, rev)?, + arena: ExprMeta::get_indexed(buf, rev)?, + type_root: get_u64(buf)?, + }), + 3 => Ok(Self::Indc { + name: get_idx(buf, rev)?, + lvls: get_idx_vec(buf, rev)?, + ctors: get_idx_vec(buf, rev)?, + all: get_idx_vec(buf, rev)?, + ctx: get_idx_vec(buf, rev)?, + arena: ExprMeta::get_indexed(buf, rev)?, + type_root: get_u64(buf)?, + }), + 4 => Ok(Self::Ctor { + name: get_idx(buf, rev)?, + lvls: get_idx_vec(buf, rev)?, + induct: get_idx(buf, rev)?, + arena: ExprMeta::get_indexed(buf, rev)?, + type_root: get_u64(buf)?, + }), + 5 => Ok(Self::Rec { + name: get_idx(buf, rev)?, + lvls: get_idx_vec(buf, rev)?, + rules: get_idx_vec(buf, rev)?, + all: get_idx_vec(buf, rev)?, + ctx: get_idx_vec(buf, rev)?, + arena: ExprMeta::get_indexed(buf, rev)?, + type_root: get_u64(buf)?, + rule_roots: get_u64_vec(buf)?, + }), + x => Err(format!("ConstantMeta::get: invalid tag {x}")), + } + } +} + +// =========================================================================== +// Tests +// =========================================================================== + +#[cfg(test)] +mod tests { + use super::*; + use quickcheck::{Arbitrary, Gen}; + + impl Arbitrary for BinderInfo { + fn arbitrary(g: &mut Gen) -> Self { + match u8::arbitrary(g) % 4 { + 0 => 
Self::Default, + 1 => Self::Implicit, + 2 => Self::StrictImplicit, + _ => Self::InstImplicit, + } + } + } + + impl Arbitrary for ReducibilityHints { + fn arbitrary(g: &mut Gen) -> Self { + match u8::arbitrary(g) % 3 { + 0 => Self::Opaque, + 1 => Self::Abbrev, + _ => Self::Regular(u32::arbitrary(g)), + } + } + } + + #[test] + fn test_binder_info_roundtrip() { + for bi in [ + BinderInfo::Default, + BinderInfo::Implicit, + BinderInfo::StrictImplicit, + BinderInfo::InstImplicit, + ] { + let mut buf = Vec::new(); + bi.put(&mut buf); + assert_eq!(BinderInfo::get_ser(&mut buf.as_slice()).unwrap(), bi); + } + } + + #[test] + fn test_reducibility_hints_roundtrip() { + for h in [ + ReducibilityHints::Opaque, + ReducibilityHints::Abbrev, + ReducibilityHints::Regular(42), + ] { + let mut buf = Vec::new(); + h.put(&mut buf); + assert_eq!(ReducibilityHints::get_ser(&mut buf.as_slice()).unwrap(), h); + } + } + + #[test] + fn test_constant_meta_indexed_roundtrip() { + // Create test addresses + let addr1 = Address::from_slice(&[1u8; 32]).unwrap(); + let addr2 = Address::from_slice(&[2u8; 32]).unwrap(); + let addr3 = Address::from_slice(&[3u8; 32]).unwrap(); + + // Build index + let mut idx = NameIndex::new(); + idx.insert(addr1.clone(), 0); + idx.insert(addr2.clone(), 1); + idx.insert(addr3.clone(), 2); + + // Build reverse index + let rev: NameReverseIndex = + vec![addr1.clone(), addr2.clone(), addr3.clone()]; + + // Test Def variant with arena + let mut arena = ExprMeta::default(); + let leaf = arena.alloc(ExprMetaData::Leaf); + let binder = arena.alloc(ExprMetaData::Binder { + name: addr1.clone(), + info: BinderInfo::Default, + children: [leaf, leaf], + }); + + let meta = ConstantMeta::Def { + name: addr1.clone(), + lvls: vec![addr2.clone(), addr3.clone()], + hints: ReducibilityHints::Regular(10), + all: vec![addr1.clone()], + ctx: vec![addr2.clone()], + arena, + type_root: binder, + value_root: leaf, + }; + + let mut buf = Vec::new(); + meta.put_indexed(&idx, &mut 
buf).unwrap(); + let recovered = + ConstantMeta::get_indexed(&mut buf.as_slice(), &rev).unwrap(); + assert_eq!(meta, recovered); + } + + #[test] + fn test_expr_meta_arena_roundtrip() { + let addr1 = Address::from_slice(&[1u8; 32]).unwrap(); + + let mut idx = NameIndex::new(); + idx.insert(addr1.clone(), 0); + let rev: NameReverseIndex = vec![addr1.clone()]; + + let mut arena = ExprMeta::default(); + let leaf = arena.alloc(ExprMetaData::Leaf); + let ref_node = arena.alloc(ExprMetaData::Ref { name: addr1.clone() }); + let app = arena.alloc(ExprMetaData::App { children: [leaf, ref_node] }); + let mdata = arena.alloc(ExprMetaData::Mdata { + mdata: vec![vec![(addr1.clone(), DataValue::OfBool(true))]], + child: app, + }); + let _ = mdata; + + let mut buf = Vec::new(); + arena.put_indexed(&idx, &mut buf).unwrap(); + let recovered = ExprMeta::get_indexed(&mut buf.as_slice(), &rev).unwrap(); + assert_eq!(arena, recovered); + } +} diff --git a/src/ix/ixon/proof.rs b/src/ix/ixon/proof.rs new file mode 100644 index 00000000..00b4d0c8 --- /dev/null +++ b/src/ix/ixon/proof.rs @@ -0,0 +1,1734 @@ +//! Proof, claim, and selective-revelation types for ZK verification. +//! +//! Claims assert properties about committed constants (type-checking, evaluation, +//! or selective field revelation). Proofs pair a claim with opaque proof bytes. +//! +//! `RevealConstantInfo` uses bitmask-based serialization: a mask (Tag0) encodes +//! which fields are present, followed by only the present field values in bit +//! order. This enables revealing specific fields of a committed constant without +//! exposing the full structure. 
+
+use crate::ix::address::Address;
+use crate::ix::env::{DefinitionSafety, QuotKind};
+
+use super::constant::DefKind;
+use super::tag::{Tag0, Tag4};
+
+// ============================================================================
+// Core claim/proof types
+// ============================================================================
+
+/// An evaluation claim: asserts that the constant at `input` evaluates to the
+/// constant at `output`.
+#[derive(Clone, Debug, PartialEq, Eq, Hash)]
+pub struct EvalClaim {
+  /// Address of the input constant
+  pub input: Address,
+  /// Address of the output constant
+  pub output: Address,
+}
+
+/// A type-checking claim: asserts that the constant at `value` is well-typed.
+#[derive(Clone, Debug, PartialEq, Eq, Hash)]
+pub struct CheckClaim {
+  /// Address of the value constant
+  pub value: Address,
+}
+
+// ============================================================================
+// RevealClaim types
+// ============================================================================
+
+/// Revealed fields of a Constructor within an Inductive.
+#[derive(Clone, Debug, PartialEq, Eq, Hash)]
+pub struct RevealConstructorInfo {
+  pub is_unsafe: Option<bool>,
+  pub lvls: Option<u64>,
+  pub cidx: Option<u64>,
+  pub params: Option<u64>,
+  pub fields: Option<u64>,
+  /// blake3(serialized typ Expr)
+  pub typ: Option<Address>,
+}
+
+/// Revealed fields of a RecursorRule.
+#[derive(Clone, Debug, PartialEq, Eq, Hash)]
+pub struct RevealRecursorRule {
+  pub rule_idx: u64,
+  pub fields: u64,
+  /// blake3(serialized rhs Expr)
+  pub rhs: Address,
+}
+
+/// Revealed fields of a MutConst component.
+#[derive(Clone, Debug, PartialEq, Eq, Hash)]
+pub enum RevealMutConstInfo {
+  Defn {
+    kind: Option<DefKind>,
+    safety: Option<DefinitionSafety>,
+    lvls: Option<u64>,
+    typ: Option<Address>,
+    value: Option<Address>,
+  },
+  Indc {
+    recr: Option<bool>,
+    refl: Option<bool>,
+    is_unsafe: Option<bool>,
+    lvls: Option<u64>,
+    params: Option<u64>,
+    indices: Option<u64>,
+    nested: Option<u64>,
+    typ: Option<Address>,
+    ctors: Option<Vec<(u64, RevealConstructorInfo)>>,
+  },
+  Recr {
+    k: Option<bool>,
+    is_unsafe: Option<bool>,
+    lvls: Option<u64>,
+    params: Option<u64>,
+    indices: Option<u64>,
+    motives: Option<u64>,
+    minors: Option<u64>,
+    typ: Option<Address>,
+    rules: Option<Vec<RevealRecursorRule>>,
+  },
+}
+
+/// Revealed fields of a ConstantInfo behind a commitment.
+#[derive(Clone, Debug, PartialEq, Eq, Hash)]
+pub enum RevealConstantInfo {
+  Defn {
+    kind: Option<DefKind>,
+    safety: Option<DefinitionSafety>,
+    lvls: Option<u64>,
+    typ: Option<Address>,
+    value: Option<Address>,
+  },
+  Recr {
+    k: Option<bool>,
+    is_unsafe: Option<bool>,
+    lvls: Option<u64>,
+    params: Option<u64>,
+    indices: Option<u64>,
+    motives: Option<u64>,
+    minors: Option<u64>,
+    typ: Option<Address>,
+    rules: Option<Vec<RevealRecursorRule>>,
+  },
+  Axio {
+    is_unsafe: Option<bool>,
+    lvls: Option<u64>,
+    typ: Option<Address>,
+  },
+  Quot {
+    kind: Option<QuotKind>,
+    lvls: Option<u64>,
+    typ: Option<Address>,
+  },
+  CPrj {
+    idx: Option<u64>,
+    cidx: Option<u64>,
+    block: Option<Address>,
+  },
+  RPrj {
+    idx: Option<u64>,
+    block: Option<Address>,
+  },
+  IPrj {
+    idx: Option<u64>,
+    block: Option<Address>,
+  },
+  DPrj {
+    idx: Option<u64>,
+    block: Option<Address>,
+  },
+  Muts {
+    components: Vec<(u64, RevealMutConstInfo)>,
+  },
+}
+
+/// A reveal claim: selective revelation of fields of a committed constant.
+#[derive(Clone, Debug, PartialEq, Eq, Hash)]
+pub struct RevealClaim {
+  /// Address of the commitment
+  pub comm: Address,
+  /// Revealed field information
+  pub info: RevealConstantInfo,
+}
+
+// ============================================================================
+// Claim and Proof enums
+// ============================================================================
+
+/// A claim that can be proven.
+#[derive(Clone, Debug, PartialEq, Eq, Hash)]
+pub enum Claim {
+  /// Evaluation claim
+  Evals(EvalClaim),
+  /// Type-checking claim
+  Checks(CheckClaim),
+  /// Reveal claim (selective field revelation)
+  Reveals(RevealClaim),
+}
+
+/// A proof of a claim.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct Proof {
+  /// The claim being proven
+  pub claim: Claim,
+  /// The proof data (opaque bytes, e.g., ZK proof)
+  pub proof: Vec<u8>,
+}
+
+// ============================================================================
+// Tag4 variant layout for flag 0xE
+// ============================================================================
+
+/// Tag4 flag for claims, proofs, commitments, and environment (0xE).
+/// Size field encodes variant: +/// - 0: Environment (Env) +/// - 1: CheckProof (proof of CheckClaim) +/// - 2: EvalProof (proof of EvalClaim) +/// - 3: CheckClaim (no proof) +/// - 4: EvalClaim (no proof) +/// - 5: Commitment +/// - 6: RevealClaim +/// - 7: RevealProof +pub const FLAG: u8 = 0xE; + +const VARIANT_CHECK_PROOF: u64 = 1; +const VARIANT_EVAL_PROOF: u64 = 2; +const VARIANT_CHECK_CLAIM: u64 = 3; +const VARIANT_EVAL_CLAIM: u64 = 4; +// VARIANT 5 = Comm (handled in comm.rs) +const VARIANT_REVEAL_CLAIM: u64 = 6; +const VARIANT_REVEAL_PROOF: u64 = 7; + +// ============================================================================ +// Serialization helpers +// ============================================================================ + +fn get_address(buf: &mut &[u8]) -> Result { + if buf.len() < 32 { + return Err(format!("get_address: need 32 bytes, have {}", buf.len())); + } + let (bytes, rest) = buf.split_at(32); + *buf = rest; + Address::from_slice(bytes).map_err(|_e| "get_address: invalid".to_string()) +} + +fn get_u8(buf: &mut &[u8]) -> Result { + match buf.split_first() { + Some((&x, rest)) => { + *buf = rest; + Ok(x) + }, + None => Err("get_u8: EOF".to_string()), + } +} + +fn compute_mask(flags: &[bool]) -> u64 { + let mut mask: u64 = 0; + for (i, &f) in flags.iter().enumerate() { + if f { + mask |= 1 << i; + } + } + mask +} + +fn put_def_kind(k: DefKind, buf: &mut Vec) { + buf.push(match k { + DefKind::Definition => 0, + DefKind::Opaque => 1, + DefKind::Theorem => 2, + }); +} + +fn get_def_kind(buf: &mut &[u8]) -> Result { + match get_u8(buf)? { + 0 => Ok(DefKind::Definition), + 1 => Ok(DefKind::Opaque), + 2 => Ok(DefKind::Theorem), + x => Err(format!("get_def_kind: invalid {x}")), + } +} + +fn put_def_safety(s: DefinitionSafety, buf: &mut Vec) { + buf.push(match s { + DefinitionSafety::Unsafe => 0, + DefinitionSafety::Safe => 1, + DefinitionSafety::Partial => 2, + }); +} + +fn get_def_safety(buf: &mut &[u8]) -> Result { + match get_u8(buf)? 
{ + 0 => Ok(DefinitionSafety::Unsafe), + 1 => Ok(DefinitionSafety::Safe), + 2 => Ok(DefinitionSafety::Partial), + x => Err(format!("get_def_safety: invalid {x}")), + } +} + +fn put_quot_kind(k: QuotKind, buf: &mut Vec) { + buf.push(match k { + QuotKind::Type => 0, + QuotKind::Ctor => 1, + QuotKind::Lift => 2, + QuotKind::Ind => 3, + }); +} + +fn get_quot_kind(buf: &mut &[u8]) -> Result { + match get_u8(buf)? { + 0 => Ok(QuotKind::Type), + 1 => Ok(QuotKind::Ctor), + 2 => Ok(QuotKind::Lift), + 3 => Ok(QuotKind::Ind), + x => Err(format!("get_quot_kind: invalid {x}")), + } +} + +fn put_bool_field(b: bool, buf: &mut Vec) { + buf.push(u8::from(b)); +} + +fn get_bool_field(buf: &mut &[u8]) -> Result { + match get_u8(buf)? { + 0 => Ok(false), + 1 => Ok(true), + x => Err(format!("get_bool_field: invalid {x}")), + } +} + +// ============================================================================ +// RevealConstructorInfo serialization +// ============================================================================ + +impl RevealConstructorInfo { + /// Serialize: mask (Tag0) + field values in mask order. + pub fn put(&self, buf: &mut Vec) { + let mask = compute_mask(&[ + self.is_unsafe.is_some(), + self.lvls.is_some(), + self.cidx.is_some(), + self.params.is_some(), + self.fields.is_some(), + self.typ.is_some(), + ]); + Tag0::new(mask).put(buf); + if let Some(u) = self.is_unsafe { + put_bool_field(u, buf); + } + if let Some(l) = self.lvls { + Tag0::new(l).put(buf); + } + if let Some(c) = self.cidx { + Tag0::new(c).put(buf); + } + if let Some(p) = self.params { + Tag0::new(p).put(buf); + } + if let Some(f) = self.fields { + Tag0::new(f).put(buf); + } + if let Some(t) = &self.typ { + buf.extend_from_slice(t.as_bytes()); + } + } + + pub fn get(buf: &mut &[u8]) -> Result { + let mask = Tag0::get(buf)?.size; + let is_unsafe = + if mask & 1 != 0 { Some(get_bool_field(buf)?) 
} else { None }; + let lvls = if mask & 2 != 0 { Some(Tag0::get(buf)?.size) } else { None }; + let cidx = if mask & 4 != 0 { Some(Tag0::get(buf)?.size) } else { None }; + let params = if mask & 8 != 0 { Some(Tag0::get(buf)?.size) } else { None }; + let fields = if mask & 16 != 0 { Some(Tag0::get(buf)?.size) } else { None }; + let typ = if mask & 32 != 0 { Some(get_address(buf)?) } else { None }; + Ok(RevealConstructorInfo { is_unsafe, lvls, cidx, params, fields, typ }) + } +} + +// ============================================================================ +// RevealRecursorRule serialization +// ============================================================================ + +impl RevealRecursorRule { + pub fn put(&self, buf: &mut Vec) { + Tag0::new(self.rule_idx).put(buf); + Tag0::new(self.fields).put(buf); + buf.extend_from_slice(self.rhs.as_bytes()); + } + + pub fn get(buf: &mut &[u8]) -> Result { + let rule_idx = Tag0::get(buf)?.size; + let fields = Tag0::get(buf)?.size; + let rhs = get_address(buf)?; + Ok(RevealRecursorRule { rule_idx, fields, rhs }) + } +} + +// ============================================================================ +// Helper: put/get rules and ctors arrays +// ============================================================================ + +fn put_rules(rules: &[RevealRecursorRule], buf: &mut Vec) { + Tag0::new(rules.len() as u64).put(buf); + for rule in rules { + rule.put(buf); + } +} + +fn get_rules(buf: &mut &[u8]) -> Result, String> { + let count = + usize::try_from(Tag0::get(buf)?.size).map_err(|e| e.to_string())?; + let mut rules = Vec::with_capacity(count); + for _ in 0..count { + rules.push(RevealRecursorRule::get(buf)?); + } + Ok(rules) +} + +fn put_ctors(ctors: &[(u64, RevealConstructorInfo)], buf: &mut Vec) { + Tag0::new(ctors.len() as u64).put(buf); + for (idx, info) in ctors { + Tag0::new(*idx).put(buf); + info.put(buf); + } +} + +fn get_ctors( + buf: &mut &[u8], +) -> Result, String> { + let count = + 
usize::try_from(Tag0::get(buf)?.size).map_err(|e| e.to_string())?; + let mut ctors = Vec::with_capacity(count); + for _ in 0..count { + let idx = Tag0::get(buf)?.size; + let info = RevealConstructorInfo::get(buf)?; + ctors.push((idx, info)); + } + Ok(ctors) +} + +// ============================================================================ +// RevealMutConstInfo serialization +// ============================================================================ + +impl RevealMutConstInfo { + pub fn put(&self, buf: &mut Vec) { + match self { + Self::Defn { kind, safety, lvls, typ, value } => { + buf.push(0); + let mask = compute_mask(&[ + kind.is_some(), + safety.is_some(), + lvls.is_some(), + typ.is_some(), + value.is_some(), + ]); + Tag0::new(mask).put(buf); + if let Some(k) = kind { + put_def_kind(*k, buf); + } + if let Some(s) = safety { + put_def_safety(*s, buf); + } + if let Some(l) = lvls { + Tag0::new(*l).put(buf); + } + if let Some(t) = typ { + buf.extend_from_slice(t.as_bytes()); + } + if let Some(v) = value { + buf.extend_from_slice(v.as_bytes()); + } + }, + Self::Indc { + recr, + refl, + is_unsafe, + lvls, + params, + indices, + nested, + typ, + ctors, + } => { + buf.push(1); + let mask = compute_mask(&[ + recr.is_some(), + refl.is_some(), + is_unsafe.is_some(), + lvls.is_some(), + params.is_some(), + indices.is_some(), + nested.is_some(), + typ.is_some(), + ctors.is_some(), + ]); + Tag0::new(mask).put(buf); + if let Some(r) = recr { + put_bool_field(*r, buf); + } + if let Some(r) = refl { + put_bool_field(*r, buf); + } + if let Some(u) = is_unsafe { + put_bool_field(*u, buf); + } + if let Some(l) = lvls { + Tag0::new(*l).put(buf); + } + if let Some(p) = params { + Tag0::new(*p).put(buf); + } + if let Some(i) = indices { + Tag0::new(*i).put(buf); + } + if let Some(n) = nested { + Tag0::new(*n).put(buf); + } + if let Some(t) = typ { + buf.extend_from_slice(t.as_bytes()); + } + if let Some(c) = ctors { + put_ctors(c, buf); + } + }, + Self::Recr { + k, + 
is_unsafe, + lvls, + params, + indices, + motives, + minors, + typ, + rules, + } => { + buf.push(2); + let mask = compute_mask(&[ + k.is_some(), + is_unsafe.is_some(), + lvls.is_some(), + params.is_some(), + indices.is_some(), + motives.is_some(), + minors.is_some(), + typ.is_some(), + rules.is_some(), + ]); + Tag0::new(mask).put(buf); + if let Some(k) = k { + put_bool_field(*k, buf); + } + if let Some(u) = is_unsafe { + put_bool_field(*u, buf); + } + if let Some(l) = lvls { + Tag0::new(*l).put(buf); + } + if let Some(p) = params { + Tag0::new(*p).put(buf); + } + if let Some(i) = indices { + Tag0::new(*i).put(buf); + } + if let Some(m) = motives { + Tag0::new(*m).put(buf); + } + if let Some(m) = minors { + Tag0::new(*m).put(buf); + } + if let Some(t) = typ { + buf.extend_from_slice(t.as_bytes()); + } + if let Some(r) = rules { + put_rules(r, buf); + } + }, + } + } + + pub fn get(buf: &mut &[u8]) -> Result { + let variant = get_u8(buf)?; + let mask = Tag0::get(buf)?.size; + match variant { + 0 => { + let kind = if mask & 1 != 0 { Some(get_def_kind(buf)?) } else { None }; + let safety = + if mask & 2 != 0 { Some(get_def_safety(buf)?) } else { None }; + let lvls = + if mask & 4 != 0 { Some(Tag0::get(buf)?.size) } else { None }; + let typ = if mask & 8 != 0 { Some(get_address(buf)?) } else { None }; + let value = if mask & 16 != 0 { Some(get_address(buf)?) } else { None }; + Ok(Self::Defn { kind, safety, lvls, typ, value }) + }, + 1 => { + let recr = + if mask & 1 != 0 { Some(get_bool_field(buf)?) } else { None }; + let refl = + if mask & 2 != 0 { Some(get_bool_field(buf)?) } else { None }; + let is_unsafe = + if mask & 4 != 0 { Some(get_bool_field(buf)?) 
} else { None }; + let lvls = + if mask & 8 != 0 { Some(Tag0::get(buf)?.size) } else { None }; + let params = + if mask & 16 != 0 { Some(Tag0::get(buf)?.size) } else { None }; + let indices = + if mask & 32 != 0 { Some(Tag0::get(buf)?.size) } else { None }; + let nested = + if mask & 64 != 0 { Some(Tag0::get(buf)?.size) } else { None }; + let typ = if mask & 128 != 0 { Some(get_address(buf)?) } else { None }; + let ctors = if mask & 256 != 0 { Some(get_ctors(buf)?) } else { None }; + Ok(Self::Indc { + recr, + refl, + is_unsafe, + lvls, + params, + indices, + nested, + typ, + ctors, + }) + }, + 2 => { + let k = if mask & 1 != 0 { Some(get_bool_field(buf)?) } else { None }; + let is_unsafe = + if mask & 2 != 0 { Some(get_bool_field(buf)?) } else { None }; + let lvls = + if mask & 4 != 0 { Some(Tag0::get(buf)?.size) } else { None }; + let params = + if mask & 8 != 0 { Some(Tag0::get(buf)?.size) } else { None }; + let indices = + if mask & 16 != 0 { Some(Tag0::get(buf)?.size) } else { None }; + let motives = + if mask & 32 != 0 { Some(Tag0::get(buf)?.size) } else { None }; + let minors = + if mask & 64 != 0 { Some(Tag0::get(buf)?.size) } else { None }; + let typ = if mask & 128 != 0 { Some(get_address(buf)?) } else { None }; + let rules = if mask & 256 != 0 { Some(get_rules(buf)?) } else { None }; + Ok(Self::Recr { + k, + is_unsafe, + lvls, + params, + indices, + motives, + minors, + typ, + rules, + }) + }, + x => Err(format!("RevealMutConstInfo::get: invalid variant {x}")), + } + } +} + +// ============================================================================ +// RevealConstantInfo serialization +// ============================================================================ + +impl RevealConstantInfo { + /// Serialize: variant byte + mask (Tag0) + field values in mask order. 
+ pub fn put(&self, buf: &mut Vec) { + match self { + Self::Defn { kind, safety, lvls, typ, value } => { + buf.push(0); + let mask = compute_mask(&[ + kind.is_some(), + safety.is_some(), + lvls.is_some(), + typ.is_some(), + value.is_some(), + ]); + Tag0::new(mask).put(buf); + if let Some(k) = kind { + put_def_kind(*k, buf); + } + if let Some(s) = safety { + put_def_safety(*s, buf); + } + if let Some(l) = lvls { + Tag0::new(*l).put(buf); + } + if let Some(t) = typ { + buf.extend_from_slice(t.as_bytes()); + } + if let Some(v) = value { + buf.extend_from_slice(v.as_bytes()); + } + }, + Self::Recr { + k, + is_unsafe, + lvls, + params, + indices, + motives, + minors, + typ, + rules, + } => { + buf.push(1); + let mask = compute_mask(&[ + k.is_some(), + is_unsafe.is_some(), + lvls.is_some(), + params.is_some(), + indices.is_some(), + motives.is_some(), + minors.is_some(), + typ.is_some(), + rules.is_some(), + ]); + Tag0::new(mask).put(buf); + if let Some(k) = k { + put_bool_field(*k, buf); + } + if let Some(u) = is_unsafe { + put_bool_field(*u, buf); + } + if let Some(l) = lvls { + Tag0::new(*l).put(buf); + } + if let Some(p) = params { + Tag0::new(*p).put(buf); + } + if let Some(i) = indices { + Tag0::new(*i).put(buf); + } + if let Some(m) = motives { + Tag0::new(*m).put(buf); + } + if let Some(m) = minors { + Tag0::new(*m).put(buf); + } + if let Some(t) = typ { + buf.extend_from_slice(t.as_bytes()); + } + if let Some(r) = rules { + put_rules(r, buf); + } + }, + Self::Axio { is_unsafe, lvls, typ } => { + buf.push(2); + let mask = + compute_mask(&[is_unsafe.is_some(), lvls.is_some(), typ.is_some()]); + Tag0::new(mask).put(buf); + if let Some(u) = is_unsafe { + put_bool_field(*u, buf); + } + if let Some(l) = lvls { + Tag0::new(*l).put(buf); + } + if let Some(t) = typ { + buf.extend_from_slice(t.as_bytes()); + } + }, + Self::Quot { kind, lvls, typ } => { + buf.push(3); + let mask = + compute_mask(&[kind.is_some(), lvls.is_some(), typ.is_some()]); + Tag0::new(mask).put(buf); 
+ if let Some(k) = kind { + put_quot_kind(*k, buf); + } + if let Some(l) = lvls { + Tag0::new(*l).put(buf); + } + if let Some(t) = typ { + buf.extend_from_slice(t.as_bytes()); + } + }, + Self::CPrj { idx, cidx, block } => { + buf.push(4); + let mask = + compute_mask(&[idx.is_some(), cidx.is_some(), block.is_some()]); + Tag0::new(mask).put(buf); + if let Some(i) = idx { + Tag0::new(*i).put(buf); + } + if let Some(c) = cidx { + Tag0::new(*c).put(buf); + } + if let Some(b) = block { + buf.extend_from_slice(b.as_bytes()); + } + }, + Self::RPrj { idx, block } => { + buf.push(5); + let mask = compute_mask(&[idx.is_some(), block.is_some()]); + Tag0::new(mask).put(buf); + if let Some(i) = idx { + Tag0::new(*i).put(buf); + } + if let Some(b) = block { + buf.extend_from_slice(b.as_bytes()); + } + }, + Self::IPrj { idx, block } => { + buf.push(6); + let mask = compute_mask(&[idx.is_some(), block.is_some()]); + Tag0::new(mask).put(buf); + if let Some(i) = idx { + Tag0::new(*i).put(buf); + } + if let Some(b) = block { + buf.extend_from_slice(b.as_bytes()); + } + }, + Self::DPrj { idx, block } => { + buf.push(7); + let mask = compute_mask(&[idx.is_some(), block.is_some()]); + Tag0::new(mask).put(buf); + if let Some(i) = idx { + Tag0::new(*i).put(buf); + } + if let Some(b) = block { + buf.extend_from_slice(b.as_bytes()); + } + }, + Self::Muts { components } => { + buf.push(8); + let mask: u64 = if components.is_empty() { 0 } else { 1 }; + Tag0::new(mask).put(buf); + if !components.is_empty() { + Tag0::new(components.len() as u64).put(buf); + for (idx, info) in components { + Tag0::new(*idx).put(buf); + info.put(buf); + } + } + }, + } + } + + pub fn get(buf: &mut &[u8]) -> Result { + let variant = get_u8(buf)?; + let mask = Tag0::get(buf)?.size; + match variant { + 0 => { + // Defn + let kind = if mask & 1 != 0 { Some(get_def_kind(buf)?) } else { None }; + let safety = + if mask & 2 != 0 { Some(get_def_safety(buf)?) 
} else { None }; + let lvls = + if mask & 4 != 0 { Some(Tag0::get(buf)?.size) } else { None }; + let typ = if mask & 8 != 0 { Some(get_address(buf)?) } else { None }; + let value = if mask & 16 != 0 { Some(get_address(buf)?) } else { None }; + Ok(Self::Defn { kind, safety, lvls, typ, value }) + }, + 1 => { + // Recr + let k = if mask & 1 != 0 { Some(get_bool_field(buf)?) } else { None }; + let is_unsafe = + if mask & 2 != 0 { Some(get_bool_field(buf)?) } else { None }; + let lvls = + if mask & 4 != 0 { Some(Tag0::get(buf)?.size) } else { None }; + let params = + if mask & 8 != 0 { Some(Tag0::get(buf)?.size) } else { None }; + let indices = + if mask & 16 != 0 { Some(Tag0::get(buf)?.size) } else { None }; + let motives = + if mask & 32 != 0 { Some(Tag0::get(buf)?.size) } else { None }; + let minors = + if mask & 64 != 0 { Some(Tag0::get(buf)?.size) } else { None }; + let typ = if mask & 128 != 0 { Some(get_address(buf)?) } else { None }; + let rules = if mask & 256 != 0 { Some(get_rules(buf)?) } else { None }; + Ok(Self::Recr { + k, + is_unsafe, + lvls, + params, + indices, + motives, + minors, + typ, + rules, + }) + }, + 2 => { + // Axio + let is_unsafe = + if mask & 1 != 0 { Some(get_bool_field(buf)?) } else { None }; + let lvls = + if mask & 2 != 0 { Some(Tag0::get(buf)?.size) } else { None }; + let typ = if mask & 4 != 0 { Some(get_address(buf)?) } else { None }; + Ok(Self::Axio { is_unsafe, lvls, typ }) + }, + 3 => { + // Quot + let kind = if mask & 1 != 0 { Some(get_quot_kind(buf)?) } else { None }; + let lvls = + if mask & 2 != 0 { Some(Tag0::get(buf)?.size) } else { None }; + let typ = if mask & 4 != 0 { Some(get_address(buf)?) } else { None }; + Ok(Self::Quot { kind, lvls, typ }) + }, + 4 => { + // CPrj + let idx = if mask & 1 != 0 { Some(Tag0::get(buf)?.size) } else { None }; + let cidx = + if mask & 2 != 0 { Some(Tag0::get(buf)?.size) } else { None }; + let block = if mask & 4 != 0 { Some(get_address(buf)?) 
} else { None }; + Ok(Self::CPrj { idx, cidx, block }) + }, + 5 => { + // RPrj + let idx = if mask & 1 != 0 { Some(Tag0::get(buf)?.size) } else { None }; + let block = if mask & 2 != 0 { Some(get_address(buf)?) } else { None }; + Ok(Self::RPrj { idx, block }) + }, + 6 => { + // IPrj + let idx = if mask & 1 != 0 { Some(Tag0::get(buf)?.size) } else { None }; + let block = if mask & 2 != 0 { Some(get_address(buf)?) } else { None }; + Ok(Self::IPrj { idx, block }) + }, + 7 => { + // DPrj + let idx = if mask & 1 != 0 { Some(Tag0::get(buf)?.size) } else { None }; + let block = if mask & 2 != 0 { Some(get_address(buf)?) } else { None }; + Ok(Self::DPrj { idx, block }) + }, + 8 => { + // Muts + let components = if mask & 1 != 0 { + let count = + usize::try_from(Tag0::get(buf)?.size).map_err(|e| e.to_string())?; + let mut comps = Vec::with_capacity(count); + for _ in 0..count { + let idx = Tag0::get(buf)?.size; + let info = RevealMutConstInfo::get(buf)?; + comps.push((idx, info)); + } + comps + } else { + Vec::new() + }; + Ok(Self::Muts { components }) + }, + x => Err(format!("RevealConstantInfo::get: invalid variant {x}")), + } + } +} + +// ============================================================================ +// Claim serialization +// ============================================================================ + +impl Claim { + pub fn put(&self, buf: &mut Vec) { + match self { + Claim::Evals(eval) => { + Tag4::new(FLAG, VARIANT_EVAL_CLAIM).put(buf); + buf.extend_from_slice(eval.input.as_bytes()); + buf.extend_from_slice(eval.output.as_bytes()); + }, + Claim::Checks(check) => { + Tag4::new(FLAG, VARIANT_CHECK_CLAIM).put(buf); + buf.extend_from_slice(check.value.as_bytes()); + }, + Claim::Reveals(reveal) => { + Tag4::new(FLAG, VARIANT_REVEAL_CLAIM).put(buf); + buf.extend_from_slice(reveal.comm.as_bytes()); + reveal.info.put(buf); + }, + } + } + + pub fn get(buf: &mut &[u8]) -> Result { + let tag = Tag4::get(buf)?; + if tag.flag != FLAG { + return Err(format!( + 
"Claim::get: expected flag 0x{:X}, got 0x{:X}", + FLAG, tag.flag + )); + } + + match tag.size { + VARIANT_EVAL_CLAIM => { + let input = get_address(buf)?; + let output = get_address(buf)?; + Ok(Claim::Evals(EvalClaim { input, output })) + }, + VARIANT_CHECK_CLAIM => { + let value = get_address(buf)?; + Ok(Claim::Checks(CheckClaim { value })) + }, + VARIANT_REVEAL_CLAIM => { + let comm = get_address(buf)?; + let info = RevealConstantInfo::get(buf)?; + Ok(Claim::Reveals(RevealClaim { comm, info })) + }, + VARIANT_EVAL_PROOF | VARIANT_CHECK_PROOF | VARIANT_REVEAL_PROOF => Err( + format!("Claim::get: got Proof variant {}, use Proof::get", tag.size), + ), + x => Err(format!("Claim::get: invalid variant {x}")), + } + } + + /// Serialize a claim and compute its content address. + pub fn commit(&self) -> (Address, Vec) { + let mut buf = Vec::new(); + self.put(&mut buf); + let addr = Address::hash(&buf); + (addr, buf) + } +} + +// ============================================================================ +// Proof serialization +// ============================================================================ + +impl Proof { + pub fn new(claim: Claim, proof: Vec) -> Self { + Proof { claim, proof } + } + + pub fn put(&self, buf: &mut Vec) { + match &self.claim { + Claim::Evals(eval) => { + Tag4::new(FLAG, VARIANT_EVAL_PROOF).put(buf); + buf.extend_from_slice(eval.input.as_bytes()); + buf.extend_from_slice(eval.output.as_bytes()); + }, + Claim::Checks(check) => { + Tag4::new(FLAG, VARIANT_CHECK_PROOF).put(buf); + buf.extend_from_slice(check.value.as_bytes()); + }, + Claim::Reveals(reveal) => { + Tag4::new(FLAG, VARIANT_REVEAL_PROOF).put(buf); + buf.extend_from_slice(reveal.comm.as_bytes()); + reveal.info.put(buf); + }, + } + // Proof bytes: length prefix + data + Tag0::new(self.proof.len() as u64).put(buf); + buf.extend_from_slice(&self.proof); + } + + pub fn get(buf: &mut &[u8]) -> Result { + let tag = Tag4::get(buf)?; + if tag.flag != FLAG { + return Err(format!( + 
"Proof::get: expected flag 0x{:X}, got 0x{:X}", + FLAG, tag.flag + )); + } + + let claim = match tag.size { + VARIANT_EVAL_PROOF => { + let input = get_address(buf)?; + let output = get_address(buf)?; + Claim::Evals(EvalClaim { input, output }) + }, + VARIANT_CHECK_PROOF => { + let value = get_address(buf)?; + Claim::Checks(CheckClaim { value }) + }, + VARIANT_REVEAL_PROOF => { + let comm = get_address(buf)?; + let info = RevealConstantInfo::get(buf)?; + Claim::Reveals(RevealClaim { comm, info }) + }, + VARIANT_EVAL_CLAIM | VARIANT_CHECK_CLAIM | VARIANT_REVEAL_CLAIM => { + return Err(format!( + "Proof::get: got Claim variant {}, use Claim::get", + tag.size + )); + }, + x => return Err(format!("Proof::get: invalid variant {x}")), + }; + + // Proof bytes + let len = usize::try_from(Tag0::get(buf)?.size) + .map_err(|_e| "Proof::get: Tag0 size overflows usize".to_string())?; + if buf.len() < len { + return Err(format!( + "Proof::get: need {} bytes for proof data, have {}", + len, + buf.len() + )); + } + let (proof_bytes, rest) = buf.split_at(len); + *buf = rest; + + Ok(Proof { claim, proof: proof_bytes.to_vec() }) + } + + /// Serialize a proof and compute its content address. 
+ pub fn commit(&self) -> (Address, Vec) { + let mut buf = Vec::new(); + self.put(&mut buf); + let addr = Address::hash(&buf); + (addr, buf) + } +} + +// ============================================================================ +// Tests +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + use quickcheck::{Arbitrary, Gen}; + + // ========== Arbitrary impls ========== + + impl Arbitrary for EvalClaim { + fn arbitrary(g: &mut Gen) -> Self { + EvalClaim { input: Address::arbitrary(g), output: Address::arbitrary(g) } + } + } + + impl Arbitrary for CheckClaim { + fn arbitrary(g: &mut Gen) -> Self { + CheckClaim { value: Address::arbitrary(g) } + } + } + + impl Arbitrary for RevealConstructorInfo { + fn arbitrary(g: &mut Gen) -> Self { + RevealConstructorInfo { + is_unsafe: if bool::arbitrary(g) { + Some(bool::arbitrary(g)) + } else { + None + }, + lvls: if bool::arbitrary(g) { + Some(u64::arbitrary(g) % 10) + } else { + None + }, + cidx: if bool::arbitrary(g) { + Some(u64::arbitrary(g) % 10) + } else { + None + }, + params: if bool::arbitrary(g) { + Some(u64::arbitrary(g) % 10) + } else { + None + }, + fields: if bool::arbitrary(g) { + Some(u64::arbitrary(g) % 10) + } else { + None + }, + typ: if bool::arbitrary(g) { + Some(Address::arbitrary(g)) + } else { + None + }, + } + } + } + + impl Arbitrary for RevealRecursorRule { + fn arbitrary(g: &mut Gen) -> Self { + RevealRecursorRule { + rule_idx: u64::arbitrary(g) % 10, + fields: u64::arbitrary(g) % 10, + rhs: Address::arbitrary(g), + } + } + } + + fn gen_opt_rules(g: &mut Gen) -> Option> { + if bool::arbitrary(g) { + let n = (u8::arbitrary(g) % 4) as usize; + Some((0..n).map(|_| RevealRecursorRule::arbitrary(g)).collect()) + } else { + None + } + } + + fn gen_opt_ctors(g: &mut Gen) -> Option> { + if bool::arbitrary(g) { + let n = (u8::arbitrary(g) % 4) as usize; + Some( + (0..n) + .map(|_| { + (u64::arbitrary(g) % 10, 
RevealConstructorInfo::arbitrary(g)) + }) + .collect(), + ) + } else { + None + } + } + + fn gen_opt_bool(g: &mut Gen) -> Option { + if bool::arbitrary(g) { Some(bool::arbitrary(g)) } else { None } + } + + fn gen_opt_u64(g: &mut Gen) -> Option { + if bool::arbitrary(g) { Some(u64::arbitrary(g) % 100) } else { None } + } + + fn gen_opt_addr(g: &mut Gen) -> Option
{ + if bool::arbitrary(g) { Some(Address::arbitrary(g)) } else { None } + } + + impl Arbitrary for RevealMutConstInfo { + fn arbitrary(g: &mut Gen) -> Self { + match u8::arbitrary(g) % 3 { + 0 => Self::Defn { + kind: if bool::arbitrary(g) { + Some(DefKind::arbitrary(g)) + } else { + None + }, + safety: if bool::arbitrary(g) { + Some(DefinitionSafety::arbitrary(g)) + } else { + None + }, + lvls: gen_opt_u64(g), + typ: gen_opt_addr(g), + value: gen_opt_addr(g), + }, + 1 => Self::Indc { + recr: gen_opt_bool(g), + refl: gen_opt_bool(g), + is_unsafe: gen_opt_bool(g), + lvls: gen_opt_u64(g), + params: gen_opt_u64(g), + indices: gen_opt_u64(g), + nested: gen_opt_u64(g), + typ: gen_opt_addr(g), + ctors: gen_opt_ctors(g), + }, + _ => Self::Recr { + k: gen_opt_bool(g), + is_unsafe: gen_opt_bool(g), + lvls: gen_opt_u64(g), + params: gen_opt_u64(g), + indices: gen_opt_u64(g), + motives: gen_opt_u64(g), + minors: gen_opt_u64(g), + typ: gen_opt_addr(g), + rules: gen_opt_rules(g), + }, + } + } + } + + impl Arbitrary for RevealConstantInfo { + fn arbitrary(g: &mut Gen) -> Self { + match u8::arbitrary(g) % 9 { + 0 => Self::Defn { + kind: if bool::arbitrary(g) { + Some(DefKind::arbitrary(g)) + } else { + None + }, + safety: if bool::arbitrary(g) { + Some(DefinitionSafety::arbitrary(g)) + } else { + None + }, + lvls: gen_opt_u64(g), + typ: gen_opt_addr(g), + value: gen_opt_addr(g), + }, + 1 => Self::Recr { + k: gen_opt_bool(g), + is_unsafe: gen_opt_bool(g), + lvls: gen_opt_u64(g), + params: gen_opt_u64(g), + indices: gen_opt_u64(g), + motives: gen_opt_u64(g), + minors: gen_opt_u64(g), + typ: gen_opt_addr(g), + rules: gen_opt_rules(g), + }, + 2 => Self::Axio { + is_unsafe: gen_opt_bool(g), + lvls: gen_opt_u64(g), + typ: gen_opt_addr(g), + }, + 3 => Self::Quot { + kind: if bool::arbitrary(g) { + Some(QuotKind::arbitrary(g)) + } else { + None + }, + lvls: gen_opt_u64(g), + typ: gen_opt_addr(g), + }, + 4 => Self::CPrj { + idx: gen_opt_u64(g), + cidx: gen_opt_u64(g), + block: 
gen_opt_addr(g), + }, + 5 => Self::RPrj { idx: gen_opt_u64(g), block: gen_opt_addr(g) }, + 6 => Self::IPrj { idx: gen_opt_u64(g), block: gen_opt_addr(g) }, + 7 => Self::DPrj { idx: gen_opt_u64(g), block: gen_opt_addr(g) }, + _ => { + let n = (u8::arbitrary(g) % 4) as usize; + Self::Muts { + components: (0..n) + .map(|_| { + (u64::arbitrary(g) % 10, RevealMutConstInfo::arbitrary(g)) + }) + .collect(), + } + }, + } + } + } + + impl Arbitrary for RevealClaim { + fn arbitrary(g: &mut Gen) -> Self { + RevealClaim { + comm: Address::arbitrary(g), + info: RevealConstantInfo::arbitrary(g), + } + } + } + + impl Arbitrary for Claim { + fn arbitrary(g: &mut Gen) -> Self { + match u8::arbitrary(g) % 3 { + 0 => Claim::Evals(EvalClaim::arbitrary(g)), + 1 => Claim::Checks(CheckClaim::arbitrary(g)), + _ => Claim::Reveals(RevealClaim::arbitrary(g)), + } + } + } + + impl Arbitrary for Proof { + fn arbitrary(g: &mut Gen) -> Self { + let len = u8::arbitrary(g) as usize % 64; + let proof: Vec = (0..len).map(|_| u8::arbitrary(g)).collect(); + Proof { claim: Claim::arbitrary(g), proof } + } + } + + // ========== Roundtrip helpers ========== + + fn claim_roundtrip(c: &Claim) -> bool { + let mut buf = Vec::new(); + c.put(&mut buf); + match Claim::get(&mut buf.as_slice()) { + Ok(c2) => c == &c2, + Err(e) => { + eprintln!("claim_roundtrip error: {e}"); + false + }, + } + } + + fn proof_roundtrip(p: &Proof) -> bool { + let mut buf = Vec::new(); + p.put(&mut buf); + match Proof::get(&mut buf.as_slice()) { + Ok(p2) => p == &p2, + Err(e) => { + eprintln!("proof_roundtrip error: {e}"); + false + }, + } + } + + fn reveal_info_roundtrip(info: &RevealConstantInfo) -> bool { + let mut buf = Vec::new(); + info.put(&mut buf); + match RevealConstantInfo::get(&mut buf.as_slice()) { + Ok(info2) => info == &info2, + Err(e) => { + eprintln!("reveal_info_roundtrip error: {e}"); + false + }, + } + } + + // ========== Quickcheck properties ========== + + #[allow(clippy::needless_pass_by_value)] + #[quickcheck] 
+ fn prop_claim_roundtrip(c: Claim) -> bool { + claim_roundtrip(&c) + } + + #[allow(clippy::needless_pass_by_value)] + #[quickcheck] + fn prop_proof_roundtrip(p: Proof) -> bool { + proof_roundtrip(&p) + } + + #[allow(clippy::needless_pass_by_value)] + #[quickcheck] + fn prop_reveal_info_roundtrip(info: RevealConstantInfo) -> bool { + reveal_info_roundtrip(&info) + } + + // ========== Manual roundtrip tests ========== + + #[test] + fn test_eval_claim_roundtrip() { + let claim = Claim::Evals(EvalClaim { + input: Address::hash(b"input"), + output: Address::hash(b"output"), + }); + assert!(claim_roundtrip(&claim)); + } + + #[test] + fn test_check_claim_roundtrip() { + let claim = Claim::Checks(CheckClaim { value: Address::hash(b"value") }); + assert!(claim_roundtrip(&claim)); + } + + #[test] + fn test_eval_proof_roundtrip() { + let proof = Proof::new( + Claim::Evals(EvalClaim { + input: Address::hash(b"input"), + output: Address::hash(b"output"), + }), + vec![1, 2, 3, 4], + ); + assert!(proof_roundtrip(&proof)); + } + + #[test] + fn test_check_proof_roundtrip() { + let proof = Proof::new( + Claim::Checks(CheckClaim { value: Address::hash(b"value") }), + vec![5, 6, 7, 8, 9], + ); + assert!(proof_roundtrip(&proof)); + } + + #[test] + fn test_empty_proof_data() { + let proof = Proof::new( + Claim::Evals(EvalClaim { + input: Address::hash(b"c"), + output: Address::hash(b"d"), + }), + vec![], + ); + assert!(proof_roundtrip(&proof)); + } + + #[test] + fn test_reveal_claim_roundtrip() { + let claim = Claim::Reveals(RevealClaim { + comm: Address::hash(b"comm"), + info: RevealConstantInfo::Defn { + kind: Some(DefKind::Definition), + safety: Some(DefinitionSafety::Safe), + lvls: Some(0), + typ: None, + value: None, + }, + }); + assert!(claim_roundtrip(&claim)); + } + + #[test] + fn test_reveal_proof_roundtrip() { + let proof = Proof::new( + Claim::Reveals(RevealClaim { + comm: Address::hash(b"comm"), + info: RevealConstantInfo::Axio { + is_unsafe: Some(false), + lvls: None, + 
typ: Some(Address::hash(b"typ")), + }, + }), + vec![0xAB, 0xCD], + ); + assert!(proof_roundtrip(&proof)); + } + + // ========== Tag byte tests ========== + + #[test] + fn test_claim_tags() { + // EvalClaim should be 0xE4 + let eval_claim = Claim::Evals(EvalClaim { + input: Address::hash(b"a"), + output: Address::hash(b"b"), + }); + let mut buf = Vec::new(); + eval_claim.put(&mut buf); + assert_eq!(buf[0], 0xE4); + + // CheckClaim should be 0xE3 + let check_claim = Claim::Checks(CheckClaim { value: Address::hash(b"a") }); + let mut buf = Vec::new(); + check_claim.put(&mut buf); + assert_eq!(buf[0], 0xE3); + + // RevealClaim should be 0xE6 + let reveal_claim = Claim::Reveals(RevealClaim { + comm: Address::hash(b"a"), + info: RevealConstantInfo::Defn { + kind: None, + safety: None, + lvls: None, + typ: None, + value: None, + }, + }); + let mut buf = Vec::new(); + reveal_claim.put(&mut buf); + assert_eq!(buf[0], 0xE6); + } + + #[test] + fn test_proof_tags() { + // EvalProof should be 0xE2 + let eval_proof = Proof::new( + Claim::Evals(EvalClaim { + input: Address::hash(b"a"), + output: Address::hash(b"b"), + }), + vec![1, 2, 3], + ); + let mut buf = Vec::new(); + eval_proof.put(&mut buf); + assert_eq!(buf[0], 0xE2); + + // CheckProof should be 0xE1 + let check_proof = Proof::new( + Claim::Checks(CheckClaim { value: Address::hash(b"a") }), + vec![4, 5, 6], + ); + let mut buf = Vec::new(); + check_proof.put(&mut buf); + assert_eq!(buf[0], 0xE1); + + // RevealProof should be 0xE7 + let reveal_proof = Proof::new( + Claim::Reveals(RevealClaim { + comm: Address::hash(b"a"), + info: RevealConstantInfo::Defn { + kind: None, + safety: None, + lvls: None, + typ: None, + value: None, + }, + }), + vec![7, 8], + ); + let mut buf = Vec::new(); + reveal_proof.put(&mut buf); + assert_eq!(buf[0], 0xE7); + } + + // ========== Bitmask encoding tests from plan examples ========== + + #[test] + fn test_reveal_defn_safety() { + // Plan example: Reveal that a committed Definition has safety = 
Safe + // 0xE6 <32 bytes comm> 0x00 0x02 0x01 + let claim = Claim::Reveals(RevealClaim { + comm: Address::hash(b"test_comm"), + info: RevealConstantInfo::Defn { + kind: None, + safety: Some(DefinitionSafety::Safe), + lvls: None, + typ: None, + value: None, + }, + }); + let mut buf = Vec::new(); + claim.put(&mut buf); + assert_eq!(buf[0], 0xE6); // Tag4: RevealClaim + // buf[1..33] = comm_addr (32 bytes) + assert_eq!(buf[33], 0x00); // variant: Definition + assert_eq!(buf[34], 0x02); // mask: bit 1 (safety) + assert_eq!(buf[35], 0x01); // DefinitionSafety::Safe + assert_eq!(buf.len(), 36); // Total: 1 + 32 + 1 + 1 + 1 = 36 bytes + } + + #[test] + fn test_reveal_defn_typ() { + // Plan example: Reveal a committed Definition's type expression + // 0xE6 <32 bytes comm> 0x00 0x08 <32 bytes typ> + let typ_addr = Address::hash(b"serialized typ expr"); + let claim = Claim::Reveals(RevealClaim { + comm: Address::hash(b"test_comm"), + info: RevealConstantInfo::Defn { + kind: None, + safety: None, + lvls: None, + typ: Some(typ_addr), + value: None, + }, + }); + let mut buf = Vec::new(); + claim.put(&mut buf); + assert_eq!(buf[0], 0xE6); // Tag4: RevealClaim + assert_eq!(buf[33], 0x00); // variant: Definition + assert_eq!(buf[34], 0x08); // mask: bit 3 (typ) + // buf[35..67] = typ address (32 bytes) + assert_eq!(buf.len(), 67); // Total: 1 + 32 + 1 + 1 + 32 = 67 bytes + } + + #[test] + fn test_reveal_muts_component_safety() { + // Plan example: Reveal a Muts component's safety + // 0xE6 <32 comm> 0x08 0x01 0x01 0x02 0x00 0x02 0x01 + let claim = Claim::Reveals(RevealClaim { + comm: Address::hash(b"test_comm"), + info: RevealConstantInfo::Muts { + components: vec![( + 2, + RevealMutConstInfo::Defn { + kind: None, + safety: Some(DefinitionSafety::Safe), + lvls: None, + typ: None, + value: None, + }, + )], + }, + }); + let mut buf = Vec::new(); + claim.put(&mut buf); + assert_eq!(buf[0], 0xE6); // Tag4: RevealClaim + assert_eq!(buf[33], 0x08); // variant: Muts + assert_eq!(buf[34], 
0x01); // mask: bit 0 (components) + assert_eq!(buf[35], 0x01); // Tag0: 1 component revealed + assert_eq!(buf[36], 0x02); // Tag0: component index 2 + assert_eq!(buf[37], 0x00); // RevealMutConstInfo variant: Definition + assert_eq!(buf[38], 0x02); // mask: bit 1 (safety) + assert_eq!(buf[39], 0x01); // DefinitionSafety::Safe + assert_eq!(buf.len(), 40); // Total: 1 + 32 + 7 = 40 bytes + } + + // ========== All RevealConstantInfo variant roundtrips ========== + + #[test] + fn test_reveal_all_variants() { + let cases: Vec = vec![ + // Defn with all fields + RevealConstantInfo::Defn { + kind: Some(DefKind::Theorem), + safety: Some(DefinitionSafety::Partial), + lvls: Some(3), + typ: Some(Address::hash(b"typ")), + value: Some(Address::hash(b"val")), + }, + // Defn with no fields + RevealConstantInfo::Defn { + kind: None, + safety: None, + lvls: None, + typ: None, + value: None, + }, + // Recr with rules + RevealConstantInfo::Recr { + k: Some(true), + is_unsafe: None, + lvls: Some(1), + params: None, + indices: None, + motives: None, + minors: None, + typ: None, + rules: Some(vec![RevealRecursorRule { + rule_idx: 0, + fields: 2, + rhs: Address::hash(b"rhs"), + }]), + }, + // Axio + RevealConstantInfo::Axio { + is_unsafe: Some(false), + lvls: Some(0), + typ: Some(Address::hash(b"axtyp")), + }, + // Quot + RevealConstantInfo::Quot { + kind: Some(QuotKind::Lift), + lvls: None, + typ: None, + }, + // CPrj + RevealConstantInfo::CPrj { + idx: Some(0), + cidx: Some(1), + block: Some(Address::hash(b"block")), + }, + // RPrj + RevealConstantInfo::RPrj { idx: Some(2), block: None }, + // IPrj + RevealConstantInfo::IPrj { + idx: None, + block: Some(Address::hash(b"blk")), + }, + // DPrj + RevealConstantInfo::DPrj { + idx: Some(5), + block: Some(Address::hash(b"dblk")), + }, + // Muts with components + RevealConstantInfo::Muts { + components: vec![ + ( + 0, + RevealMutConstInfo::Indc { + recr: Some(true), + refl: None, + is_unsafe: Some(false), + lvls: None, + params: Some(2), + 
indices: None, + nested: None, + typ: None, + ctors: Some(vec![( + 0, + RevealConstructorInfo { + is_unsafe: Some(false), + lvls: None, + cidx: Some(0), + params: None, + fields: Some(3), + typ: None, + }, + )]), + }, + ), + ( + 1, + RevealMutConstInfo::Recr { + k: None, + is_unsafe: None, + lvls: Some(1), + params: None, + indices: None, + motives: None, + minors: None, + typ: None, + rules: None, + }, + ), + ], + }, + // Muts with empty components + RevealConstantInfo::Muts { components: vec![] }, + ]; + + for (i, info) in cases.iter().enumerate() { + assert!( + reveal_info_roundtrip(info), + "RevealConstantInfo roundtrip failed for case {i}" + ); + } + } +} diff --git a/src/ix/ixon/serialize.rs b/src/ix/ixon/serialize.rs new file mode 100644 index 00000000..c0572160 --- /dev/null +++ b/src/ix/ixon/serialize.rs @@ -0,0 +1,1592 @@ +//! Serialization for Ixon types. +//! +//! This module provides serialization/deserialization for all Ixon types +//! using the Tag4/Tag2/Tag0 encoding schemes. + +#![allow(clippy::cast_possible_truncation)] +#![allow(clippy::map_err_ignore)] +#![allow(clippy::needless_pass_by_value)] + +use std::sync::Arc; + +use crate::ix::address::Address; +use crate::ix::env::{DefinitionSafety, QuotKind}; + +use super::constant::{ + Axiom, Constant, ConstantInfo, Constructor, ConstructorProj, DefKind, + Definition, DefinitionProj, Inductive, InductiveProj, MutConst, Quotient, + Recursor, RecursorProj, RecursorRule, +}; +use super::expr::Expr; +use super::tag::{Tag0, Tag4}; +use super::univ::{Univ, get_univ, put_univ}; + +// ============================================================================ +// Primitive helpers +// ============================================================================ + +/// Cap capacity for Vec allocation during deserialization. +/// Prevents OOM from malicious/malformed input claiming huge sizes. +/// Each item requires at least 1 byte, so capacity can never exceed buffer length. 
+#[inline] +fn capped_capacity(count: u64, buf: &[u8]) -> usize { + (count as usize).min(buf.len()) +} + +fn put_u8(x: u8, buf: &mut Vec) { + buf.push(x); +} + +fn get_u8(buf: &mut &[u8]) -> Result { + match buf.split_first() { + Some((&x, rest)) => { + *buf = rest; + Ok(x) + }, + None => Err("get_u8: EOF".to_string()), + } +} + +fn put_bool(x: bool, buf: &mut Vec) { + buf.push(if x { 1 } else { 0 }); +} + +fn get_bool(buf: &mut &[u8]) -> Result { + match get_u8(buf)? { + 0 => Ok(false), + 1 => Ok(true), + x => Err(format!("get_bool: invalid {x}")), + } +} + +fn put_u64(x: u64, buf: &mut Vec) { + Tag0::new(x).put(buf); +} + +fn get_u64(buf: &mut &[u8]) -> Result { + Ok(Tag0::get(buf)?.size) +} + +fn put_bytes(bytes: &[u8], buf: &mut Vec) { + buf.extend_from_slice(bytes); +} + +fn put_address(a: &Address, buf: &mut Vec) { + put_bytes(a.as_bytes(), buf); +} + +fn get_address(buf: &mut &[u8]) -> Result { + if buf.len() < 32 { + return Err(format!("get_address: need 32 bytes, have {}", buf.len())); + } + let (bytes, rest) = buf.split_at(32); + *buf = rest; + Address::from_slice(bytes).map_err(|_| "get_address: invalid".to_string()) +} + +/// Pack up to 8 bools into a u8. +pub fn pack_bools(bools: I) -> u8 +where + I: IntoIterator, +{ + let mut acc: u8 = 0; + for (i, b) in bools.into_iter().take(8).enumerate() { + if b { + acc |= 1u8 << (i as u32); + } + } + acc +} + +/// Unpack up to n bools from a u8. +pub fn unpack_bools(n: usize, b: u8) -> Vec { + (0..8).map(|i: u32| (b & (1u8 << i)) != 0).take(n.min(8)).collect() +} + +// ============================================================================ +// Expression serialization +// ============================================================================ + +/// Serialize an expression to bytes (iterative to avoid stack overflow). 
+pub fn put_expr(e: &Expr, buf: &mut Vec) { + let mut stack: Vec<&Expr> = vec![e]; + + while let Some(curr) = stack.pop() { + match curr { + Expr::Sort(univ_idx) => { + Tag4::new(Expr::FLAG_SORT, *univ_idx).put(buf); + }, + Expr::Var(idx) => { + Tag4::new(Expr::FLAG_VAR, *idx).put(buf); + }, + Expr::Ref(ref_idx, univ_indices) => { + Tag4::new(Expr::FLAG_REF, univ_indices.len() as u64).put(buf); + put_u64(*ref_idx, buf); + for idx in univ_indices { + put_u64(*idx, buf); + } + }, + Expr::Rec(rec_idx, univ_indices) => { + Tag4::new(Expr::FLAG_REC, univ_indices.len() as u64).put(buf); + put_u64(*rec_idx, buf); + for idx in univ_indices { + put_u64(*idx, buf); + } + }, + Expr::Prj(type_ref_idx, field_idx, val) => { + Tag4::new(Expr::FLAG_PRJ, *field_idx).put(buf); + put_u64(*type_ref_idx, buf); + stack.push(val); + }, + Expr::Str(ref_idx) => { + Tag4::new(Expr::FLAG_STR, *ref_idx).put(buf); + }, + Expr::Nat(ref_idx) => { + Tag4::new(Expr::FLAG_NAT, *ref_idx).put(buf); + }, + Expr::App(..) => { + // Telescope compression: count nested apps + let count = curr.app_telescope_count(); + Tag4::new(Expr::FLAG_APP, count).put(buf); + // Collect function and args + let mut e = curr; + let mut args = Vec::with_capacity(count as usize); + while let Expr::App(func, arg) = e { + args.push(arg.as_ref()); + e = func.as_ref(); + } + // Push in reverse order: args (reversed back to normal), then func + for arg in &args { + stack.push(*arg); + } + stack.push(e); // func last, processed first + }, + Expr::Lam(..) 
=> { + // Telescope compression: count nested lambdas + let count = curr.lam_telescope_count(); + Tag4::new(Expr::FLAG_LAM, count).put(buf); + // Collect types and body + let mut e = curr; + let mut types = Vec::with_capacity(count as usize); + while let Expr::Lam(t, b) = e { + types.push(t.as_ref()); + e = b.as_ref(); + } + // Push body first (processed last), then types in reverse order + stack.push(e); // body + for ty in types.into_iter().rev() { + stack.push(ty); + } + }, + Expr::All(..) => { + // Telescope compression: count nested foralls + let count = curr.all_telescope_count(); + Tag4::new(Expr::FLAG_ALL, count).put(buf); + // Collect types and body + let mut e = curr; + let mut types = Vec::with_capacity(count as usize); + while let Expr::All(t, b) = e { + types.push(t.as_ref()); + e = b.as_ref(); + } + // Push body first (processed last), then types in reverse order + stack.push(e); // body + for ty in types.into_iter().rev() { + stack.push(ty); + } + }, + Expr::Let(non_dep, ty, val, body) => { + // size=0 for dep, size=1 for non_dep + Tag4::new(Expr::FLAG_LET, if *non_dep { 1 } else { 0 }).put(buf); + stack.push(body); // Process body last + stack.push(val); + stack.push(ty); // Process ty first + }, + Expr::Share(idx) => { + Tag4::new(Expr::FLAG_SHARE, *idx).put(buf); + }, + } + } +} + +/// Frame for iterative expression deserialization. 
+enum GetExprFrame { + /// Parse an expression from the buffer + Parse, + /// Build Prj with stored idx, pop val and typ + BuildPrj(u64, u64), // type_ref_idx, field_idx + /// Build App: pop func and arg, push App(func, arg) + BuildApp, + /// Collect n more args for App telescope, then wrap + CollectApps(u64), + /// Collect remaining Lam types: have `collected`, need `remaining` more + CollectLamType { collected: Vec>, remaining: u64 }, + /// Build Lam telescope: wrap body in Lams using stored types + BuildLams(Vec>), + /// Collect remaining All types: have `collected`, need `remaining` more + CollectAllType { collected: Vec>, remaining: u64 }, + /// Build All telescope: wrap body in Alls using stored types + BuildAlls(Vec>), + /// Build Let with stored non_dep flag + BuildLet(bool), +} + +/// Deserialize an expression from bytes (iterative to avoid stack overflow). +pub fn get_expr(buf: &mut &[u8]) -> Result, String> { + let mut work: Vec = vec![GetExprFrame::Parse]; + let mut results: Vec> = Vec::new(); + + while let Some(frame) = work.pop() { + match frame { + GetExprFrame::Parse => { + let tag = Tag4::get(buf)?; + match tag.flag { + Expr::FLAG_SORT => { + results.push(Expr::sort(tag.size)); + }, + Expr::FLAG_VAR => { + results.push(Expr::var(tag.size)); + }, + Expr::FLAG_REF => { + let ref_idx = get_u64(buf)?; + let mut univ_indices = + Vec::with_capacity(capped_capacity(tag.size, buf)); + for _ in 0..tag.size { + univ_indices.push(get_u64(buf)?); + } + results.push(Expr::reference(ref_idx, univ_indices)); + }, + Expr::FLAG_REC => { + let rec_idx = get_u64(buf)?; + let mut univ_indices = + Vec::with_capacity(capped_capacity(tag.size, buf)); + for _ in 0..tag.size { + univ_indices.push(get_u64(buf)?); + } + results.push(Expr::rec(rec_idx, univ_indices)); + }, + Expr::FLAG_PRJ => { + let type_ref_idx = get_u64(buf)?; + // Parse val, then build Prj + work.push(GetExprFrame::BuildPrj(type_ref_idx, tag.size)); + work.push(GetExprFrame::Parse); // val + }, + 
Expr::FLAG_STR => { + results.push(Expr::str(tag.size)); + }, + Expr::FLAG_NAT => { + results.push(Expr::nat(tag.size)); + }, + Expr::FLAG_APP => { + if tag.size == 0 { + return Err("get_expr: App with zero args".to_string()); + } + // Parse func, then collect args and wrap + work.push(GetExprFrame::CollectApps(tag.size)); + work.push(GetExprFrame::Parse); // func + }, + Expr::FLAG_LAM => { + if tag.size == 0 { + return Err("get_expr: Lam with zero binders".to_string()); + } + // Start collecting types + work.push(GetExprFrame::CollectLamType { + collected: Vec::new(), + remaining: tag.size, + }); + work.push(GetExprFrame::Parse); // first type + }, + Expr::FLAG_ALL => { + if tag.size == 0 { + return Err("get_expr: All with zero binders".to_string()); + } + // Start collecting types + work.push(GetExprFrame::CollectAllType { + collected: Vec::new(), + remaining: tag.size, + }); + work.push(GetExprFrame::Parse); // first type + }, + Expr::FLAG_LET => { + // size=0 for dep, size=1 for non_dep + let non_dep = tag.size != 0; + work.push(GetExprFrame::BuildLet(non_dep)); + work.push(GetExprFrame::Parse); // body + work.push(GetExprFrame::Parse); // val + work.push(GetExprFrame::Parse); // ty + }, + Expr::FLAG_SHARE => { + results.push(Expr::share(tag.size)); + }, + f => return Err(format!("get_expr: invalid flag {f}")), + } + }, + GetExprFrame::BuildPrj(type_ref_idx, field_idx) => { + let val = results.pop().ok_or("get_expr: missing val for Prj")?; + results.push(Expr::prj(type_ref_idx, field_idx, val)); + }, + GetExprFrame::BuildApp => { + let arg = results.pop().ok_or("get_expr: missing arg for App")?; + let func = results.pop().ok_or("get_expr: missing func for App")?; + results.push(Expr::app(func, arg)); + }, + GetExprFrame::CollectApps(remaining) => { + if remaining == 0 { + // All args collected, result is already on stack + } else { + // Parse next arg, apply to current func + work.push(GetExprFrame::CollectApps(remaining - 1)); + 
work.push(GetExprFrame::BuildApp); + work.push(GetExprFrame::Parse); // arg + } + }, + GetExprFrame::CollectLamType { mut collected, remaining } => { + // Pop the just-parsed type + let ty = results.pop().ok_or("get_expr: missing type for Lam")?; + collected.push(ty); + + if remaining > 1 { + // More types to collect + work.push(GetExprFrame::CollectLamType { + collected, + remaining: remaining - 1, + }); + work.push(GetExprFrame::Parse); // next type + } else { + // All types collected, now parse body + work.push(GetExprFrame::BuildLams(collected)); + work.push(GetExprFrame::Parse); // body + } + }, + GetExprFrame::BuildLams(types) => { + let mut body = results.pop().ok_or("get_expr: missing body for Lam")?; + for ty in types.into_iter().rev() { + body = Expr::lam(ty, body); + } + results.push(body); + }, + GetExprFrame::CollectAllType { mut collected, remaining } => { + // Pop the just-parsed type + let ty = results.pop().ok_or("get_expr: missing type for All")?; + collected.push(ty); + + if remaining > 1 { + // More types to collect + work.push(GetExprFrame::CollectAllType { + collected, + remaining: remaining - 1, + }); + work.push(GetExprFrame::Parse); // next type + } else { + // All types collected, now parse body + work.push(GetExprFrame::BuildAlls(collected)); + work.push(GetExprFrame::Parse); // body + } + }, + GetExprFrame::BuildAlls(types) => { + let mut body = results.pop().ok_or("get_expr: missing body for All")?; + for ty in types.into_iter().rev() { + body = Expr::all(ty, body); + } + results.push(body); + }, + GetExprFrame::BuildLet(non_dep) => { + let body = results.pop().ok_or("get_expr: missing body for Let")?; + let val = results.pop().ok_or("get_expr: missing val for Let")?; + let ty = results.pop().ok_or("get_expr: missing ty for Let")?; + results.push(Expr::let_(non_dep, ty, val, body)); + }, + } + } + + results.pop().ok_or_else(|| "get_expr: no result".to_string()) +} + +// 
============================================================================ +// Constant serialization +// ============================================================================ + +impl DefKind { + fn to_u8(self) -> u8 { + match self { + Self::Definition => 0, + Self::Opaque => 1, + Self::Theorem => 2, + } + } + + fn from_u8(x: u8) -> Result { + match x { + 0 => Ok(Self::Definition), + 1 => Ok(Self::Opaque), + 2 => Ok(Self::Theorem), + x => Err(format!("DefKind::from_u8: invalid {x}")), + } + } +} + +impl DefinitionSafety { + fn to_u8(self) -> u8 { + match self { + Self::Unsafe => 0, + Self::Safe => 1, + Self::Partial => 2, + } + } + + fn from_u8(x: u8) -> Result { + match x { + 0 => Ok(Self::Unsafe), + 1 => Ok(Self::Safe), + 2 => Ok(Self::Partial), + x => Err(format!("DefinitionSafety::from_u8: invalid {x}")), + } + } +} + +/// Pack DefKind (2 bits) and DefinitionSafety (2 bits) into a single byte. +fn pack_def_kind_safety(kind: DefKind, safety: DefinitionSafety) -> u8 { + (kind.to_u8() << 2) | safety.to_u8() +} + +/// Unpack DefKind and DefinitionSafety from a single byte. +fn unpack_def_kind_safety( + b: u8, +) -> Result<(DefKind, DefinitionSafety), String> { + let kind = DefKind::from_u8(b >> 2)?; + let safety = DefinitionSafety::from_u8(b & 0x3)?; + Ok((kind, safety)) +} + +impl QuotKind { + pub fn put_ser(&self, buf: &mut Vec) { + match self { + Self::Type => put_u8(0, buf), + Self::Ctor => put_u8(1, buf), + Self::Lift => put_u8(2, buf), + Self::Ind => put_u8(3, buf), + } + } + + pub fn get_ser(buf: &mut &[u8]) -> Result { + match get_u8(buf)? 
{ + 0 => Ok(Self::Type), + 1 => Ok(Self::Ctor), + 2 => Ok(Self::Lift), + 3 => Ok(Self::Ind), + x => Err(format!("QuotKind::get: invalid {x}")), + } + } +} + +fn put_sharing(sharing: &[Arc], buf: &mut Vec) { + put_u64(sharing.len() as u64, buf); + for s in sharing { + put_expr(s, buf); + } +} + +fn get_sharing(buf: &mut &[u8]) -> Result>, String> { + let num = get_u64(buf)?; + let mut sharing = Vec::with_capacity(capped_capacity(num, buf)); + for _ in 0..num { + sharing.push(get_expr(buf)?); + } + Ok(sharing) +} + +impl Definition { + pub fn put(&self, buf: &mut Vec) { + // Pack DefKind + DefinitionSafety into single byte + put_u8(pack_def_kind_safety(self.kind, self.safety), buf); + put_u64(self.lvls, buf); + put_expr(&self.typ, buf); + put_expr(&self.value, buf); + } + + pub fn get(buf: &mut &[u8]) -> Result { + let (kind, safety) = unpack_def_kind_safety(get_u8(buf)?)?; + let lvls = get_u64(buf)?; + let typ = get_expr(buf)?; + let value = get_expr(buf)?; + Ok(Definition { kind, safety, lvls, typ, value }) + } +} + +impl RecursorRule { + pub fn put(&self, buf: &mut Vec) { + put_u64(self.fields, buf); + put_expr(&self.rhs, buf); + } + + pub fn get(buf: &mut &[u8]) -> Result { + let fields = get_u64(buf)?; + let rhs = get_expr(buf)?; + Ok(RecursorRule { fields, rhs }) + } +} + +impl Recursor { + pub fn put(&self, buf: &mut Vec) { + put_u8(pack_bools([self.k, self.is_unsafe]), buf); + put_u64(self.lvls, buf); + put_u64(self.params, buf); + put_u64(self.indices, buf); + put_u64(self.motives, buf); + put_u64(self.minors, buf); + put_expr(&self.typ, buf); + put_u64(self.rules.len() as u64, buf); + for rule in &self.rules { + rule.put(buf); + } + } + + pub fn get(buf: &mut &[u8]) -> Result { + let bools = unpack_bools(2, get_u8(buf)?); + let lvls = get_u64(buf)?; + let params = get_u64(buf)?; + let indices = get_u64(buf)?; + let motives = get_u64(buf)?; + let minors = get_u64(buf)?; + let typ = get_expr(buf)?; + let num_rules = get_u64(buf)?; + let mut rules = 
Vec::with_capacity(capped_capacity(num_rules, buf)); + for _ in 0..num_rules { + rules.push(RecursorRule::get(buf)?); + } + Ok(Recursor { + k: bools[0], + is_unsafe: bools[1], + lvls, + params, + indices, + motives, + minors, + typ, + rules, + }) + } +} + +impl Axiom { + pub fn put(&self, buf: &mut Vec) { + put_bool(self.is_unsafe, buf); + put_u64(self.lvls, buf); + put_expr(&self.typ, buf); + } + + pub fn get(buf: &mut &[u8]) -> Result { + let is_unsafe = get_bool(buf)?; + let lvls = get_u64(buf)?; + let typ = get_expr(buf)?; + Ok(Axiom { is_unsafe, lvls, typ }) + } +} + +impl Quotient { + pub fn put(&self, buf: &mut Vec) { + self.kind.put_ser(buf); + put_u64(self.lvls, buf); + put_expr(&self.typ, buf); + } + + pub fn get(buf: &mut &[u8]) -> Result { + let kind = QuotKind::get_ser(buf)?; + let lvls = get_u64(buf)?; + let typ = get_expr(buf)?; + Ok(Quotient { kind, lvls, typ }) + } +} + +impl Constructor { + pub fn put(&self, buf: &mut Vec) { + put_bool(self.is_unsafe, buf); + put_u64(self.lvls, buf); + put_u64(self.cidx, buf); + put_u64(self.params, buf); + put_u64(self.fields, buf); + put_expr(&self.typ, buf); + } + + pub fn get(buf: &mut &[u8]) -> Result { + let is_unsafe = get_bool(buf)?; + let lvls = get_u64(buf)?; + let cidx = get_u64(buf)?; + let params = get_u64(buf)?; + let fields = get_u64(buf)?; + let typ = get_expr(buf)?; + Ok(Constructor { is_unsafe, lvls, cidx, params, fields, typ }) + } +} + +impl Inductive { + pub fn put(&self, buf: &mut Vec) { + put_u8(pack_bools([self.recr, self.refl, self.is_unsafe]), buf); + put_u64(self.lvls, buf); + put_u64(self.params, buf); + put_u64(self.indices, buf); + put_u64(self.nested, buf); + put_expr(&self.typ, buf); + put_u64(self.ctors.len() as u64, buf); + for ctor in &self.ctors { + ctor.put(buf); + } + } + + pub fn get(buf: &mut &[u8]) -> Result { + let bools = unpack_bools(3, get_u8(buf)?); + let lvls = get_u64(buf)?; + let params = get_u64(buf)?; + let indices = get_u64(buf)?; + let nested = get_u64(buf)?; + 
let typ = get_expr(buf)?; + let num_ctors = get_u64(buf)?; + let mut ctors = Vec::with_capacity(capped_capacity(num_ctors, buf)); + for _ in 0..num_ctors { + ctors.push(Constructor::get(buf)?); + } + Ok(Inductive { + recr: bools[0], + refl: bools[1], + is_unsafe: bools[2], + lvls, + params, + indices, + nested, + typ, + ctors, + }) + } +} + +impl InductiveProj { + pub fn put(&self, buf: &mut Vec) { + put_u64(self.idx, buf); + put_address(&self.block, buf); + } + + pub fn get(buf: &mut &[u8]) -> Result { + let idx = get_u64(buf)?; + let block = get_address(buf)?; + Ok(InductiveProj { idx, block }) + } +} + +impl ConstructorProj { + pub fn put(&self, buf: &mut Vec) { + put_u64(self.idx, buf); + put_u64(self.cidx, buf); + put_address(&self.block, buf); + } + + pub fn get(buf: &mut &[u8]) -> Result { + let idx = get_u64(buf)?; + let cidx = get_u64(buf)?; + let block = get_address(buf)?; + Ok(ConstructorProj { idx, cidx, block }) + } +} + +impl RecursorProj { + pub fn put(&self, buf: &mut Vec) { + put_u64(self.idx, buf); + put_address(&self.block, buf); + } + + pub fn get(buf: &mut &[u8]) -> Result { + let idx = get_u64(buf)?; + let block = get_address(buf)?; + Ok(RecursorProj { idx, block }) + } +} + +impl DefinitionProj { + pub fn put(&self, buf: &mut Vec) { + put_u64(self.idx, buf); + put_address(&self.block, buf); + } + + pub fn get(buf: &mut &[u8]) -> Result { + let idx = get_u64(buf)?; + let block = get_address(buf)?; + Ok(DefinitionProj { idx, block }) + } +} + +impl MutConst { + pub fn put(&self, buf: &mut Vec) { + match self { + Self::Defn(d) => { + put_u8(0, buf); + d.put(buf); + }, + Self::Indc(i) => { + put_u8(1, buf); + i.put(buf); + }, + Self::Recr(r) => { + put_u8(2, buf); + r.put(buf); + }, + } + } + + pub fn get(buf: &mut &[u8]) -> Result { + match get_u8(buf)? 
{ + 0 => Ok(Self::Defn(Definition::get(buf)?)), + 1 => Ok(Self::Indc(Inductive::get(buf)?)), + 2 => Ok(Self::Recr(Recursor::get(buf)?)), + x => Err(format!("MutConst::get: invalid tag {x}")), + } + } +} + +impl ConstantInfo { + /// Serialize a non-Muts ConstantInfo (Muts is handled separately in Constant::put) + pub fn put(&self, buf: &mut Vec) { + match self { + Self::Defn(d) => d.put(buf), + Self::Recr(r) => r.put(buf), + Self::Axio(a) => a.put(buf), + Self::Quot(q) => q.put(buf), + Self::CPrj(c) => c.put(buf), + Self::RPrj(r) => r.put(buf), + Self::IPrj(i) => i.put(buf), + Self::DPrj(d) => d.put(buf), + Self::Muts(_) => unreachable!("Muts handled in Constant::put"), + } + } + + /// Deserialize a non-Muts ConstantInfo (Muts is handled separately with FLAG_MUTS) + pub fn get(variant: u64, buf: &mut &[u8]) -> Result { + match variant { + Self::CONST_DEFN => Ok(Self::Defn(Definition::get(buf)?)), + Self::CONST_RECR => Ok(Self::Recr(Recursor::get(buf)?)), + Self::CONST_AXIO => Ok(Self::Axio(Axiom::get(buf)?)), + Self::CONST_QUOT => Ok(Self::Quot(Quotient::get(buf)?)), + Self::CONST_CPRJ => Ok(Self::CPrj(ConstructorProj::get(buf)?)), + Self::CONST_RPRJ => Ok(Self::RPrj(RecursorProj::get(buf)?)), + Self::CONST_IPRJ => Ok(Self::IPrj(InductiveProj::get(buf)?)), + Self::CONST_DPRJ => Ok(Self::DPrj(DefinitionProj::get(buf)?)), + x => Err(format!("ConstantInfo::get: invalid variant {x}")), + } + } +} + +fn put_refs(refs: &[Address], buf: &mut Vec) { + put_u64(refs.len() as u64, buf); + for r in refs { + put_address(r, buf); + } +} + +fn get_refs(buf: &mut &[u8]) -> Result, String> { + let num = get_u64(buf)?; + let mut refs = Vec::with_capacity(capped_capacity(num, buf)); + for _ in 0..num { + refs.push(get_address(buf)?); + } + Ok(refs) +} + +fn put_univs(univs: &[Arc], buf: &mut Vec) { + put_u64(univs.len() as u64, buf); + for u in univs { + put_univ(u, buf); + } +} + +fn get_univs(buf: &mut &[u8]) -> Result>, String> { + let num = get_u64(buf)?; + let mut univs = 
Vec::with_capacity(capped_capacity(num, buf)); + for _ in 0..num { + univs.push(get_univ(buf)?); + } + Ok(univs) +} + +impl Constant { + pub fn put(&self, buf: &mut Vec) { + match &self.info { + ConstantInfo::Muts(mutuals) => { + // Use FLAG_MUTS (0xC) with entry count in size field + Tag4::new(Self::FLAG_MUTS, mutuals.len() as u64).put(buf); + // Entries directly (no length prefix - it's in the tag) + for m in mutuals { + m.put(buf); + } + }, + _ => { + // Use FLAG (0xD) with variant in size field (always 0-7, fits in 1 byte) + Tag4::new(Self::FLAG, self.info.variant().unwrap()).put(buf); + self.info.put(buf); + }, + } + put_sharing(&self.sharing, buf); + put_refs(&self.refs, buf); + put_univs(&self.univs, buf); + } + + pub fn get(buf: &mut &[u8]) -> Result { + let tag = Tag4::get(buf)?; + let info = match tag.flag { + Self::FLAG_MUTS => { + // Muts: size field is entry count + let mut mutuals = Vec::with_capacity(capped_capacity(tag.size, buf)); + for _ in 0..tag.size { + mutuals.push(MutConst::get(buf)?); + } + ConstantInfo::Muts(mutuals) + }, + Self::FLAG => { + // Non-Muts: size field is variant + ConstantInfo::get(tag.size, buf)? + }, + _ => { + return Err(format!( + "Constant::get: expected flag {} or {}, got {}", + Self::FLAG, + Self::FLAG_MUTS, + tag.flag + )); + }, + }; + let sharing = get_sharing(buf)?; + let refs = get_refs(buf)?; + let univs = get_univs(buf)?; + Ok(Constant { info, sharing, refs, univs }) + } + + /// Serialize a constant and compute its content address. 
+ pub fn commit(&self) -> (Address, Vec) { + let mut buf = Vec::new(); + self.put(&mut buf); + let addr = Address::hash(&buf); + (addr, buf) + } +} + +// ============================================================================ +// Name serialization +// ============================================================================ + +use crate::ix::env::{Name, NameData}; +use crate::lean::nat::Nat; +use rustc_hash::FxHashMap; + +/// Serialize a Name to bytes (full recursive serialization, for standalone use). +pub fn put_name(name: &Name, buf: &mut Vec) { + match name.as_data() { + NameData::Anonymous(_) => { + put_u8(0, buf); + }, + NameData::Str(parent, s, _) => { + put_u8(1, buf); + put_name(parent, buf); + put_u64(s.len() as u64, buf); + buf.extend_from_slice(s.as_bytes()); + }, + NameData::Num(parent, n, _) => { + put_u8(2, buf); + put_name(parent, buf); + let bytes = n.to_le_bytes(); + put_u64(bytes.len() as u64, buf); + buf.extend_from_slice(&bytes); + }, + } +} + +/// Deserialize a Name from bytes (full recursive deserialization). +pub fn get_name(buf: &mut &[u8]) -> Result { + match get_u8(buf)? { + 0 => Ok(Name::anon()), + 1 => { + let parent = get_name(buf)?; + let len = get_u64(buf)? as usize; + if buf.len() < len { + return Err(format!( + "get_name: need {} bytes for string, have {}", + len, + buf.len() + )); + } + let (s_bytes, rest) = buf.split_at(len); + *buf = rest; + let s = String::from_utf8(s_bytes.to_vec()) + .map_err(|_| "get_name: invalid UTF-8")?; + Ok(Name::str(parent, s)) + }, + 2 => { + let parent = get_name(buf)?; + let len = get_u64(buf)? as usize; + if buf.len() < len { + return Err(format!( + "get_name: need {} bytes for nat, have {}", + len, + buf.len() + )); + } + let (n_bytes, rest) = buf.split_at(len); + *buf = rest; + let n = Nat::from_le_bytes(n_bytes); + Ok(Name::num(parent, n)) + }, + x => Err(format!("get_name: invalid tag {x}")), + } +} + +/// Serialize a Name component (references parent by address). 
+/// Format: tag (1 byte) + parent_addr (32 bytes) + data +fn put_name_component(name: &Name, buf: &mut Vec) { + match name.as_data() { + NameData::Anonymous(_) => { + put_u8(0, buf); + }, + NameData::Str(parent, s, _) => { + put_u8(1, buf); + put_bytes(parent.get_hash().as_bytes(), buf); + put_u64(s.len() as u64, buf); + buf.extend_from_slice(s.as_bytes()); + }, + NameData::Num(parent, n, _) => { + put_u8(2, buf); + put_bytes(parent.get_hash().as_bytes(), buf); + let bytes = n.to_le_bytes(); + put_u64(bytes.len() as u64, buf); + buf.extend_from_slice(&bytes); + }, + } +} + +/// Deserialize a Name component using a lookup table for parents. +fn get_name_component( + buf: &mut &[u8], + names: &FxHashMap, +) -> Result { + match get_u8(buf)? { + 0 => Ok(Name::anon()), + 1 => { + let parent_addr = get_address(buf)?; + let parent = names.get(&parent_addr).cloned().ok_or_else(|| { + format!("get_name_component: missing parent {:?}", parent_addr) + })?; + let len = get_u64(buf)? as usize; + if buf.len() < len { + return Err(format!( + "get_name_component: need {} bytes, have {}", + len, + buf.len() + )); + } + let (s_bytes, rest) = buf.split_at(len); + *buf = rest; + let s = String::from_utf8(s_bytes.to_vec()) + .map_err(|_| "get_name_component: invalid UTF-8")?; + Ok(Name::str(parent, s)) + }, + 2 => { + let parent_addr = get_address(buf)?; + let parent = names.get(&parent_addr).cloned().ok_or_else(|| { + format!("get_name_component: missing parent {:?}", parent_addr) + })?; + let len = get_u64(buf)? 
as usize; + if buf.len() < len { + return Err(format!( + "get_name_component: need {} bytes, have {}", + len, + buf.len() + )); + } + let (n_bytes, rest) = buf.split_at(len); + *buf = rest; + let n = Nat::from_le_bytes(n_bytes); + Ok(Name::num(parent, n)) + }, + x => Err(format!("get_name_component: invalid tag {x}")), + } +} + +// ============================================================================ +// Named serialization +// ============================================================================ + +use super::env::Named; +use super::metadata::{ConstantMeta, NameIndex, NameReverseIndex}; + +/// Serialize a Named entry with indexed metadata. +pub fn put_named_indexed( + named: &Named, + idx: &NameIndex, + buf: &mut Vec, +) -> Result<(), String> { + put_address(&named.addr, buf); + named.meta.put_indexed(idx, buf)?; + Ok(()) +} + +/// Deserialize a Named entry with indexed metadata. +pub fn get_named_indexed( + buf: &mut &[u8], + rev: &NameReverseIndex, +) -> Result { + let addr = get_address(buf)?; + let meta = ConstantMeta::get_indexed(buf, rev)?; + Ok(Named { addr, meta }) +} + +// ============================================================================ +// Env serialization +// ============================================================================ + +use super::comm::Comm; +use super::env::Env; + +impl Env { + /// Tag4 flag for Env (0xE), variant 0. + pub const FLAG: u8 = 0xE; + + /// Serialize an Env to bytes. 
+ pub fn put(&self, buf: &mut Vec) -> Result<(), String> { + // Header: Tag4 with flag=0xE, size=0 (Env variant) + Tag4::new(Self::FLAG, 0).put(buf); + + // Section 1: Blobs (Address -> bytes) + // Sort by address for deterministic serialization (matches Lean) + let mut blobs: Vec<_> = + self.blobs.iter().map(|e| (e.key().clone(), e.value().clone())).collect(); + blobs.sort_by(|a, b| a.0.cmp(&b.0)); + put_u64(blobs.len() as u64, buf); + for (addr, bytes) in &blobs { + put_address(addr, buf); + put_u64(bytes.len() as u64, buf); + buf.extend_from_slice(bytes); + } + + // Section 2: Consts (Address -> Constant) + // Sort by address for deterministic serialization (matches Lean) + let mut consts: Vec<_> = self + .consts + .iter() + .map(|e| (e.key().clone(), e.value().clone())) + .collect(); + consts.sort_by(|a, b| a.0.cmp(&b.0)); + put_u64(consts.len() as u64, buf); + for (addr, constant) in &consts { + put_address(addr, buf); + constant.put(buf); + } + + // Section 3: Names (Address -> Name component) + // Topologically sorted so parents come before children + // Also build name index for metadata serialization + let sorted_names = topological_sort_names(&self.names); + let mut name_index: NameIndex = NameIndex::new(); + put_u64(sorted_names.len() as u64, buf); + for (i, (addr, name)) in sorted_names.iter().enumerate() { + name_index.insert(addr.clone(), i as u64); + put_address(addr, buf); + put_name_component(name, buf); + } + + // Section 4: Named (name Address -> Named) + // Sort by name hash for deterministic serialization (matches Lean) + // Use indexed serialization for metadata (saves ~24 bytes per address) + let mut named: Vec<_> = + self.named.iter().map(|e| (e.key().clone(), e.value().clone())).collect(); + named + .sort_by(|a, b| a.0.get_hash().as_bytes().cmp(b.0.get_hash().as_bytes())); + put_u64(named.len() as u64, buf); + for (name, named_entry) in &named { + put_bytes(name.get_hash().as_bytes(), buf); + put_named_indexed(named_entry, &name_index, 
buf)?; + } + + // Section 5: Comms (Address -> Comm) + // Sort by address for deterministic serialization (matches Lean) + let mut comms: Vec<_> = + self.comms.iter().map(|e| (e.key().clone(), e.value().clone())).collect(); + comms.sort_by(|a, b| a.0.cmp(&b.0)); + put_u64(comms.len() as u64, buf); + for (addr, comm) in &comms { + put_address(addr, buf); + comm.put(buf); + } + Ok(()) + } + + /// Deserialize an Env from bytes. + pub fn get(buf: &mut &[u8]) -> Result { + // Header + let tag = Tag4::get(buf)?; + if tag.flag != Self::FLAG { + return Err(format!( + "Env::get: expected flag 0x{:X}, got 0x{:X}", + Self::FLAG, + tag.flag + )); + } + if tag.size != 0 { + return Err(format!( + "Env::get: expected Env variant 0, got {}", + tag.size + )); + } + + let env = Env::new(); + + // Section 1: Blobs + let num_blobs = get_u64(buf)?; + for _ in 0..num_blobs { + let addr = get_address(buf)?; + let len = get_u64(buf)? as usize; + if buf.len() < len { + return Err(format!( + "Env::get: need {} bytes for blob, have {}", + len, + buf.len() + )); + } + let (bytes, rest) = buf.split_at(len); + *buf = rest; + env.blobs.insert(addr, bytes.to_vec()); + } + + // Section 2: Consts + let num_consts = get_u64(buf)?; + for _ in 0..num_consts { + let addr = get_address(buf)?; + let constant = Constant::get(buf)?; + env.consts.insert(addr, constant); + } + + // Section 3: Names (build lookup table and reverse index for metadata) + let num_names = get_u64(buf)?; + let mut names_lookup: FxHashMap = FxHashMap::default(); + let mut name_reverse_index: NameReverseIndex = + Vec::with_capacity(num_names as usize + 1); + // Anonymous name is serialized first (index 0) — read it from the stream + // along with all other names below. But pre-seed the lookup so name + // reconstruction works for names whose parent is anonymous. 
+ let anon_addr = Address::from_blake3_hash(*Name::anon().get_hash()); + names_lookup.insert(anon_addr.clone(), Name::anon()); + env.names.insert(anon_addr, Name::anon()); + for _ in 0..num_names { + let addr = get_address(buf)?; + let name = get_name_component(buf, &names_lookup)?; + name_reverse_index.push(addr.clone()); + names_lookup.insert(addr.clone(), name.clone()); + env.names.insert(addr, name); + } + + // Section 4: Named (use indexed deserialization for metadata) + let num_named = get_u64(buf)?; + for _ in 0..num_named { + let name_addr = get_address(buf)?; + let named = get_named_indexed(buf, &name_reverse_index)?; + let name = names_lookup.get(&name_addr).cloned().ok_or_else(|| { + format!("Env::get: missing name for addr {:?}", name_addr) + })?; + env.addr_to_name.insert(named.addr.clone(), name.clone()); + env.named.insert(name, named); + } + + // Section 5: Comms + let num_comms = get_u64(buf)?; + for _ in 0..num_comms { + let addr = get_address(buf)?; + let comm = Comm::get(buf)?; + env.comms.insert(addr, comm); + } + + Ok(env) + } + + /// Calculate the serialized size of an Env. + pub fn serialized_size(&self) -> Result { + let mut buf = Vec::new(); + self.put(&mut buf)?; + Ok(buf.len()) + } + + /// Calculate serialized size with breakdown by section. 
+ pub fn serialized_size_breakdown( + &self, + ) -> Result<(usize, usize, usize, usize, usize, usize), String> { + let mut buf = Vec::new(); + + // Header + Tag4::new(Self::FLAG, 0).put(&mut buf); + let header_size = buf.len(); + + // Section 1: Blobs + put_u64(self.blobs.len() as u64, &mut buf); + for entry in self.blobs.iter() { + put_address(entry.key(), &mut buf); + put_u64(entry.value().len() as u64, &mut buf); + buf.extend_from_slice(entry.value()); + } + let blobs_size = buf.len() - header_size; + + // Section 2: Consts + let before_consts = buf.len(); + put_u64(self.consts.len() as u64, &mut buf); + for entry in self.consts.iter() { + put_address(entry.key(), &mut buf); + entry.value().put(&mut buf); + } + let consts_size = buf.len() - before_consts; + + // Section 3: Names (also build name index) + let before_names = buf.len(); + let sorted_names = topological_sort_names(&self.names); + let mut name_index: NameIndex = NameIndex::new(); + put_u64(sorted_names.len() as u64, &mut buf); + for (i, (addr, name)) in sorted_names.iter().enumerate() { + name_index.insert(addr.clone(), i as u64); + put_address(addr, &mut buf); + put_name_component(name, &mut buf); + } + let names_size = buf.len() - before_names; + + // Section 4: Named (use indexed serialization) + let before_named = buf.len(); + put_u64(self.named.len() as u64, &mut buf); + for entry in self.named.iter() { + put_bytes(entry.key().get_hash().as_bytes(), &mut buf); + put_named_indexed(entry.value(), &name_index, &mut buf)?; + } + let named_size = buf.len() - before_named; + + // Section 5: Comms + let before_comms = buf.len(); + put_u64(self.comms.len() as u64, &mut buf); + for entry in self.comms.iter() { + put_address(entry.key(), &mut buf); + entry.value().put(&mut buf); + } + let comms_size = buf.len() - before_comms; + + Ok(( + header_size, + blobs_size, + consts_size, + names_size, + named_size, + comms_size, + )) + } +} + +/// Topologically sort names so parents come before children. 
+fn topological_sort_names( + names: &dashmap::DashMap, +) -> Vec<(Address, Name)> { + use std::collections::HashSet; + + let mut result = Vec::with_capacity(names.len() + 1); + let mut visited: HashSet
= HashSet::new(); + + // Include anonymous name first so it gets index 0 in the name index. + // Arena nodes frequently reference it as a binder name. + let anon_addr = Address::from_blake3_hash(*Name::anon().get_hash()); + result.push((anon_addr.clone(), Name::anon())); + visited.insert(anon_addr); + + fn visit( + name: &Name, + visited: &mut HashSet
, + result: &mut Vec<(Address, Name)>, + ) { + let addr = Address::from_blake3_hash(*name.get_hash()); + if visited.contains(&addr) { + return; + } + + // Visit parent first + match name.as_data() { + NameData::Anonymous(_) => {}, + NameData::Str(parent, _, _) | NameData::Num(parent, _, _) => { + visit(parent, visited, result); + }, + } + + visited.insert(addr.clone()); + result.push((addr, name.clone())); + } + + // Sort entries by address before DFS for deterministic order (matches Lean) + let mut sorted_entries: Vec<_> = + names.iter().map(|e| (e.key().clone(), e.value().clone())).collect(); + sorted_entries.sort_by(|a, b| a.0.cmp(&b.0)); + for (_, name) in &sorted_entries { + visit(name, &mut visited, &mut result); + } + + result +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::ix::ixon::constant::tests::gen_constant; + use crate::ix::ixon::tests::gen_range; + use quickcheck::{Arbitrary, Gen}; + + #[quickcheck] + fn prop_pack_bools_roundtrip(x: Vec) -> bool { + let mut bools = x; + bools.truncate(8); + bools == unpack_bools(bools.len(), pack_bools(bools.clone())) + } + + #[test] + fn test_pack_bools_specific() { + assert_eq!(pack_bools([true, false, true]), 0b101); + assert_eq!(pack_bools([false, false, false, false, true]), 0b10000); + assert_eq!(unpack_bools(3, 0b101), vec![true, false, true]); + assert_eq!( + unpack_bools(5, 0b10000), + vec![false, false, false, false, true] + ); + } + + #[test] + fn test_name_roundtrip() { + let names = vec![ + Name::anon(), + Name::str(Name::anon(), "foo".to_string()), + Name::num(Name::anon(), Nat::from(42u64)), + Name::str(Name::str(Name::anon(), "a".to_string()), "b".to_string()), + Name::num(Name::str(Name::anon(), "x".to_string()), Nat::from(123u64)), + ]; + + for name in names { + let mut buf = Vec::new(); + put_name(&name, &mut buf); + let recovered = get_name(&mut buf.as_slice()).unwrap(); + assert_eq!(name, recovered, "Name roundtrip failed"); + } + } + + #[test] + fn test_env_roundtrip_empty() { + let 
env = Env::new(); + let mut buf = Vec::new(); + env.put(&mut buf).unwrap(); + let recovered = Env::get(&mut buf.as_slice()).unwrap(); + assert_eq!(env.blobs.len(), recovered.blobs.len()); + assert_eq!(env.consts.len(), recovered.consts.len()); + assert_eq!(env.named.len(), recovered.named.len()); + assert_eq!(env.comms.len(), recovered.comms.len()); + } + + // ========== Arbitrary generators for Env ========== + + fn gen_string(g: &mut Gen) -> String { + let len = gen_range(g, 1..20); + (0..len) + .map(|_| { + let c: u8 = Arbitrary::arbitrary(g); + let idx = c % 62; + // ASCII letters/numbers only: a-z (0-25), A-Z (26-51), 0-9 (52-61) + let ch = if idx < 26 { + b'a' + idx + } else if idx < 52 { + b'A' + (idx - 26) + } else { + b'0' + (idx - 52) + }; + ch as char + }) + .collect() + } + + fn gen_name(g: &mut Gen, depth: usize) -> Name { + if depth == 0 { + Name::anon() + } else { + let parent = gen_name(g, depth - 1); + let use_str: bool = Arbitrary::arbitrary(g); + if use_str { + Name::str(parent, gen_string(g)) + } else { + let n: u64 = Arbitrary::arbitrary(g); + Name::num(parent, Nat::from(n)) + } + } + } + + fn gen_blob(g: &mut Gen) -> Vec { + let len = gen_range(g, 1..100); + (0..len).map(|_| Arbitrary::arbitrary(g)).collect() + } + + fn gen_env(g: &mut Gen) -> Env { + let env = Env::new(); + + // Generate blobs + let num_blobs = gen_range(g, 0..10); + for _ in 0..num_blobs { + let blob = gen_blob(g); + env.store_blob(blob); + } + + // Generate names (with varying depths) + let num_names = gen_range(g, 1..20); + let mut names: Vec = Vec::new(); + for _ in 0..num_names { + let depth = gen_range(g, 1..5); + let name = gen_name(g, depth); + let addr = Address::from_blake3_hash(*name.get_hash()); + env.names.insert(addr, name.clone()); + names.push(name); + } + + // Generate constants and named entries + let num_consts = gen_range(g, 0..10); + for i in 0..num_consts { + let constant = gen_constant(g); + let mut buf = Vec::new(); + constant.put(&mut buf); + let addr 
= Address::hash(&buf); + env.consts.insert(addr.clone(), constant); + + // Create a named entry for this constant + if !names.is_empty() { + let name = names[i % names.len()].clone(); + let meta = ConstantMeta::default(); + let named = Named { addr: addr.clone(), meta }; + env.addr_to_name.insert(addr, name.clone()); + env.named.insert(name, named); + } + } + + // Generate comms + let num_comms = gen_range(g, 0..5); + for _ in 0..num_comms { + let comm = Comm::arbitrary(g); + let addr = Address::arbitrary(g); + env.comms.insert(addr, comm); + } + + env + } + + #[derive(Debug, Clone)] + struct ArbitraryEnv(Env); + + impl Arbitrary for ArbitraryEnv { + fn arbitrary(g: &mut Gen) -> Self { + ArbitraryEnv(gen_env(g)) + } + } + + fn env_roundtrip(env: &Env) -> bool { + let mut buf = Vec::new(); + if let Err(e) = env.put(&mut buf) { + eprintln!("Env::put failed: {}", e); + return false; + } + match Env::get(&mut buf.as_slice()) { + Ok(recovered) => { + // Check counts match + if env.blobs.len() != recovered.blobs.len() { + eprintln!( + "blobs mismatch: {} vs {}", + env.blobs.len(), + recovered.blobs.len() + ); + return false; + } + if env.consts.len() != recovered.consts.len() { + eprintln!( + "consts mismatch: {} vs {}", + env.consts.len(), + recovered.consts.len() + ); + return false; + } + if env.named.len() != recovered.named.len() { + eprintln!( + "named mismatch: {} vs {}", + env.named.len(), + recovered.named.len() + ); + return false; + } + if env.comms.len() != recovered.comms.len() { + eprintln!( + "comms mismatch: {} vs {}", + env.comms.len(), + recovered.comms.len() + ); + return false; + } + + // Check blobs content + for entry in env.blobs.iter() { + match recovered.blobs.get(entry.key()) { + Some(v) if v.value() == entry.value() => {}, + _ => { + eprintln!("blob content mismatch for {:?}", entry.key()); + return false; + }, + } + } + + // Check consts content + for entry in env.consts.iter() { + match recovered.consts.get(entry.key()) { + Some(v) if 
v.value() == entry.value() => {}, + _ => { + eprintln!("const content mismatch for {:?}", entry.key()); + return false; + }, + } + } + + // Check named content + for entry in env.named.iter() { + match recovered.named.get(entry.key()) { + Some(v) if v.addr == entry.value().addr => {}, + _ => { + eprintln!("named content mismatch for {:?}", entry.key()); + return false; + }, + } + } + + // Check comms content + for entry in env.comms.iter() { + match recovered.comms.get(entry.key()) { + Some(v) if v.value() == entry.value() => {}, + _ => { + eprintln!("comm content mismatch for {:?}", entry.key()); + return false; + }, + } + } + + true + }, + Err(e) => { + eprintln!("env_roundtrip error: {}", e); + false + }, + } + } + + #[quickcheck] + fn prop_env_roundtrip(env: ArbitraryEnv) -> bool { + env_roundtrip(&env.0) + } + + #[test] + fn test_env_roundtrip_with_data() { + let mut g = Gen::new(20); + for _ in 0..10 { + let env = gen_env(&mut g); + assert!(env_roundtrip(&env), "Env roundtrip failed"); + } + } +} diff --git a/src/ix/ixon/sharing.rs b/src/ix/ixon/sharing.rs new file mode 100644 index 00000000..6b9cef77 --- /dev/null +++ b/src/ix/ixon/sharing.rs @@ -0,0 +1,1067 @@ +//! Sharing analysis for expression deduplication within mutual blocks. +//! +//! This module provides alpha-invariant sharing analysis using Merkle-tree hashing. +//! Expressions that are structurally identical get the same hash, and we decide +//! which subterms to share based on a profitability heuristic. + +#![allow(clippy::cast_possible_truncation)] +#![allow(clippy::cast_precision_loss)] +#![allow(clippy::cast_possible_wrap)] +#![allow(clippy::match_same_arms)] + +use std::collections::HashMap; +use std::sync::Arc; + +use indexmap::IndexSet; +use rustc_hash::FxHashMap; + +use super::expr::Expr; +use super::tag::{Tag0, Tag4}; + +/// Information about a subterm for sharing analysis. 
+#[derive(Debug)] +pub struct SubtermInfo { + /// Base size of this node alone (Tag4 header, not including children) for Ixon format + pub base_size: usize, + /// Size in a fully hash-consed store (32-byte key + value with hash references) + pub hash_consed_size: usize, + /// Number of occurrences within this block + pub usage_count: usize, + /// Canonical representative expression + pub expr: Arc, + /// Hashes of child subterms (for topological ordering) + pub children: Vec, +} + +/// Hash an expression node using Merkle-tree style hashing. +/// Returns (hash, child_hashes, value_size) where value_size is the size of the +/// serialized node value in a hash-consed store (not including the 32-byte key). +fn hash_node( + expr: &Expr, + child_hashes: &FxHashMap<*const Expr, blake3::Hash>, + buf: &mut Vec, +) -> (blake3::Hash, Vec, usize) { + buf.clear(); + + let children = match expr { + Expr::Sort(univ_idx) => { + buf.push(Expr::FLAG_SORT); + buf.extend_from_slice(&univ_idx.to_le_bytes()); + vec![] + }, + Expr::Var(idx) => { + buf.push(Expr::FLAG_VAR); + buf.extend_from_slice(&idx.to_le_bytes()); + vec![] + }, + Expr::Ref(ref_idx, univ_indices) => { + buf.push(Expr::FLAG_REF); + buf.extend_from_slice(&ref_idx.to_le_bytes()); + buf.extend_from_slice(&(univ_indices.len() as u64).to_le_bytes()); + for idx in univ_indices { + buf.extend_from_slice(&idx.to_le_bytes()); + } + vec![] + }, + Expr::Rec(rec_idx, univ_indices) => { + buf.push(Expr::FLAG_REC); + buf.extend_from_slice(&rec_idx.to_le_bytes()); + buf.extend_from_slice(&(univ_indices.len() as u64).to_le_bytes()); + for idx in univ_indices { + buf.extend_from_slice(&idx.to_le_bytes()); + } + vec![] + }, + Expr::Prj(type_ref_idx, field_idx, val) => { + buf.push(Expr::FLAG_PRJ); + buf.extend_from_slice(&type_ref_idx.to_le_bytes()); + buf.extend_from_slice(&field_idx.to_le_bytes()); + let val_ptr = val.as_ref() as *const Expr; + let val_hash = child_hashes.get(&val_ptr).unwrap(); + 
buf.extend_from_slice(val_hash.as_bytes()); + vec![*val_hash] + }, + Expr::Str(ref_idx) => { + buf.push(Expr::FLAG_STR); + buf.extend_from_slice(&ref_idx.to_le_bytes()); + vec![] + }, + Expr::Nat(ref_idx) => { + buf.push(Expr::FLAG_NAT); + buf.extend_from_slice(&ref_idx.to_le_bytes()); + vec![] + }, + Expr::App(fun, arg) => { + buf.push(Expr::FLAG_APP); + let fun_ptr = fun.as_ref() as *const Expr; + let arg_ptr = arg.as_ref() as *const Expr; + let fun_hash = child_hashes.get(&fun_ptr).unwrap(); + let arg_hash = child_hashes.get(&arg_ptr).unwrap(); + buf.extend_from_slice(fun_hash.as_bytes()); + buf.extend_from_slice(arg_hash.as_bytes()); + vec![*fun_hash, *arg_hash] + }, + Expr::Lam(ty, body) => { + buf.push(Expr::FLAG_LAM); + let ty_ptr = ty.as_ref() as *const Expr; + let body_ptr = body.as_ref() as *const Expr; + let ty_hash = child_hashes.get(&ty_ptr).unwrap(); + let body_hash = child_hashes.get(&body_ptr).unwrap(); + buf.extend_from_slice(ty_hash.as_bytes()); + buf.extend_from_slice(body_hash.as_bytes()); + vec![*ty_hash, *body_hash] + }, + Expr::All(ty, body) => { + buf.push(Expr::FLAG_ALL); + let ty_ptr = ty.as_ref() as *const Expr; + let body_ptr = body.as_ref() as *const Expr; + let ty_hash = child_hashes.get(&ty_ptr).unwrap(); + let body_hash = child_hashes.get(&body_ptr).unwrap(); + buf.extend_from_slice(ty_hash.as_bytes()); + buf.extend_from_slice(body_hash.as_bytes()); + vec![*ty_hash, *body_hash] + }, + Expr::Let(non_dep, ty, val, body) => { + buf.push(Expr::FLAG_LET); + buf.push(if *non_dep { 1 } else { 0 }); // size field encodes non_dep + let ty_ptr = ty.as_ref() as *const Expr; + let val_ptr = val.as_ref() as *const Expr; + let body_ptr = body.as_ref() as *const Expr; + let ty_hash = child_hashes.get(&ty_ptr).unwrap(); + let val_hash = child_hashes.get(&val_ptr).unwrap(); + let body_hash = child_hashes.get(&body_ptr).unwrap(); + buf.extend_from_slice(ty_hash.as_bytes()); + buf.extend_from_slice(val_hash.as_bytes()); + 
buf.extend_from_slice(body_hash.as_bytes()); + vec![*ty_hash, *val_hash, *body_hash] + }, + Expr::Share(idx) => { + buf.push(Expr::FLAG_SHARE); + buf.extend_from_slice(&idx.to_le_bytes()); + vec![] + }, + }; + + let value_size = buf.len(); + (blake3::hash(buf), children, value_size) +} + +/// Compute the base size of a node (Tag4 header size) for Ixon serialization. +fn compute_base_size(expr: &Expr) -> usize { + match expr { + Expr::Sort(univ_idx) => { + Tag4::new(Expr::FLAG_SORT, *univ_idx).encoded_size() + }, + Expr::Var(idx) => Tag4::new(Expr::FLAG_VAR, *idx).encoded_size(), + Expr::Ref(ref_idx, univ_indices) => { + // tag + ref_idx + N univ indices + Tag4::new(Expr::FLAG_REF, univ_indices.len() as u64).encoded_size() + + Tag0::new(*ref_idx).encoded_size() + + univ_indices + .iter() + .map(|i| Tag0::new(*i).encoded_size()) + .sum::() + }, + Expr::Rec(rec_idx, univ_indices) => { + // tag + rec_idx + N univ indices + Tag4::new(Expr::FLAG_REC, univ_indices.len() as u64).encoded_size() + + Tag0::new(*rec_idx).encoded_size() + + univ_indices + .iter() + .map(|i| Tag0::new(*i).encoded_size()) + .sum::() + }, + Expr::Prj(type_ref_idx, field_idx, _) => { + // Tag (field_idx in payload) + type_ref_idx (variable length, estimate 2 bytes) + Tag4::new(Expr::FLAG_PRJ, *field_idx).encoded_size() + + Tag0::new(*type_ref_idx).encoded_size() + }, + Expr::Str(ref_idx) => Tag4::new(Expr::FLAG_STR, *ref_idx).encoded_size(), + Expr::Nat(ref_idx) => Tag4::new(Expr::FLAG_NAT, *ref_idx).encoded_size(), + Expr::App(..) => Tag4::new(Expr::FLAG_APP, 1).encoded_size(), // telescope count >= 1 + Expr::Lam(..) => Tag4::new(Expr::FLAG_LAM, 1).encoded_size(), + Expr::All(..) => Tag4::new(Expr::FLAG_ALL, 1).encoded_size(), + Expr::Let(non_dep, ..) => { + // size=0 for dep, size=1 for non_dep + Tag4::new(Expr::FLAG_LET, if *non_dep { 1 } else { 0 }).encoded_size() + }, + Expr::Share(idx) => Tag4::new(Expr::FLAG_SHARE, *idx).encoded_size(), + } +} + +/// Get child expressions for traversal. 
+fn get_children(expr: &Expr) -> Vec<&Arc> { + match expr { + Expr::Sort(_) + | Expr::Var(_) + | Expr::Ref(..) + | Expr::Rec(..) + | Expr::Str(_) + | Expr::Nat(_) + | Expr::Share(_) => { + vec![] + }, + Expr::Prj(_, _, val) => vec![val], + Expr::App(fun, arg) => vec![fun, arg], + Expr::Lam(ty, body) | Expr::All(ty, body) => vec![ty, body], + Expr::Let(_, ty, val, body) => vec![ty, val, body], + } +} + +/// Analyze expressions for sharing opportunities within a block. +/// +/// Returns a map from content hash to SubtermInfo, and a map from pointer to hash. +/// Uses a two-phase algorithm: +/// 1. Build DAG structure via post-order traversal with Merkle-tree hashing +/// 2. Propagate usage counts structurally from roots to leaves (O(n) total) +/// +/// If `track_hash_consed_size` is true, computes the hash-consed size for each +/// subterm (32-byte key + value). This adds overhead and can be disabled when +/// only sharing analysis is needed. +pub fn analyze_block( + exprs: &[Arc], + track_hash_consed_size: bool, +) -> (HashMap, FxHashMap<*const Expr, blake3::Hash>) +{ + let mut info_map: HashMap = HashMap::new(); + let mut ptr_to_hash: FxHashMap<*const Expr, blake3::Hash> = + FxHashMap::default(); + let mut hash_buf: Vec = Vec::with_capacity(128); + + // Phase 1: Build DAG structure via post-order traversal + // Don't compute usage counts here - just build the hash→children mapping + enum Frame<'a> { + Visit(&'a Arc), + Process(&'a Arc), + } + + for root in exprs { + let mut stack: Vec> = vec![Frame::Visit(root)]; + + while let Some(frame) = stack.pop() { + match frame { + Frame::Visit(arc_expr) => { + let ptr = arc_expr.as_ref() as *const Expr; + + // Already processed this pointer - just skip + // Usage counts will be computed in phase 2 + if ptr_to_hash.contains_key(&ptr) { + continue; + } + + // Push process frame, then children (in reverse for correct order) + stack.push(Frame::Process(arc_expr)); + for child in get_children(arc_expr).into_iter().rev() { + 
stack.push(Frame::Visit(child)); + } + }, + Frame::Process(arc_expr) => { + let ptr = arc_expr.as_ref() as *const Expr; + if ptr_to_hash.contains_key(&ptr) { + continue; + } + + let (hash, children, value_size) = + hash_node(arc_expr.as_ref(), &ptr_to_hash, &mut hash_buf); + + // Add to ptr_to_hash cache + ptr_to_hash.insert(ptr, hash); + + // Add to info_map if not already present (same content hash from different pointer) + info_map.entry(hash).or_insert_with(|| { + let base_size = compute_base_size(arc_expr.as_ref()); + let hash_consed_size = + if track_hash_consed_size { 32 + value_size } else { 0 }; + SubtermInfo { + base_size, + hash_consed_size, + usage_count: 0, // Will be computed in phase 2 + expr: arc_expr.clone(), + children, + } + }); + }, + } + } + } + + // Phase 2: Propagate usage counts structurally from roots to leaves + // This is O(n) total - no subtree walks needed + // + // Algorithm: + // 1. Each root expression contributes 1 to its hash's count + // 2. Process in reverse topological order (roots first, leaves last) + // 3. 
For each node, add its count to each child's count (with multiplicity) + + // Count root contributions + for root in exprs { + let ptr = root.as_ref() as *const Expr; + if let Some(hash) = ptr_to_hash.get(&ptr) + && let Some(info) = info_map.get_mut(hash) + { + info.usage_count += 1; + } + } + + // Get topological order (leaves first) and reverse it (roots first) + let topo_order = topological_sort(&info_map); + + // Propagate counts from roots to leaves + for hash in topo_order.iter().rev() { + // Get this node's count and children + let (count, children) = { + let info = info_map.get(hash).unwrap(); + (info.usage_count, info.children.clone()) + }; + + // Add this node's count to each child (with multiplicity from children array) + for child_hash in children { + if let Some(child_info) = info_map.get_mut(&child_hash) { + child_info.usage_count += count; + } + } + } + + (info_map, ptr_to_hash) +} + +/// Compute the hash of a single expression. +/// This is useful for testing hash compatibility with Lean. +pub fn hash_expr(expr: &Arc) -> blake3::Hash { + let (_info_map, ptr_to_hash) = + analyze_block(std::slice::from_ref(expr), false); + let ptr = expr.as_ref() as *const Expr; + *ptr_to_hash.get(&ptr).expect("Expression not found in ptr_to_hash") +} + +/// Topological sort of subterms (leaves first, parents last). +/// CRITICAL: Keys are sorted by hash bytes for deterministic output. +/// This ensures Lean and Rust produce the same topological order. 
+pub fn topological_sort( + info_map: &HashMap, +) -> Vec { + #[derive(Clone, Copy, PartialEq, Eq)] + enum VisitState { + InProgress, + Done, + } + + let mut state: HashMap = HashMap::new(); + let mut result: Vec = Vec::new(); + + fn visit( + hash: blake3::Hash, + info_map: &HashMap, + state: &mut HashMap, + result: &mut Vec, + ) { + match state.get(&hash) { + Some(VisitState::Done) => return, + Some(VisitState::InProgress) => return, // Cycle (shouldn't happen) + _ => {}, + } + + state.insert(hash, VisitState::InProgress); + + if let Some(info) = info_map.get(&hash) { + for child in &info.children { + visit(*child, info_map, state, result); + } + } + + state.insert(hash, VisitState::Done); + result.push(hash); + } + + // Sort keys deterministically by hash bytes (lexicographic comparison) + let mut sorted_keys: Vec = info_map.keys().cloned().collect(); + sorted_keys.sort_by_key(|h| *h.as_bytes()); + + for hash in sorted_keys { + visit(hash, info_map, &mut state, &mut result); + } + + result +} + +/// Compute effective sizes for all subterms in topological order. +/// Returns a map from hash to effective size (total serialized bytes). +pub fn compute_effective_sizes( + info_map: &HashMap, + topo_order: &[blake3::Hash], +) -> HashMap { + let mut sizes: HashMap = HashMap::new(); + + for hash in topo_order { + if let Some(info) = info_map.get(hash) { + let mut size = info.base_size; + for child_hash in &info.children { + size += sizes.get(child_hash).copied().unwrap_or(0); + } + sizes.insert(*hash, size); + } + } + + sizes +} + +/// Analyze sharing statistics for debugging pathological cases. +/// Returns a summary of why sharing may not be effective. 
+#[allow(dead_code)] +pub fn analyze_sharing_stats( + info_map: &HashMap, +) -> SharingStats { + let topo_order = topological_sort(info_map); + let effective_sizes = compute_effective_sizes(info_map, &topo_order); + + let total_subterms = info_map.len(); + let mut usage_distribution: HashMap = HashMap::new(); + let mut size_distribution: HashMap = HashMap::new(); + let mut total_usage: usize = 0; + let mut unique_subterms = 0; + let mut shared_subterms = 0; + + for (hash, info) in info_map.iter() { + total_usage += info.usage_count; + *usage_distribution.entry(info.usage_count).or_insert(0) += 1; + + let size = effective_sizes.get(hash).copied().unwrap_or(0); + let size_bucket = match size { + 0..=1 => 1, + 2..=4 => 4, + 5..=10 => 10, + 11..=50 => 50, + 51..=100 => 100, + _ => 1000, + }; + *size_distribution.entry(size_bucket).or_insert(0) += 1; + + if info.usage_count == 1 { + unique_subterms += 1; + } else { + shared_subterms += 1; + } + } + + // Count candidates at each filtering stage + let candidates_usage_ge_2: usize = + info_map.values().filter(|info| info.usage_count >= 2).count(); + + let candidates_positive_potential: usize = info_map + .iter() + .filter(|(_, info)| info.usage_count >= 2) + .filter(|(hash, info)| { + let term_size = effective_sizes.get(hash).copied().unwrap_or(0); + let n = info.usage_count; + let potential = (n as isize - 1) * (term_size as isize) - (n as isize); + potential > 0 + }) + .count(); + + // Simulate actual sharing to count how many pass + let mut simulated_shared = 0; + let mut candidates: Vec<_> = info_map + .iter() + .filter(|(_, info)| info.usage_count >= 2) + .filter_map(|(hash, info)| { + let term_size = *effective_sizes.get(hash)?; + let n = info.usage_count; + let potential = (n as isize - 1) * (term_size as isize) - (n as isize); + if potential > 0 { Some((term_size, n)) } else { None } + }) + .collect(); + + candidates.sort_unstable_by(|a, b| { + let pot_a = (a.1 as isize - 1) * (a.0 as isize); + let pot_b = (b.1 as 
isize - 1) * (b.0 as isize); + pot_b.cmp(&pot_a) + }); + + for (term_size, usage_count) in candidates { + let next_ref_size = + Tag4::new(Expr::FLAG_SHARE, simulated_shared as u64).encoded_size(); + let n = usage_count as isize; + let savings = (n - 1) * (term_size as isize) - n * (next_ref_size as isize); + if savings > 0 { + simulated_shared += 1; + } + // Don't break - process all candidates + } + + SharingStats { + total_subterms, + unique_subterms, + shared_subterms, + total_usage, + candidates_usage_ge_2, + candidates_positive_potential, + actually_shared: simulated_shared, + usage_distribution, + size_distribution, + } +} + +/// Statistics about sharing analysis. +#[derive(Debug)] +pub struct SharingStats { + pub total_subterms: usize, + pub unique_subterms: usize, + pub shared_subterms: usize, + pub total_usage: usize, + pub candidates_usage_ge_2: usize, + pub candidates_positive_potential: usize, + pub actually_shared: usize, + pub usage_distribution: HashMap, + pub size_distribution: HashMap, +} + +impl std::fmt::Display for SharingStats { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + writeln!(f, "=== Sharing Analysis ===")?; + writeln!(f, "Total unique subterms: {}", self.total_subterms)?; + writeln!(f, " - Unique (usage=1): {}", self.unique_subterms)?; + writeln!(f, " - Shared (usage>=2): {}", self.shared_subterms)?; + writeln!(f, "Total usage count: {}", self.total_usage)?; + writeln!( + f, + "Average usage: {:.2}", + if self.total_subterms > 0 { + self.total_usage as f64 / self.total_subterms as f64 + } else { + 0.0 + } + )?; + writeln!(f)?; + writeln!(f, "Filtering pipeline:")?; + writeln!( + f, + " 1. Candidates with usage >= 2: {}", + self.candidates_usage_ge_2 + )?; + writeln!( + f, + " 2. With positive potential: {}", + self.candidates_positive_potential + )?; + writeln!(f, " 3. 
Actually shared: {}", self.actually_shared)?; + writeln!(f)?; + writeln!(f, "Usage distribution:")?; + let mut usage_counts: Vec<_> = self.usage_distribution.iter().collect(); + usage_counts.sort_by_key(|(k, _)| *k); + for (usage, count) in usage_counts.iter().take(10) { + writeln!(f, " usage={}: {} subterms", usage, count)?; + } + if usage_counts.len() > 10 { + writeln!(f, " ... and {} more buckets", usage_counts.len() - 10)?; + } + writeln!(f)?; + writeln!(f, "Size distribution (effective_size buckets):")?; + let mut size_counts: Vec<_> = self.size_distribution.iter().collect(); + size_counts.sort_by_key(|(k, _)| *k); + for (size_bucket, count) in size_counts { + writeln!(f, " size<={}: {} subterms", size_bucket, count)?; + } + Ok(()) + } +} + +/// Decide which subterms to share based on profitability. +/// +/// Sharing is profitable when: `(N - 1) * term_size > N * share_ref_size` +/// where N is usage count, term_size is effective size, and share_ref_size +/// is the size of a Share(idx) reference at the current index. +/// +/// Optimized from O(k×n) to O(n log n) by pre-sorting candidates. 
+pub fn decide_sharing( + info_map: &HashMap, +) -> IndexSet { + let topo_order = topological_sort(info_map); + let effective_sizes = compute_effective_sizes(info_map, &topo_order); + + // Pre-filter and sort candidates by potential savings (assuming minimal ref_size=1) + // This gives us a stable ordering since relative savings don't change as ref_size grows + let mut candidates: Vec<_> = info_map + .iter() + .filter(|(_, info)| info.usage_count >= 2) + .filter_map(|(hash, info)| { + let term_size = *effective_sizes.get(hash)?; + let n = info.usage_count; + // Potential savings assuming ref_size = 1 (minimum) + let potential = (n as isize - 1) * (term_size as isize) - (n as isize); + if potential > 0 { Some((*hash, term_size, n)) } else { None } + }) + .collect(); + + // Sort by decreasing gross benefit, with hash bytes as tie-breaker for determinism + candidates.sort_unstable_by(|a, b| { + let gross_a = (a.2 as isize - 1) * (a.1 as isize); + let gross_b = (b.2 as isize - 1) * (b.1 as isize); + match gross_b.cmp(&gross_a) { + std::cmp::Ordering::Equal => a.0.as_bytes().cmp(b.0.as_bytes()), + other => other, + } + }); + + let mut shared: IndexSet = IndexSet::new(); + + // Process ALL candidates - don't break early! + // The early-break was incorrect: ref_size growth affects candidates differently + // based on their usage count. A high-usage small term may become unprofitable + // while a low-usage large term remains profitable. + for (hash, term_size, usage_count) in candidates { + let next_idx = shared.len(); + let next_ref_size = + Tag4::new(Expr::FLAG_SHARE, next_idx as u64).encoded_size(); + let n = usage_count as isize; + let savings = (n - 1) * (term_size as isize) - n * (next_ref_size as isize); + + if savings > 0 { + shared.insert(hash); + } + } + + shared +} + +/// Rewrite expressions to use Share(idx) references for shared subterms. +/// +/// Returns the rewritten expressions and the sharing vector. 
+pub fn build_sharing_vec( + exprs: &[Arc], + shared_hashes: &IndexSet, + ptr_to_hash: &FxHashMap<*const Expr, blake3::Hash>, + info_map: &HashMap, +) -> (Vec>, Vec>) { + // CRITICAL: Re-sort shared_hashes in topological order (leaves first). + // decide_sharing returns hashes sorted by gross benefit (large terms first), + // but we need leaves first so that when serializing sharing[i], all its + // children are already available as Share(j) for j < i. + let topo_order = topological_sort(info_map); + let shared_in_topo_order: Vec = + topo_order.into_iter().filter(|h| shared_hashes.contains(h)).collect(); + + // Build sharing vector incrementally to avoid forward references. + // When building sharing[i], only Share(j) for j < i is allowed. + let mut sharing_vec: Vec> = Vec::with_capacity(shared_hashes.len()); + let mut hash_to_idx: HashMap = HashMap::new(); + let mut cache: FxHashMap<*const Expr, Arc> = FxHashMap::default(); + + for h in &shared_in_topo_order { + let info = info_map.get(h).expect("shared hash must be in info_map"); + // Clear cache - hash_to_idx changed, so cached rewrites are invalid + cache.clear(); + // Rewrite using only indices < current length (hash_to_idx doesn't include this entry yet) + let rewritten = + rewrite_expr(&info.expr, &hash_to_idx, ptr_to_hash, &mut cache); + + let idx = sharing_vec.len() as u64; + sharing_vec.push(rewritten); + // Now add this hash to the map for subsequent entries + hash_to_idx.insert(*h, idx); + } + + // Rewrite the root expressions (can use all Share indices) + // Use a fresh cache since hash_to_idx is now complete + cache.clear(); + let rewritten_exprs: Vec> = exprs + .iter() + .map(|e| rewrite_expr(e, &hash_to_idx, ptr_to_hash, &mut cache)) + .collect(); + + (rewritten_exprs, sharing_vec) +} + +/// Frame for iterative rewrite traversal. 
enum RewriteFrame<'a> {
  /// Visit an expression (check cache/share, then push children)
  Visit(&'a Arc<Expr>),
  /// Build a Prj node from rewritten children (type_ref_idx, field_idx)
  BuildPrj(&'a Arc<Expr>, u64, u64),
  /// Build an App node from rewritten children
  BuildApp(&'a Arc<Expr>),
  /// Build a Lam node from rewritten children
  BuildLam(&'a Arc<Expr>),
  /// Build an All node from rewritten children
  BuildAll(&'a Arc<Expr>),
  /// Build a Let node from rewritten children
  BuildLet(&'a Arc<Expr>, bool),
}

/// Rewrite an expression tree to use Share(idx) references.
/// Uses iterative traversal with caching to handle deep trees and Arc sharing.
fn rewrite_expr(
  expr: &Arc<Expr>,
  hash_to_idx: &HashMap<blake3::Hash, u64>,
  ptr_to_hash: &FxHashMap<*const Expr, blake3::Hash>,
  cache: &mut FxHashMap<*const Expr, Arc<Expr>>,
) -> Arc<Expr> {
  // Record `built` as the rewrite of `orig` and push it as a pending result.
  fn remember(
    cache: &mut FxHashMap<*const Expr, Arc<Expr>>,
    results: &mut Vec<Arc<Expr>>,
    orig: &Arc<Expr>,
    built: Arc<Expr>,
  ) {
    cache.insert(orig.as_ref() as *const Expr, built.clone());
    results.push(built);
  }

  let mut stack: Vec<RewriteFrame<'_>> = vec![RewriteFrame::Visit(expr)];
  let mut results: Vec<Arc<Expr>> = Vec::new();

  while let Some(frame) = stack.pop() {
    match frame {
      RewriteFrame::Visit(e) => {
        let ptr = e.as_ref() as *const Expr;

        // Previously rewritten pointer — reuse the cached result.
        if let Some(cached) = cache.get(&ptr) {
          results.push(cached.clone());
          continue;
        }

        // Shared subterm — collapse to a Share reference.
        if let Some(hash) = ptr_to_hash.get(&ptr)
          && let Some(&idx) = hash_to_idx.get(hash)
        {
          remember(cache, &mut results, e, Expr::share(idx));
          continue;
        }

        match e.as_ref() {
          // Leaf nodes rewrite to themselves.
          Expr::Sort(_)
          | Expr::Var(_)
          | Expr::Ref(..)
          | Expr::Rec(..)
          | Expr::Str(_)
          | Expr::Nat(_)
          | Expr::Share(_) => remember(cache, &mut results, e, e.clone()),

          // Interior nodes: push a build frame, then the children so
          // they are rewritten first.
          Expr::Prj(type_ref_idx, field_idx, val) => {
            stack.push(RewriteFrame::BuildPrj(e, *type_ref_idx, *field_idx));
            stack.push(RewriteFrame::Visit(val));
          },
          Expr::App(fun, arg) => {
            stack.push(RewriteFrame::BuildApp(e));
            stack.push(RewriteFrame::Visit(arg));
            stack.push(RewriteFrame::Visit(fun));
          },
          Expr::Lam(ty, body) => {
            stack.push(RewriteFrame::BuildLam(e));
            stack.push(RewriteFrame::Visit(body));
            stack.push(RewriteFrame::Visit(ty));
          },
          Expr::All(ty, body) => {
            stack.push(RewriteFrame::BuildAll(e));
            stack.push(RewriteFrame::Visit(body));
            stack.push(RewriteFrame::Visit(ty));
          },
          Expr::Let(non_dep, ty, val, body) => {
            stack.push(RewriteFrame::BuildLet(e, *non_dep));
            stack.push(RewriteFrame::Visit(body));
            stack.push(RewriteFrame::Visit(val));
            stack.push(RewriteFrame::Visit(ty));
          },
        }
      },

      RewriteFrame::BuildPrj(orig, type_ref_idx, field_idx) => {
        let new_val = results.pop().unwrap();
        let Expr::Prj(_, _, orig_val) = orig.as_ref() else {
          unreachable!()
        };
        // Preserve pointer identity when nothing underneath changed.
        let built = if Arc::ptr_eq(&new_val, orig_val) {
          orig.clone()
        } else {
          Expr::prj(type_ref_idx, field_idx, new_val)
        };
        remember(cache, &mut results, orig, built);
      },

      RewriteFrame::BuildApp(orig) => {
        // arg was pushed last, so it pops first.
        let new_arg = results.pop().unwrap();
        let new_fun = results.pop().unwrap();
        let Expr::App(orig_fun, orig_arg) = orig.as_ref() else {
          unreachable!()
        };
        let built = if Arc::ptr_eq(&new_fun, orig_fun)
          && Arc::ptr_eq(&new_arg, orig_arg)
        {
          orig.clone()
        } else {
          Expr::app(new_fun, new_arg)
        };
        remember(cache, &mut results, orig, built);
      },

      RewriteFrame::BuildLam(orig) => {
        // body was pushed last, so it pops first.
        let new_body = results.pop().unwrap();
        let new_ty = results.pop().unwrap();
        let Expr::Lam(orig_ty, orig_body) = orig.as_ref() else {
          unreachable!()
        };
        let built = if Arc::ptr_eq(&new_ty, orig_ty)
          && Arc::ptr_eq(&new_body, orig_body)
        {
          orig.clone()
        } else {
          Expr::lam(new_ty, new_body)
        };
        remember(cache, &mut results, orig, built);
      },

      RewriteFrame::BuildAll(orig) => {
        // body was pushed last, so it pops first.
        let new_body = results.pop().unwrap();
        let new_ty = results.pop().unwrap();
        let Expr::All(orig_ty, orig_body) = orig.as_ref() else {
          unreachable!()
        };
        let built = if Arc::ptr_eq(&new_ty, orig_ty)
          && Arc::ptr_eq(&new_body, orig_body)
        {
          orig.clone()
        } else {
          Expr::all(new_ty, new_body)
        };
        remember(cache, &mut results, orig, built);
      },

      RewriteFrame::BuildLet(orig, non_dep) => {
        // Pop order reverses the push order: body, val, ty.
        let new_body = results.pop().unwrap();
        let new_val = results.pop().unwrap();
        let new_ty = results.pop().unwrap();
        let Expr::Let(_, orig_ty, orig_val, orig_body) = orig.as_ref() else {
          unreachable!()
        };
        let built = if Arc::ptr_eq(&new_ty, orig_ty)
          && Arc::ptr_eq(&new_val, orig_val)
          && Arc::ptr_eq(&new_body, orig_body)
        {
          orig.clone()
        } else {
          Expr::let_(non_dep, new_ty, new_val, new_body)
        };
        remember(cache, &mut results, orig, built);
      },
    }
  }

  results.pop().unwrap()
}

#[cfg(test)]
mod tests {
  use super::*;

  /// Test that demonstrates the early-break bug in decide_sharing.
+ /// + /// The bug: decide_sharing sorts candidates by "gross benefit" (n-1)*size + /// and breaks on the first unprofitable candidate. However, as ref_size + /// grows (1 byte for idx<8, 2 bytes for idx>=8), a high-usage small-size + /// term may become unprofitable while a low-usage large-size term remains + /// profitable. + /// + /// At ref_size=2 (idx >= 8): + /// - Term A: size=2, n=10, gross=18, savings = 18 - 20 = -2 < 0 (triggers break!) + /// - Term B: size=5, n=2, gross=5, savings = 5 - 4 = 1 > 0 (profitable but skipped) + /// + /// We need 8 filler terms with gross > 18 to fill indices 0-7 first. + #[test] + fn test_early_break_bug() { + // Filler: 8 unique terms with gross > 18 + // Var(256)..Var(263), each appearing 10 times + // size=3 (256 fits in 2 bytes after Tag4 header), n=10, gross=9*3=27 > 18 + let mut all_exprs: Vec> = Vec::new(); + + for i in 0..8u64 { + let var = Expr::var(256 + i); // size=3 + for _ in 0..10 { + all_exprs.push(var.clone()); + } + } + + // Term A: Var(10), appearing 10 times + // size=2 (10 < 256, so fits in Tag4 with 2-byte encoding), n=10, gross=9*2=18 + // At ref_size=2 (idx >= 8): savings = 18 - 20 = -2 < 0 (triggers break!) + let term_a = Expr::var(10); + for _ in 0..10 { + all_exprs.push(term_a.clone()); + } + + // Term B: All(Var(0), All(Var(1), Var(2))) appearing 2 times + // This has effective_size = 1 + 1 + (1 + 1 + 1) = 5 + // gross = 1*5 = 5 < 18 ✓ (comes after A in sort order) + // At ref_size=2: savings = 5 - 4 = 1 > 0 ✓ (profitable!) 
+ let term_b = Expr::all(Expr::var(0), Expr::all(Expr::var(1), Expr::var(2))); + all_exprs.push(term_b.clone()); + all_exprs.push(term_b.clone()); + + // Analyze all expressions together + let (info_map, ptr_to_hash) = analyze_block(&all_exprs, false); + let shared = decide_sharing(&info_map); + + // Verify term_a was found with usage_count=10 + let term_a_ptr = term_a.as_ref() as *const Expr; + let term_a_hash = ptr_to_hash.get(&term_a_ptr); + if let Some(hash) = term_a_hash { + let info = info_map.get(hash).unwrap(); + assert_eq!(info.usage_count, 10, "term_a should have usage_count=10"); + } + + // Find term B's hash - it's the outer All(Var(0), ...) + let term_b_ptr = term_b.as_ref() as *const Expr; + let term_b_hash = ptr_to_hash.get(&term_b_ptr); + + if let Some(hash) = term_b_hash { + let info = info_map.get(hash).unwrap(); + assert_eq!(info.usage_count, 2, "term_b should have usage_count=2"); + + // Compute effective size + let topo = topological_sort(&info_map); + let sizes = compute_effective_sizes(&info_map, &topo); + let term_b_size = sizes.get(hash).copied().unwrap_or(0); + + // This assertion will FAIL with buggy code (early break) and PASS with fix + assert!( + shared.contains(hash), + "Term B (effective_size={}, n=2, gross={}) should be shared. \ + At ref_size=2, savings = {} - 4 = {} > 0. \ + But early-break bug skips it after term A fails. 
\ + shared.len()={}", + term_b_size, + term_b_size, // gross = (n-1)*size = 1*size + term_b_size, + term_b_size as isize - 4, + shared.len() + ); + } + } + + #[test] + fn test_analyze_simple() { + // Create a simple expression: App(Var(0), Var(0)) + // Var(0) should have usage_count = 2 + let var0 = Expr::var(0); + let app = Expr::app(var0.clone(), var0); + + let (info_map, ptr_to_hash) = analyze_block(&[app], false); + + // Should have 2 unique subterms: Var(0) and App(Var(0), Var(0)) + assert_eq!(info_map.len(), 2); + + // Find Var(0) info - it should have usage_count = 2 + let var_hash = ptr_to_hash.values().find(|h| { + info_map + .get(*h) + .is_some_and(|info| matches!(info.expr.as_ref(), Expr::Var(0))) + }); + assert!(var_hash.is_some()); + let var_info = info_map.get(var_hash.unwrap()).unwrap(); + assert_eq!(var_info.usage_count, 2); + } + + #[test] + fn test_decide_sharing_simple() { + // Create expression with repeated subterm + let ty = Expr::sort(0); + let lam1 = Expr::lam(ty.clone(), Expr::var(0)); + let lam2 = Expr::lam(ty.clone(), Expr::var(1)); + let app = Expr::app(lam1, lam2); + + let (info_map, _) = analyze_block(&[app], false); + let shared = decide_sharing(&info_map); + + // ty (Sort(0)) appears twice, might be shared depending on size + // This is a basic smoke test + assert!(shared.len() <= info_map.len()); + } + + #[test] + fn test_topological_sort() { + let var0 = Expr::var(0); + let var1 = Expr::var(1); + let app = Expr::app(var0, var1); + + let (info_map, _) = analyze_block(&[app], false); + let topo = topological_sort(&info_map); + + // Should have all hashes + assert_eq!(topo.len(), info_map.len()); + + // Leaves (Var) should come before App + let app_hash = info_map + .iter() + .find(|(_, info)| matches!(info.expr.as_ref(), Expr::App(..))) + .map(|(h, _)| *h); + + if let Some(app_h) = app_hash { + let app_pos = topo.iter().position(|h| *h == app_h).unwrap(); + // App should be last (after its children) + for child_hash in 
&info_map.get(&app_h).unwrap().children { + let child_pos = topo.iter().position(|h| h == child_hash).unwrap(); + assert!( + child_pos < app_pos, + "Child should come before parent in topo order" + ); + } + } + } + + #[test] + fn test_build_sharing_vec() { + // Create expression with a shared subterm: App(App(var0, var0), var0) + // var0 appears 3 times, should be shared + let var0 = Expr::var(0); + let app1 = Expr::app(var0.clone(), var0.clone()); + let app2 = Expr::app(app1, var0); + + let (info_map, ptr_to_hash) = + analyze_block(std::slice::from_ref(&app2), false); + let shared = decide_sharing(&info_map); + + // If var0 is shared, verify it + if !shared.is_empty() { + let (rewritten, sharing_vec) = + build_sharing_vec(&[app2], &shared, &ptr_to_hash, &info_map); + + // Sharing vec should have the shared expressions + assert_eq!(sharing_vec.len(), shared.len()); + + // Rewritten should have at least one Share reference if sharing happened + assert_eq!(rewritten.len(), 1); + } + } + + #[test] + fn test_roundtrip_with_sharing() { + use crate::ix::ixon::serialize::{get_expr, put_expr}; + + // Create a simple expression with potential sharing + let var0 = Expr::var(0); + let var1 = Expr::var(1); + let app = Expr::app(var0, var1); + + // Serialize and deserialize without sharing + let mut buf = Vec::new(); + put_expr(&app, &mut buf); + let recovered = get_expr(&mut buf.as_slice()).unwrap(); + + assert_eq!(app.as_ref(), recovered.as_ref()); + } +} diff --git a/src/ix/ixon/tag.rs b/src/ix/ixon/tag.rs new file mode 100644 index 00000000..0ea5e1d0 --- /dev/null +++ b/src/ix/ixon/tag.rs @@ -0,0 +1,602 @@ +//! Tag encodings for compact serialization. +//! +//! - Tag4: 4-bit flag for expressions (16 variants) +//! - Tag2: 2-bit flag for universes (4 variants) +//! - Tag0: No flag, just variable-length u64 + +#![allow(clippy::needless_pass_by_value)] + +/// Count how many bytes needed to represent a u64. 
+pub fn u64_byte_count(x: u64) -> u8 {
+  match x {
+    0 => 0,
+    x if x < 0x0000_0000_0000_0100 => 1,
+    x if x < 0x0000_0000_0001_0000 => 2,
+    x if x < 0x0000_0000_0100_0000 => 3,
+    x if x < 0x0000_0001_0000_0000 => 4,
+    x if x < 0x0000_0100_0000_0000 => 5,
+    x if x < 0x0001_0000_0000_0000 => 6,
+    x if x < 0x0100_0000_0000_0000 => 7,
+    _ => 8,
+  }
+}
+
+/// Write a u64 in minimal little-endian bytes.
+pub fn u64_put_trimmed_le(x: u64, buf: &mut Vec<u8>) {
+  let n = u64_byte_count(x) as usize;
+  buf.extend_from_slice(&x.to_le_bytes()[..n])
+}
+
+/// Read a u64 from minimal little-endian bytes.
+pub fn u64_get_trimmed_le(len: usize, buf: &mut &[u8]) -> Result<u64, String> {
+  let mut res = [0u8; 8];
+  if len > 8 {
+    return Err("u64_get_trimmed_le: len > 8".to_string());
+  }
+  match buf.split_at_checked(len) {
+    Some((head, rest)) => {
+      *buf = rest;
+      res[..len].copy_from_slice(head);
+      Ok(u64::from_le_bytes(res))
+    },
+    None => Err(format!("u64_get_trimmed_le: EOF, need {len} bytes")),
+  }
+}
+
+/// Tag4: 4-bit flag for expressions.
+///
+/// Header byte: `[flag:4][large:1][size:3]`
+/// - If large=0: size is in low 3 bits (0-7)
+/// - If large=1: (size+1) bytes follow containing the actual size
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct Tag4 {
+  pub flag: u8,
+  pub size: u64,
+}
+
+impl Tag4 {
+  pub fn new(flag: u8, size: u64) -> Self {
+    debug_assert!(flag < 16, "Tag4 flag must be < 16");
+    Tag4 { flag, size }
+  }
+
+  #[allow(clippy::cast_possible_truncation)]
+  pub fn encode_head(&self) -> u8 {
+    if self.size < 8 {
+      (self.flag << 4) + (self.size as u8)
+    } else {
+      (self.flag << 4) + 0b1000 + (u64_byte_count(self.size) - 1)
+    }
+  }
+
+  pub fn decode_head(head: u8) -> (u8, bool, u8) {
+    (head >> 4, head & 0b1000 != 0, head % 0b1000)
+  }
+
+  pub fn put(&self, buf: &mut Vec<u8>) {
+    buf.push(self.encode_head());
+    if self.size >= 8 {
+      u64_put_trimmed_le(self.size, buf)
+    }
+  }
+
+  pub fn get(buf: &mut &[u8]) -> Result<Self, String> {
+    let head = match buf.split_first() {
+      Some((&h, rest)) => {
+        *buf = rest;
+        h
+      },
+      None => return Err("Tag4::get: EOF".to_string()),
+    };
+    let (flag, large, small) = Self::decode_head(head);
+    let size = if large {
+      u64_get_trimmed_le((small + 1) as usize, buf)?
+    } else {
+      small as u64
+    };
+    Ok(Tag4 { flag, size })
+  }
+
+  /// Calculate the encoded size of this tag in bytes.
+  pub fn encoded_size(&self) -> usize {
+    if self.size < 8 { 1 } else { 1 + u64_byte_count(self.size) as usize }
+  }
+}
+
+/// Tag2: 2-bit flag for universes.
+///
+/// Header byte: `[flag:2][large:1][size:5]`
+/// - If large=0: size is in low 5 bits (0-31)
+/// - If large=1: (size+1) bytes follow containing the actual size
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct Tag2 {
+  pub flag: u8,
+  pub size: u64,
+}
+
+impl Tag2 {
+  pub fn new(flag: u8, size: u64) -> Self {
+    debug_assert!(flag < 4, "Tag2 flag must be < 4");
+    Tag2 { flag, size }
+  }
+
+  #[allow(clippy::cast_possible_truncation)]
+  pub fn encode_head(&self) -> u8 {
+    if self.size < 32 {
+      (self.flag << 6) + (self.size as u8)
+    } else {
+      (self.flag << 6) + 0b10_0000 + (u64_byte_count(self.size) - 1)
+    }
+  }
+
+  pub fn decode_head(head: u8) -> (u8, bool, u8) {
+    (head >> 6, head & 0b10_0000 != 0, head % 0b10_0000)
+  }
+
+  pub fn put(&self, buf: &mut Vec<u8>) {
+    buf.push(self.encode_head());
+    if self.size >= 32 {
+      u64_put_trimmed_le(self.size, buf)
+    }
+  }
+
+  pub fn get(buf: &mut &[u8]) -> Result<Self, String> {
+    let head = match buf.split_first() {
+      Some((&h, rest)) => {
+        *buf = rest;
+        h
+      },
+      None => return Err("Tag2::get: EOF".to_string()),
+    };
+    let (flag, large, small) = Self::decode_head(head);
+    let size = if large {
+      u64_get_trimmed_le((small + 1) as usize, buf)?
+    } else {
+      small as u64
+    };
+    Ok(Tag2 { flag, size })
+  }
+
+  /// Calculate the encoded size of this tag in bytes.
+  pub fn encoded_size(&self) -> usize {
+    if self.size < 32 { 1 } else { 1 + u64_byte_count(self.size) as usize }
+  }
+}
+
+/// Tag0: No flag, just variable-length u64.
+///
+/// Header byte: `[large:1][size:7]`
+/// - If large=0: size is in low 7 bits (0-127)
+/// - If large=1: (size+1) bytes follow containing the actual size
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct Tag0 {
+  pub size: u64,
+}
+
+impl Tag0 {
+  pub fn new(size: u64) -> Self {
+    Tag0 { size }
+  }
+
+  #[allow(clippy::cast_possible_truncation)]
+  pub fn encode_head(&self) -> u8 {
+    if self.size < 128 {
+      self.size as u8
+    } else {
+      0b1000_0000 + (u64_byte_count(self.size) - 1)
+    }
+  }
+
+  pub fn decode_head(head: u8) -> (bool, u8) {
+    (head & 0b1000_0000 != 0, head % 0b1000_0000)
+  }
+
+  pub fn put(&self, buf: &mut Vec<u8>) {
+    buf.push(self.encode_head());
+    if self.size >= 128 {
+      u64_put_trimmed_le(self.size, buf)
+    }
+  }
+
+  pub fn get(buf: &mut &[u8]) -> Result<Self, String> {
+    let head = match buf.split_first() {
+      Some((&h, rest)) => {
+        *buf = rest;
+        h
+      },
+      None => return Err("Tag0::get: EOF".to_string()),
+    };
+    let (large, small) = Self::decode_head(head);
+    let size = if large {
+      u64_get_trimmed_le((small + 1) as usize, buf)?
+    } else {
+      small as u64
+    };
+    Ok(Tag0 { size })
+  }
+
+  /// Calculate the encoded size of this tag in bytes.
+ pub fn encoded_size(&self) -> usize { + if self.size < 128 { 1 } else { 1 + u64_byte_count(self.size) as usize } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use quickcheck::{Arbitrary, Gen}; + + // ============================================================================ + // Arbitrary implementations + // ============================================================================ + + impl Arbitrary for Tag4 { + fn arbitrary(g: &mut Gen) -> Self { + let flag = u8::arbitrary(g) % 16; + Tag4::new(flag, u64::arbitrary(g)) + } + } + + impl Arbitrary for Tag2 { + fn arbitrary(g: &mut Gen) -> Self { + let flag = u8::arbitrary(g) % 4; + Tag2::new(flag, u64::arbitrary(g)) + } + } + + impl Arbitrary for Tag0 { + fn arbitrary(g: &mut Gen) -> Self { + Tag0::new(u64::arbitrary(g)) + } + } + + // ============================================================================ + // Property-based tests + // ============================================================================ + + #[quickcheck] + fn prop_tag4_roundtrip(t: Tag4) -> bool { + let mut buf = Vec::new(); + t.put(&mut buf); + match Tag4::get(&mut buf.as_slice()) { + Ok(t2) => t == t2, + Err(_) => false, + } + } + + #[quickcheck] + fn prop_tag4_encoded_size(t: Tag4) -> bool { + let mut buf = Vec::new(); + t.put(&mut buf); + buf.len() == t.encoded_size() + } + + #[quickcheck] + fn prop_tag2_roundtrip(t: Tag2) -> bool { + let mut buf = Vec::new(); + t.put(&mut buf); + match Tag2::get(&mut buf.as_slice()) { + Ok(t2) => t == t2, + Err(_) => false, + } + } + + #[quickcheck] + fn prop_tag2_encoded_size(t: Tag2) -> bool { + let mut buf = Vec::new(); + t.put(&mut buf); + buf.len() == t.encoded_size() + } + + #[quickcheck] + fn prop_tag0_roundtrip(t: Tag0) -> bool { + let mut buf = Vec::new(); + t.put(&mut buf); + match Tag0::get(&mut buf.as_slice()) { + Ok(t2) => t == t2, + Err(_) => false, + } + } + + #[quickcheck] + fn prop_tag0_encoded_size(t: Tag0) -> bool { + let mut buf = Vec::new(); + t.put(&mut buf); + 
buf.len() == t.encoded_size() + } + + // ============================================================================ + // Unit tests + // ============================================================================ + + #[test] + fn test_u64_trimmed() { + fn roundtrip(x: u64) -> bool { + let mut buf = Vec::new(); + let n = u64_byte_count(x); + u64_put_trimmed_le(x, &mut buf); + match u64_get_trimmed_le(n as usize, &mut buf.as_slice()) { + Ok(y) => x == y, + Err(_) => false, + } + } + assert!(roundtrip(0)); + assert!(roundtrip(1)); + assert!(roundtrip(127)); + assert!(roundtrip(128)); + assert!(roundtrip(255)); + assert!(roundtrip(256)); + assert!(roundtrip(0xFFFF_FFFF_FFFF_FFFF)); + } + + #[test] + fn tag4_small_values() { + for size in 0..8u64 { + for flag in 0..16u8 { + let tag = Tag4::new(flag, size); + let mut buf = Vec::new(); + tag.put(&mut buf); + assert_eq!(buf.len(), 1, "Tag4({flag}, {size}) should be 1 byte"); + + let mut slice: &[u8] = &buf; + let recovered = Tag4::get(&mut slice).unwrap(); + assert_eq!(recovered, tag, "Tag4({flag}, {size}) roundtrip failed"); + assert!(slice.is_empty(), "Tag4({flag}, {size}) had trailing bytes"); + } + } + } + + #[test] + fn tag4_large_values() { + let sizes = [8u64, 255, 256, 65535, 65536, u64::from(u32::MAX), u64::MAX]; + for size in sizes { + for flag in 0..16u8 { + let tag = Tag4::new(flag, size); + let mut buf = Vec::new(); + tag.put(&mut buf); + + let mut slice: &[u8] = &buf; + let recovered = Tag4::get(&mut slice).unwrap(); + assert_eq!(recovered, tag, "Tag4({flag}, {size}) roundtrip failed"); + assert!(slice.is_empty(), "Tag4({flag}, {size}) had trailing bytes"); + } + } + } + + #[test] + fn tag4_encoded_size_test() { + assert_eq!(Tag4::new(0, 0).encoded_size(), 1); + assert_eq!(Tag4::new(0, 7).encoded_size(), 1); + assert_eq!(Tag4::new(0, 8).encoded_size(), 2); + assert_eq!(Tag4::new(0, 255).encoded_size(), 2); + assert_eq!(Tag4::new(0, 256).encoded_size(), 3); + assert_eq!(Tag4::new(0, 65535).encoded_size(), 
3); + assert_eq!(Tag4::new(0, 65536).encoded_size(), 4); + } + + #[test] + fn tag4_byte_boundaries() { + let test_cases: Vec<(u64, usize)> = vec![ + (0, 1), + (7, 1), + (8, 2), + (0xFF, 2), + (0x100, 3), + (0xFFFF, 3), + (0x10000, 4), + (0xFFFFFF, 4), + (0x1000000, 5), + (0xFFFFFFFF, 5), + (0x100000000, 6), + (0xFFFFFFFFFF, 6), + (0x10000000000, 7), + (0xFFFFFFFFFFFF, 7), + (0x1000000000000, 8), + (0xFFFFFFFFFFFFFF, 8), + (0x100000000000000, 9), + (u64::MAX, 9), + ]; + + for (size, expected_bytes) in &test_cases { + let tag = Tag4::new(0, *size); + let mut buf = Vec::new(); + tag.put(&mut buf); + + assert_eq!( + buf.len(), + *expected_bytes, + "Tag4 with size 0x{:X} should be {} bytes, got {}", + size, + expected_bytes, + buf.len() + ); + + let mut slice: &[u8] = &buf; + let recovered = Tag4::get(&mut slice).unwrap(); + assert_eq!(recovered, tag, "Round-trip failed for size 0x{:X}", size); + assert!(slice.is_empty()); + } + } + + // ============================================================================ + // Tag2 unit tests + // ============================================================================ + + #[test] + fn tag2_small_values() { + for size in 0..32u64 { + for flag in 0..4u8 { + let tag = Tag2::new(flag, size); + let mut buf = Vec::new(); + tag.put(&mut buf); + assert_eq!(buf.len(), 1, "Tag2({flag}, {size}) should be 1 byte"); + + let mut slice: &[u8] = &buf; + let recovered = Tag2::get(&mut slice).unwrap(); + assert_eq!(recovered, tag, "Tag2({flag}, {size}) roundtrip failed"); + assert!(slice.is_empty(), "Tag2({flag}, {size}) had trailing bytes"); + } + } + } + + #[test] + fn tag2_large_values() { + let sizes = [32u64, 255, 256, 65535, 65536, u64::from(u32::MAX), u64::MAX]; + for size in sizes { + for flag in 0..4u8 { + let tag = Tag2::new(flag, size); + let mut buf = Vec::new(); + tag.put(&mut buf); + + let mut slice: &[u8] = &buf; + let recovered = Tag2::get(&mut slice).unwrap(); + assert_eq!(recovered, tag, "Tag2({flag}, {size}) roundtrip 
failed"); + assert!(slice.is_empty(), "Tag2({flag}, {size}) had trailing bytes"); + } + } + } + + #[test] + fn tag2_encoded_size_test() { + assert_eq!(Tag2::new(0, 0).encoded_size(), 1); + assert_eq!(Tag2::new(0, 31).encoded_size(), 1); + assert_eq!(Tag2::new(0, 32).encoded_size(), 2); + assert_eq!(Tag2::new(0, 255).encoded_size(), 2); + assert_eq!(Tag2::new(0, 256).encoded_size(), 3); + assert_eq!(Tag2::new(0, 65535).encoded_size(), 3); + assert_eq!(Tag2::new(0, 65536).encoded_size(), 4); + } + + #[test] + fn tag2_byte_boundaries() { + let test_cases: Vec<(u64, usize)> = vec![ + (0, 1), + (31, 1), + (32, 2), + (0xFF, 2), + (0x100, 3), + (0xFFFF, 3), + (0x10000, 4), + (0xFFFFFF, 4), + (0x1000000, 5), + (0xFFFFFFFF, 5), + (0x100000000, 6), + (0xFFFFFFFFFF, 6), + (0x10000000000, 7), + (0xFFFFFFFFFFFF, 7), + (0x1000000000000, 8), + (0xFFFFFFFFFFFFFF, 8), + (0x100000000000000, 9), + (u64::MAX, 9), + ]; + + for (size, expected_bytes) in &test_cases { + let tag = Tag2::new(0, *size); + let mut buf = Vec::new(); + tag.put(&mut buf); + + assert_eq!( + buf.len(), + *expected_bytes, + "Tag2 with size 0x{:X} should be {} bytes, got {}", + size, + expected_bytes, + buf.len() + ); + + let mut slice: &[u8] = &buf; + let recovered = Tag2::get(&mut slice).unwrap(); + assert_eq!(recovered, tag, "Round-trip failed for size 0x{:X}", size); + assert!(slice.is_empty()); + } + } + + // ============================================================================ + // Tag0 unit tests + // ============================================================================ + + #[test] + fn tag0_small_values() { + for size in 0..128u64 { + let tag = Tag0::new(size); + let mut buf = Vec::new(); + tag.put(&mut buf); + assert_eq!(buf.len(), 1, "Tag0({size}) should be 1 byte"); + + let mut slice: &[u8] = &buf; + let recovered = Tag0::get(&mut slice).unwrap(); + assert_eq!(recovered, tag, "Tag0({size}) roundtrip failed"); + assert!(slice.is_empty(), "Tag0({size}) had trailing bytes"); + } + } + + 
#[test] + fn tag0_large_values() { + let sizes = [128u64, 255, 256, 65535, 65536, u64::from(u32::MAX), u64::MAX]; + for size in sizes { + let tag = Tag0::new(size); + let mut buf = Vec::new(); + tag.put(&mut buf); + + let mut slice: &[u8] = &buf; + let recovered = Tag0::get(&mut slice).unwrap(); + assert_eq!(recovered, tag, "Tag0({size}) roundtrip failed"); + assert!(slice.is_empty(), "Tag0({size}) had trailing bytes"); + } + } + + #[test] + fn tag0_encoded_size_test() { + assert_eq!(Tag0::new(0).encoded_size(), 1); + assert_eq!(Tag0::new(127).encoded_size(), 1); + assert_eq!(Tag0::new(128).encoded_size(), 2); + assert_eq!(Tag0::new(255).encoded_size(), 2); + assert_eq!(Tag0::new(256).encoded_size(), 3); + assert_eq!(Tag0::new(65535).encoded_size(), 3); + assert_eq!(Tag0::new(65536).encoded_size(), 4); + } + + #[test] + fn tag0_byte_boundaries() { + let test_cases: Vec<(u64, usize)> = vec![ + (0, 1), + (127, 1), + (128, 2), + (0xFF, 2), + (0x100, 3), + (0xFFFF, 3), + (0x10000, 4), + (0xFFFFFF, 4), + (0x1000000, 5), + (0xFFFFFFFF, 5), + (0x100000000, 6), + (0xFFFFFFFFFF, 6), + (0x10000000000, 7), + (0xFFFFFFFFFFFF, 7), + (0x1000000000000, 8), + (0xFFFFFFFFFFFFFF, 8), + (0x100000000000000, 9), + (u64::MAX, 9), + ]; + + for (size, expected_bytes) in &test_cases { + let tag = Tag0::new(*size); + let mut buf = Vec::new(); + tag.put(&mut buf); + + assert_eq!( + buf.len(), + *expected_bytes, + "Tag0 with size 0x{:X} should be {} bytes, got {}", + size, + expected_bytes, + buf.len() + ); + + let mut slice: &[u8] = &buf; + let recovered = Tag0::get(&mut slice).unwrap(); + assert_eq!(recovered, tag, "Round-trip failed for size 0x{:X}", size); + assert!(slice.is_empty()); + } + } +} diff --git a/src/ix/ixon/univ.rs b/src/ix/ixon/univ.rs new file mode 100644 index 00000000..ce3e9db8 --- /dev/null +++ b/src/ix/ixon/univ.rs @@ -0,0 +1,288 @@ +//! Universe levels. 
+
+#![allow(clippy::needless_pass_by_value)]
+
+use std::sync::Arc;
+
+use super::tag::Tag2;
+
+/// Universe levels for Lean's type system.
+#[derive(Clone, Debug, PartialEq, Eq, Hash)]
+pub enum Univ {
+  /// Universe zero (Prop/Type 0)
+  Zero,
+  /// Successor universe
+  Succ(Arc<Univ>),
+  /// Maximum of two universes
+  Max(Arc<Univ>, Arc<Univ>),
+  /// Impredicative maximum (IMax u v = 0 if v = 0, else Max u v)
+  IMax(Arc<Univ>, Arc<Univ>),
+  /// Universe parameter (de Bruijn index)
+  Var(u64),
+}
+
+impl Univ {
+  /// Tag2 flags for universe variants.
+  pub const FLAG_ZERO_SUCC: u8 = 0; // size=0 for Zero, size=1 for Succ
+  pub const FLAG_MAX: u8 = 1;
+  pub const FLAG_IMAX: u8 = 2;
+  pub const FLAG_VAR: u8 = 3;
+
+  pub fn zero() -> Arc<Univ> {
+    Arc::new(Univ::Zero)
+  }
+
+  pub fn succ(u: Arc<Univ>) -> Arc<Univ> {
+    Arc::new(Univ::Succ(u))
+  }
+
+  pub fn max(a: Arc<Univ>, b: Arc<Univ>) -> Arc<Univ> {
+    Arc::new(Univ::Max(a, b))
+  }
+
+  pub fn imax(a: Arc<Univ>, b: Arc<Univ>) -> Arc<Univ> {
+    Arc::new(Univ::IMax(a, b))
+  }
+
+  pub fn var(idx: u64) -> Arc<Univ> {
+    Arc::new(Univ::Var(idx))
+  }
+}
+
+/// Serialize a universe to bytes (iterative to avoid stack overflow).
+pub fn put_univ(u: &Univ, buf: &mut Vec<u8>) {
+  let mut stack: Vec<&Univ> = vec![u];
+
+  while let Some(curr) = stack.pop() {
+    match curr {
+      Univ::Zero => {
+        Tag2::new(Univ::FLAG_ZERO_SUCC, 0).put(buf);
+      },
+      Univ::Succ(inner) => {
+        // Count the number of successors for telescope compression
+        let mut count = 1u64;
+        let mut base = inner.as_ref();
+        while let Univ::Succ(next) = base {
+          count += 1;
+          base = next.as_ref();
+        }
+        Tag2::new(Univ::FLAG_ZERO_SUCC, count).put(buf);
+        stack.push(base);
+      },
+      Univ::Max(a, b) => {
+        Tag2::new(Univ::FLAG_MAX, 0).put(buf);
+        stack.push(b); // Process b after a
+        stack.push(a);
+      },
+      Univ::IMax(a, b) => {
+        Tag2::new(Univ::FLAG_IMAX, 0).put(buf);
+        stack.push(b); // Process b after a
+        stack.push(a);
+      },
+      Univ::Var(idx) => {
+        Tag2::new(Univ::FLAG_VAR, *idx).put(buf);
+      },
+    }
+  }
+}
+
+/// Frame for iterative universe deserialization.
+enum GetUnivFrame {
+  /// Parse a universe from the buffer
+  Parse,
+  /// Wrap the top result in `count` Succs
+  WrapSuccs(u64),
+  /// Pop two results (b then a) and push Max(a, b)
+  BuildMax,
+  /// Pop two results (b then a) and push IMax(a, b)
+  BuildIMax,
+}
+
+/// Deserialize a universe from bytes (iterative to avoid stack overflow).
+pub fn get_univ(buf: &mut &[u8]) -> Result<Arc<Univ>, String> {
+  let mut work: Vec<GetUnivFrame> = vec![GetUnivFrame::Parse];
+  let mut results: Vec<Arc<Univ>> = Vec::new();
+
+  while let Some(frame) = work.pop() {
+    match frame {
+      GetUnivFrame::Parse => {
+        let tag = Tag2::get(buf)?;
+        match tag.flag {
+          Univ::FLAG_ZERO_SUCC => {
+            if tag.size == 0 {
+              results.push(Univ::zero());
+            } else {
+              // Parse inner, then wrap in Succs
+              work.push(GetUnivFrame::WrapSuccs(tag.size));
+              work.push(GetUnivFrame::Parse);
+            }
+          },
+          Univ::FLAG_MAX => {
+            // Parse a, parse b, then build Max(a, b)
+            work.push(GetUnivFrame::BuildMax);
+            work.push(GetUnivFrame::Parse); // b
+            work.push(GetUnivFrame::Parse); // a
+          },
+          Univ::FLAG_IMAX => {
+            // Parse a, parse b, then build IMax(a, b)
+            work.push(GetUnivFrame::BuildIMax);
+            work.push(GetUnivFrame::Parse); // b
+            work.push(GetUnivFrame::Parse); // a
+          },
+          Univ::FLAG_VAR => {
+            results.push(Univ::var(tag.size));
+          },
+          f => return Err(format!("get_univ: invalid flag {f}")),
+        }
+      },
+      GetUnivFrame::WrapSuccs(count) => {
+        let mut result =
+          results.pop().ok_or("get_univ: missing result for WrapSuccs")?;
+        for _ in 0..count {
+          result = Univ::succ(result);
+        }
+        results.push(result);
+      },
+      GetUnivFrame::BuildMax => {
+        let b = results.pop().ok_or("get_univ: missing b for Max")?;
+        let a = results.pop().ok_or("get_univ: missing a for Max")?;
+        results.push(Univ::max(a, b));
+      },
+      GetUnivFrame::BuildIMax => {
+        let b = results.pop().ok_or("get_univ: missing b for IMax")?;
+        let a = results.pop().ok_or("get_univ: missing a for IMax")?;
+        results.push(Univ::imax(a, b));
+      },
+    }
+  }
+
+  results.pop().ok_or_else(|| "get_univ: no
result".to_string()) +} + +#[cfg(test)] +pub mod tests { + use super::*; + use crate::ix::ixon::tests::{gen_range, next_case}; + use quickcheck::{Arbitrary, Gen}; + use std::ptr; + + #[derive(Clone, Copy)] + enum Case { + Zero, + Succ, + Max, + IMax, + Var, + } + + /// Generate an arbitrary Univ using pointer-tree technique (no stack overflow) + pub fn arbitrary_univ(g: &mut Gen) -> Arc { + let mut root = Univ::Zero; + let mut stack = vec![&mut root as *mut Univ]; + + while let Some(ptr) = stack.pop() { + let gens = [ + (100, Case::Zero), + (100, Case::Var), + (50, Case::Succ), + (30, Case::Max), + (20, Case::IMax), + ]; + match next_case(g, &gens) { + Case::Zero => unsafe { + ptr::write(ptr, Univ::Zero); + }, + Case::Var => unsafe { + ptr::write(ptr, Univ::Var(gen_range(g, 0..16) as u64)); + }, + Case::Succ => { + let mut inner = Arc::new(Univ::Zero); + let inner_ptr = Arc::get_mut(&mut inner).unwrap() as *mut Univ; + unsafe { + ptr::write(ptr, Univ::Succ(inner)); + } + stack.push(inner_ptr); + }, + Case::Max => { + let mut a = Arc::new(Univ::Zero); + let mut b = Arc::new(Univ::Zero); + let (a_ptr, b_ptr) = ( + Arc::get_mut(&mut a).unwrap() as *mut Univ, + Arc::get_mut(&mut b).unwrap() as *mut Univ, + ); + unsafe { + ptr::write(ptr, Univ::Max(a, b)); + } + stack.push(b_ptr); + stack.push(a_ptr); + }, + Case::IMax => { + let mut a = Arc::new(Univ::Zero); + let mut b = Arc::new(Univ::Zero); + let (a_ptr, b_ptr) = ( + Arc::get_mut(&mut a).unwrap() as *mut Univ, + Arc::get_mut(&mut b).unwrap() as *mut Univ, + ); + unsafe { + ptr::write(ptr, Univ::IMax(a, b)); + } + stack.push(b_ptr); + stack.push(a_ptr); + }, + } + } + Arc::new(root) + } + + #[derive(Clone, Debug)] + struct ArbitraryUniv(Arc); + + impl Arbitrary for ArbitraryUniv { + fn arbitrary(g: &mut Gen) -> Self { + ArbitraryUniv(arbitrary_univ(g)) + } + } + + fn roundtrip(u: &Univ) -> bool { + let mut buf = Vec::new(); + put_univ(u, &mut buf); + match get_univ(&mut buf.as_slice()) { + Ok(result) => 
result.as_ref() == u, + Err(_) => false, + } + } + + #[quickcheck] + fn prop_univ_roundtrip(u: ArbitraryUniv) -> bool { + roundtrip(&u.0) + } + + #[test] + fn test_univ_zero() { + assert!(roundtrip(&Univ::Zero)); + } + + #[test] + fn test_univ_succ() { + assert!(roundtrip(&Univ::Succ(Univ::zero()))); + assert!(roundtrip(&Univ::Succ(Arc::new(Univ::Succ(Arc::new( + Univ::Succ(Univ::zero()) + )))))); + } + + #[test] + fn test_univ_max() { + assert!(roundtrip(&Univ::Max(Univ::var(0), Univ::var(1)))); + } + + #[test] + fn test_univ_var() { + assert!(roundtrip(&Univ::Var(0))); + assert!(roundtrip(&Univ::Var(100))); + } + + #[test] + fn test_univ_succ_telescope() { + assert!(roundtrip(&Univ::succ(Univ::succ(Univ::succ(Univ::zero()))))); + } +} diff --git a/src/ix/mutual.rs b/src/ix/mutual.rs index 8f6aa12b..3e0e5dde 100644 --- a/src/ix/mutual.rs +++ b/src/ix/mutual.rs @@ -1,27 +1,45 @@ +//! Types for representing mutual definition blocks in the compilation pipeline. +//! +//! Mutual blocks are groups of definitions that reference each other cyclically. +//! [`MutCtx`] maps names to their indices within a mutual block, and the +//! [`ctx_to_all`] / [`all_to_ctx`] functions convert between ordered name +//! vectors and index maps. + use crate::{ ix::env::{ ConstructorVal, DefinitionSafety, DefinitionVal, Expr, InductiveVal, Name, OpaqueVal, RecursorVal, ReducibilityHints, TheoremVal, }, - ix::ixon::DefKind, + ix::ixon::constant::DefKind, lean::nat::Nat, }; use rustc_hash::FxHashMap; +/// A definition-like constant (definition, theorem, or opaque) unified into a +/// single representation for mutual block processing. #[derive(Clone, PartialEq, Eq, Debug)] pub struct Def { + /// Fully-qualified name of the definition. pub name: Name, + /// Universe-polymorphic level parameter names. pub level_params: Vec, + /// The type of the definition. pub typ: Expr, + /// The kind of definition (definition, theorem, or opaque). pub kind: DefKind, + /// The definition body. 
pub value: Expr, + /// Reducibility hints for the kernel. pub hints: ReducibilityHints, + /// Safety classification. pub safety: DefinitionSafety, + /// Names of all constants in the same mutual block. pub all: Vec, } impl Def { + /// Constructs a `Def` from a [`DefinitionVal`]. pub fn mk_defn(val: &DefinitionVal) -> Self { let DefinitionVal { cnst, value, hints, safety, all } = val; Self { @@ -35,6 +53,7 @@ impl Def { all: all.clone(), } } + /// Constructs a `Def` from a [`TheoremVal`]. pub fn mk_theo(val: &TheoremVal) -> Self { let TheoremVal { cnst, value, all } = val; Self { @@ -48,6 +67,7 @@ impl Def { all: all.clone(), } } + /// Constructs a `Def` from an [`OpaqueVal`]. pub fn mk_opaq(val: &OpaqueVal) -> Self { let OpaqueVal { cnst, value, is_unsafe, all } = val; Self { @@ -67,24 +87,57 @@ impl Def { } } +/// An inductive type bundled with its constructors for mutual block processing. #[derive(Clone, PartialEq, Eq, Debug)] pub struct Ind { + /// The inductive type declaration. pub ind: InductiveVal, + /// The constructors belonging to this inductive type. pub ctors: Vec, } +/// Type alias for a recursor value within a mutual block. pub type Rec = RecursorVal; +/// A constant within a mutual definition block. #[derive(Clone, PartialEq, Eq, Debug)] pub enum MutConst { + /// A definition, theorem, or opaque constant. Defn(Def), + /// An inductive type with its constructors. Indc(Ind), + /// A recursor (eliminator). Recr(Rec), } +/// Maps names to their index within a mutual block. pub type MutCtx = FxHashMap; +/// Convert a MutCtx to a Vec ordered by index. +/// Position i contains the name with Nat value i. +pub fn ctx_to_all(ctx: &MutCtx) -> Vec { + let mut pairs: Vec<_> = ctx.iter().collect(); + pairs.sort_by(|(n1, i1), (n2, i2)| { + i1.to_u64() + .unwrap_or(0) + .cmp(&i2.to_u64().unwrap_or(0)) + .then_with(|| n1.cmp(n2)) + }); + pairs.into_iter().map(|(name, _)| name.clone()).collect() +} + +/// Convert a Vec to a MutCtx. 
+/// Each name gets its position as the Nat value. +pub fn all_to_ctx(all: &[Name]) -> MutCtx { + let mut ctx = FxHashMap::default(); + for (i, name) in all.iter().enumerate() { + ctx.insert(name.clone(), Nat(i.into())); + } + ctx +} + impl MutConst { + /// Returns the name of this mutual constant. pub fn name(&self) -> Name { match self { Self::Defn(x) => x.name.clone(), @@ -93,12 +146,15 @@ impl MutConst { } } + /// Returns the constructors if this is an inductive, or an empty vec otherwise. pub fn ctors(&self) -> Vec { match self { Self::Indc(ind) => ind.ctors.clone(), _ => vec![], } } + /// Returns `true` if this mutual constant contains the given name + /// (including constructor names for inductives). pub fn contains(&self, name: &Name) -> bool { match self { Self::Defn(x) => x.name == *name, @@ -108,12 +164,15 @@ impl MutConst { }, } } + /// Creates a [`MutCtx`] with a single name at index 0. pub fn single_ctx(name: Name) -> MutCtx { let mut mut_ctx = FxHashMap::default(); mut_ctx.insert(name, Nat(0u64.into())); mut_ctx } + /// Builds a [`MutCtx`] from grouped mutual constant classes, assigning + /// indices to types first and then to constructors. 
pub fn ctx(classes: &[Vec<&MutConst>]) -> MutCtx { let mut mut_ctx = FxHashMap::default(); let mut i = classes.len(); @@ -131,3 +190,187 @@ impl MutConst { mut_ctx } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::ix::env::{ConstantVal, Level}; + + fn n(s: &str) -> Name { + Name::str(Name::anon(), s.to_string()) + } + + fn sort0() -> Expr { + Expr::sort(Level::zero()) + } + + fn mk_constant_val(name: &str) -> ConstantVal { + ConstantVal { name: n(name), level_params: vec![], typ: sort0() } + } + + fn mk_def(name: &str) -> MutConst { + MutConst::Defn(Def { + name: n(name), + level_params: vec![], + typ: sort0(), + kind: DefKind::Definition, + value: Expr::bvar(Nat(0u64.into())), + hints: ReducibilityHints::Opaque, + safety: DefinitionSafety::Safe, + all: vec![n(name)], + }) + } + + fn mk_ind(name: &str, ctor_names: &[&str]) -> MutConst { + let ctors = ctor_names + .iter() + .enumerate() + .map(|(i, cn)| ConstructorVal { + cnst: mk_constant_val(cn), + induct: n(name), + cidx: Nat::from(i as u64), + num_params: Nat(0u64.into()), + num_fields: Nat(0u64.into()), + is_unsafe: false, + }) + .collect(); + MutConst::Indc(Ind { + ind: InductiveVal { + cnst: mk_constant_val(name), + num_params: Nat(0u64.into()), + num_indices: Nat(0u64.into()), + all: vec![n(name)], + ctors: ctor_names.iter().map(|c| n(c)).collect(), + num_nested: Nat(0u64.into()), + is_rec: false, + is_unsafe: false, + is_reflexive: false, + }, + ctors, + }) + } + + fn mk_rec(name: &str) -> MutConst { + MutConst::Recr(RecursorVal { + cnst: mk_constant_val(name), + all: vec![n(name)], + num_params: Nat(0u64.into()), + num_indices: Nat(0u64.into()), + num_motives: Nat(1u64.into()), + num_minors: Nat(0u64.into()), + rules: vec![], + k: false, + is_unsafe: false, + }) + } + + #[test] + fn all_to_ctx_assigns_indices() { + let names = vec![n("A"), n("B"), n("C")]; + let ctx = all_to_ctx(&names); + assert_eq!(ctx.get(&n("A")), Some(&Nat(0u64.into()))); + assert_eq!(ctx.get(&n("B")), 
Some(&Nat(1u64.into()))); + assert_eq!(ctx.get(&n("C")), Some(&Nat(2u64.into()))); + assert_eq!(ctx.len(), 3); + } + + #[test] + fn ctx_to_all_roundtrip() { + let names = vec![n("X"), n("Y"), n("Z")]; + let ctx = all_to_ctx(&names); + let recovered = ctx_to_all(&ctx); + assert_eq!(recovered, names); + } + + #[test] + fn single_ctx_has_one_entry() { + let ctx = MutConst::single_ctx(n("Foo")); + assert_eq!(ctx.len(), 1); + assert_eq!(ctx.get(&n("Foo")), Some(&Nat(0u64.into()))); + } + + #[test] + fn ctx_single_class_defs_only() { + let d1 = mk_def("f"); + let d2 = mk_def("g"); + let classes: Vec> = vec![vec![&d1, &d2]]; + let ctx = MutConst::ctx(&classes); + // Both defs get the class index (0) since they're in class 0 + assert_eq!(ctx.get(&n("f")), Some(&Nat(0u64.into()))); + assert_eq!(ctx.get(&n("g")), Some(&Nat(0u64.into()))); + } + + #[test] + fn ctx_class_with_ctors() { + let ind = mk_ind("Bool", &["Bool.true", "Bool.false"]); + let classes: Vec> = vec![vec![&ind]]; + let ctx = MutConst::ctx(&classes); + // The type gets class index 0 + assert_eq!(ctx.get(&n("Bool")), Some(&Nat(0u64.into()))); + // Ctors start at classes.len() = 1 + assert_eq!(ctx.get(&n("Bool.true")), Some(&Nat(1u64.into()))); + assert_eq!(ctx.get(&n("Bool.false")), Some(&Nat(2u64.into()))); + } + + #[test] + fn ctx_two_classes() { + // Class 0: inductive with 2 ctors + let ind = mk_ind("A", &["A.mk1", "A.mk2"]); + // Class 1: definition (no ctors) + let def = mk_def("f"); + let classes: Vec> = vec![vec![&ind], vec![&def]]; + let ctx = MutConst::ctx(&classes); + // Class 0: A at index 0 + assert_eq!(ctx.get(&n("A")), Some(&Nat(0u64.into()))); + // Class 1: f at index 1 + assert_eq!(ctx.get(&n("f")), Some(&Nat(1u64.into()))); + // i starts at classes.len()=2. 
Class 0 has 2 ctors, so: + assert_eq!(ctx.get(&n("A.mk1")), Some(&Nat(2u64.into()))); + assert_eq!(ctx.get(&n("A.mk2")), Some(&Nat(3u64.into()))); + } + + #[test] + fn contains_defn() { + let d = mk_def("f"); + assert!(d.contains(&n("f"))); + assert!(!d.contains(&n("g"))); + } + + #[test] + fn contains_indc_and_ctors() { + let ind = mk_ind("Nat", &["Nat.zero", "Nat.succ"]); + assert!(ind.contains(&n("Nat"))); + assert!(ind.contains(&n("Nat.zero"))); + assert!(ind.contains(&n("Nat.succ"))); + assert!(!ind.contains(&n("Bool"))); + } + + #[test] + fn contains_recr() { + let r = mk_rec("Nat.rec"); + assert!(r.contains(&n("Nat.rec"))); + assert!(!r.contains(&n("Bool.rec"))); + } + + #[test] + fn name_returns_correct_name() { + assert_eq!(mk_def("f").name(), n("f")); + assert_eq!(mk_ind("T", &["T.mk"]).name(), n("T")); + assert_eq!(mk_rec("T.rec").name(), n("T.rec")); + } + + #[test] + fn ctors_returns_empty_for_non_inductive() { + assert!(mk_def("f").ctors().is_empty()); + assert!(mk_rec("r").ctors().is_empty()); + } + + #[test] + fn ctors_returns_constructor_vals_for_inductive() { + let ind = mk_ind("T", &["T.mk1", "T.mk2"]); + let ctors = ind.ctors(); + assert_eq!(ctors.len(), 2); + assert_eq!(ctors[0].cnst.name, n("T.mk1")); + assert_eq!(ctors[1].cnst.name, n("T.mk2")); + } +} diff --git a/src/ix/store.rs b/src/ix/store.rs index 97e7ca83..fe45f508 100644 --- a/src/ix/store.rs +++ b/src/ix/store.rs @@ -1,14 +1,25 @@ +//! Content-addressed filesystem store for Ix data. +//! +//! Objects are stored at `~/.ix/store/XX/YY/ZZ/` where `XX/YY/ZZ` +//! are derived from the first 6 hex characters of the Blake3 hash. This provides +//! deterministic addressing: identical content always maps to the same path. + use crate::ix::address::Address; use std::env; use std::fs; use std::io; use std::path::PathBuf; +/// Errors that can occur during store operations. #[derive(Debug)] pub enum StoreError { + /// The requested address does not exist in the store. 
UnknownAddress(Address), + /// An underlying filesystem I/O error. IoError(io::Error), + /// An error during ixon serialization or deserialization. IxonError(String), + /// The `HOME` environment variable is not set. NoHome(env::VarError), } @@ -31,8 +42,10 @@ impl From for StoreError { } } +/// Alias for `Result`. pub type StoreResult = Result; +/// Handle for reading and writing content-addressed objects under `~/.ix/store`. pub struct Store; impl Store { diff --git a/src/ix/strong_ordering.rs b/src/ix/strong_ordering.rs index f57bb50f..c3af587f 100644 --- a/src/ix/strong_ordering.rs +++ b/src/ix/strong_ordering.rs @@ -73,3 +73,157 @@ impl SOrd { } } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn eq_strong_then_lt() { + let result = SOrd::eq(true).compare(SOrd::lt(true)); + assert_eq!(result.ordering, Ordering::Less); + assert!(result.strong); + } + + #[test] + fn eq_strong_then_gt() { + let result = SOrd::eq(true).compare(SOrd::gt(false)); + assert_eq!(result.ordering, Ordering::Greater); + assert!(!result.strong); + } + + #[test] + fn eq_strong_then_eq() { + let result = SOrd::eq(true).compare(SOrd::eq(true)); + assert_eq!(result.ordering, Ordering::Equal); + assert!(result.strong); + } + + #[test] + fn eq_weak_then_lt() { + let result = SOrd::eq(false).compare(SOrd::lt(true)); + assert_eq!(result.ordering, Ordering::Less); + assert!(!result.strong); // weak propagates + } + + #[test] + fn eq_weak_then_eq() { + let result = SOrd::eq(false).compare(SOrd::eq(true)); + assert_eq!(result.ordering, Ordering::Equal); + assert!(!result.strong); // weak propagates + } + + #[test] + fn lt_strong_short_circuits() { + let result = SOrd::lt(true).compare(SOrd::gt(true)); + assert_eq!(result.ordering, Ordering::Less); + assert!(result.strong); + } + + #[test] + fn gt_strong_short_circuits() { + let result = SOrd::gt(true).compare(SOrd::lt(true)); + assert_eq!(result.ordering, Ordering::Greater); + assert!(result.strong); + } + + #[test] + fn 
lt_weak_short_circuits() { + let result = SOrd::lt(false).compare(SOrd::gt(true)); + assert_eq!(result.ordering, Ordering::Less); + assert!(!result.strong); + } + + #[test] + fn try_compare_eq_strong_calls_closure() { + let mut called = false; + let result: Result = SOrd::eq(true).try_compare(|| { + called = true; + Ok(SOrd::lt(true)) + }); + assert!(called); + assert_eq!(result.unwrap().ordering, Ordering::Less); + } + + #[test] + fn try_compare_lt_does_not_call_closure() { + let mut called = false; + let result: Result = SOrd::lt(true).try_compare(|| { + called = true; + Ok(SOrd::gt(true)) + }); + assert!(!called); + assert_eq!(result.unwrap().ordering, Ordering::Less); + } + + #[test] + fn try_compare_eq_weak_propagates() { + let result: Result = + SOrd::eq(false).try_compare(|| Ok(SOrd::lt(true))); + let r = result.unwrap(); + assert_eq!(r.ordering, Ordering::Less); + assert!(!r.strong); // weak propagates + } + + #[test] + fn try_zip_both_empty() { + let result: Result = + SOrd::try_zip(|x: &i32, y: &i32| Ok(SOrd::cmp(x, y)), &[], &[]); + let r = result.unwrap(); + assert_eq!(r.ordering, Ordering::Equal); + assert!(r.strong); + } + + #[test] + fn try_zip_left_shorter() { + let result: Result = + SOrd::try_zip(|x: &i32, y: &i32| Ok(SOrd::cmp(x, y)), &[], &[1]); + assert_eq!(result.unwrap().ordering, Ordering::Less); + } + + #[test] + fn try_zip_right_shorter() { + let result: Result = + SOrd::try_zip(|x: &i32, y: &i32| Ok(SOrd::cmp(x, y)), &[1], &[]); + assert_eq!(result.unwrap().ordering, Ordering::Greater); + } + + #[test] + fn try_zip_equal_elements() { + let result: Result = SOrd::try_zip( + |x: &i32, y: &i32| Ok(SOrd::cmp(x, y)), + &[1, 2, 3], + &[1, 2, 3], + ); + let r = result.unwrap(); + assert_eq!(r.ordering, Ordering::Equal); + assert!(r.strong); + } + + #[test] + fn try_zip_first_difference() { + let mut count = 0; + let result: Result = SOrd::try_zip( + |x: &i32, y: &i32| { + count += 1; + Ok(SOrd::cmp(x, y)) + }, + &[1, 5, 3], + &[1, 2, 3], + ); + 
assert_eq!(result.unwrap().ordering, Ordering::Greater); + assert_eq!(count, 2); // stops after finding the difference at index 1 + } + + #[test] + fn try_zip_weak_propagation() { + let result: Result = SOrd::try_zip( + |x: &i32, y: &i32| Ok(SOrd::weak_cmp(x, y)), + &[1, 2], + &[1, 2], + ); + let r = result.unwrap(); + assert_eq!(r.ordering, Ordering::Equal); + assert!(!r.strong); // weak propagates through the chain + } +} diff --git a/src/lean.rs b/src/lean.rs new file mode 100644 index 00000000..676fb0a8 --- /dev/null +++ b/src/lean.rs @@ -0,0 +1,315 @@ +//! Rust bindings for Lean, implemented by mimicking the memory layout of Lean's +//! low-level C objects. +//! +//! This crate must be kept in sync with `lean/lean.h`. Pay close attention to +//! definitions containing C code in their docstrings. + +pub mod array; +pub mod boxed; +pub mod ctor; +pub mod external; +pub mod ffi; +pub mod nat; +pub mod object; +pub mod sarray; +pub mod string; + +use std::ffi::{CString, c_void}; + +use crate::lean::{ + boxed::{BoxedU64, BoxedUSize}, + ctor::LeanCtorObject, +}; + +#[inline] +#[allow(clippy::not_unsafe_ptr_arg_deref)] +pub fn as_ref_unsafe<'a, T>(ptr: *const T) -> &'a T { + let t_ref = unsafe { ptr.as_ref() }; + t_ref.expect("Null pointer dereference") +} + +#[inline] +#[allow(clippy::not_unsafe_ptr_arg_deref)] +pub fn as_mut_unsafe<'a, T>(ptr: *mut T) -> &'a mut T { + let t_ref = unsafe { ptr.as_mut() }; + t_ref.expect("Null pointer dereference") +} + +/// ```c +/// bool lean_is_scalar(lean_object * o) { return ((size_t)(o) & 1) == 1; } +/// ``` +#[inline] +pub fn lean_is_scalar(ptr: *const T) -> bool { + ptr as usize & 1 == 1 +} + +/// Create a CString from a str, stripping any interior null bytes. +/// Lean strings are length-prefixed and can contain null bytes, but the +/// `lean_mk_string` FFI requires a null-terminated C string. This function +/// ensures conversion always succeeds by filtering out interior nulls. 
+pub fn safe_cstring(s: &str) -> CString { + CString::new(s).unwrap_or_else(|_| { + let bytes: Vec = s.bytes().filter(|&b| b != 0).collect(); + CString::new(bytes).expect("filtered string should have no nulls") + }) +} + +#[macro_export] +/// ```c +/// lean_object * lean_box(size_t n) { return (lean_object*)(((size_t)(n) << 1) | 1); } +/// ``` +macro_rules! lean_box { + ($e:expr) => { + (($e << 1) | 1) as *const std::ffi::c_void + }; +} + +/// ```c +/// size_t lean_unbox(lean_object * o) { return (size_t)(o) >> 1; } +/// ``` +#[macro_export] +macro_rules! lean_unbox { + ($t:ident, $e:expr) => { + $t::try_from(($e as usize) >> 1).expect("Unintended truncation") + }; +} + +/// ```c +/// unsigned lean_unbox_uint32(b_lean_obj_arg o) { +/// if (sizeof(void*) == 4) { +/// /* 32-bit implementation */ +/// return lean_ctor_get_uint32(o, 0); +/// } else { +/// /* 64-bit implementation */ +/// return lean_unbox(o); +/// } +/// } +/// ``` +#[inline] +pub fn lean_unbox_u32(ptr: *const c_void) -> u32 { + if cfg!(target_pointer_width = "32") { + let boxed_usize: &BoxedUSize = as_ref_unsafe(ptr.cast()); + u32::try_from(boxed_usize.value).expect("Cannot convert from usize") + } else { + lean_unbox!(u32, ptr) + } +} + +/// ```c +/// uint64_t lean_unbox_uint64(b_lean_obj_arg o) { +/// return lean_ctor_get_uint64(o, 0); +/// } +/// ``` +#[inline] +pub fn lean_unbox_u64(ptr: *const c_void) -> u64 { + let boxed_usize: &BoxedU64 = as_ref_unsafe(ptr.cast()); + boxed_usize.value +} + +/// ```c +/// lean_object * lean_box_uint64(uint64_t v) { +/// lean_object * r = lean_alloc_ctor(0, 0, sizeof(uint64_t)); +/// lean_ctor_set_uint64(r, 0, v); +/// return r; +/// } +/// ``` +#[inline] +pub fn lean_box_u64(v: u64) -> *mut c_void { + unsafe { + let obj = lean_alloc_ctor(0, 0, 8); + lean_ctor_set_uint64(obj, 0, v); + obj + } +} + +pub fn boxed_usize_ptr_to_usize(ptr: *const c_void) -> usize { + let boxed_usize_ptr = ptr.cast::(); + let boxed_usize = as_ref_unsafe(boxed_usize_ptr); + 
boxed_usize.value +} + +/// Emulates arrays of flexible size from C. +#[repr(C)] +pub struct CArray([T; 0]); + +impl CArray { + #[inline] + pub fn slice(&self, len: usize) -> &[T] { + unsafe { std::slice::from_raw_parts(self.0.as_ptr(), len) } + } + + #[inline] + pub fn slice_mut(&mut self, len: usize) -> &mut [T] { + unsafe { std::slice::from_raw_parts_mut(self.0.as_mut_ptr(), len) } + } + + #[inline] + pub fn copy_from_slice(&mut self, src: &[T]) { + unsafe { + std::ptr::copy_nonoverlapping( + src.as_ptr(), + self.0.as_ptr() as *mut _, + src.len(), + ); + } + } +} + +pub struct ListIterator(*const c_void); + +impl Iterator for ListIterator { + type Item = *const c_void; + fn next(&mut self) -> Option { + let ptr = self.0; + if lean_is_scalar(ptr) { + return None; + } + let ctor: &LeanCtorObject = as_ref_unsafe(ptr.cast()); + let [head_ptr, tail_ptr] = ctor.objs(); + self.0 = tail_ptr; + Some(head_ptr) + } +} + +pub fn collect_list( + mut ptr: *const c_void, + map_fn: fn(*const c_void) -> T, +) -> Vec { + let mut vec = Vec::new(); + while !lean_is_scalar(ptr) { + let ctor: &LeanCtorObject = as_ref_unsafe(ptr.cast()); + let [head_ptr, tail_ptr] = ctor.objs(); + vec.push(map_fn(head_ptr)); + ptr = tail_ptr; + } + vec +} + +pub fn collect_list_with( + mut ptr: *const c_void, + map_fn: fn(*const c_void, &mut C) -> T, + c: &mut C, +) -> Vec { + let mut vec = Vec::new(); + while !lean_is_scalar(ptr) { + let ctor: &LeanCtorObject = as_ref_unsafe(ptr.cast()); + let [head_ptr, tail_ptr] = ctor.objs(); + vec.push(map_fn(head_ptr, c)); + ptr = tail_ptr; + } + vec +} + +// ============================================================================= +// Lean C API extern declarations for object construction +// ============================================================================= + +use std::ffi::c_uint; + +// Lean C API wrappers (defined in c/ixon_ffi.c) +// These wrap Lean's allocation functions so they can be linked from Rust +unsafe extern "C" { + // Object 
allocation + /// Allocate a constructor object with the given tag, number of object fields, + /// and scalar size in bytes. + #[link_name = "c_lean_alloc_ctor"] + pub fn lean_alloc_ctor( + tag: c_uint, + num_objs: c_uint, + scalar_sz: c_uint, + ) -> *mut c_void; + + /// Set the i-th object field of a constructor. + #[link_name = "c_lean_ctor_set"] + pub fn lean_ctor_set(o: *mut c_void, i: c_uint, v: *mut c_void); + + /// Get the i-th object field of a constructor. + #[link_name = "c_lean_ctor_get"] + pub fn lean_ctor_get(o: *mut c_void, i: c_uint) -> *const c_void; + + /// Get the tag of a Lean object. + #[link_name = "c_lean_obj_tag"] + pub fn lean_obj_tag(o: *mut c_void) -> c_uint; + + /// Set a uint8 scalar field at the given byte offset (after object fields). + #[link_name = "c_lean_ctor_set_uint8"] + pub fn lean_ctor_set_uint8(o: *mut c_void, offset: usize, v: u8); + + /// Set a uint64 scalar field at the given byte offset (after object fields). + #[link_name = "c_lean_ctor_set_uint64"] + pub fn lean_ctor_set_uint64(o: *mut c_void, offset: usize, v: u64); + + // String allocation + /// Create a Lean string from a null-terminated C string. + #[link_name = "c_lean_mk_string"] + pub fn lean_mk_string(s: *const std::ffi::c_char) -> *mut c_void; + + // Scalar array (ByteArray) allocation + /// Allocate a scalar array with the given element size, initial size, and capacity. + #[link_name = "c_lean_alloc_sarray"] + pub fn lean_alloc_sarray( + elem_size: c_uint, + size: usize, + capacity: usize, + ) -> *mut c_void; + + /// Get a pointer to the data area of a scalar array. + #[link_name = "c_lean_sarray_cptr"] + pub fn lean_sarray_cptr(o: *mut c_void) -> *mut u8; + + // Array allocation + /// Allocate an array with the given initial size and capacity. + #[link_name = "c_lean_alloc_array"] + pub fn lean_alloc_array(size: usize, capacity: usize) -> *mut c_void; + + /// Set the i-th element of an array (does not update size). 
+ #[link_name = "c_lean_array_set_core"] + pub fn lean_array_set_core(o: *mut c_void, i: usize, v: *mut c_void); + + /// Get the i-th element of an array. + #[link_name = "c_lean_array_get_core"] + pub fn lean_array_get_core(o: *mut c_void, i: usize) -> *const c_void; + + // Reference counting + /// Increment the reference count of a Lean object. + #[link_name = "c_lean_inc"] + pub fn lean_inc(o: *mut c_void); + + /// Increment the reference count by n. + #[link_name = "c_lean_inc_n"] + pub fn lean_inc_n(o: *mut c_void, n: usize); + + // IO result construction + /// Wrap a value in a successful IO result. + #[link_name = "c_lean_io_result_mk_ok"] + pub fn lean_io_result_mk_ok(v: *mut c_void) -> *mut c_void; + + /// Wrap an error in an IO error result. + #[link_name = "c_lean_io_result_mk_error"] + pub fn lean_io_result_mk_error(err: *mut c_void) -> *mut c_void; + + /// Create an IO.Error.userError from a String. + #[link_name = "c_lean_mk_io_user_error"] + pub fn lean_mk_io_user_error(msg: *mut c_void) -> *mut c_void; + + // Nat allocation for large values + /// Create a Nat from a uint64. For values > max boxed, allocates on heap. + #[link_name = "c_lean_uint64_to_nat"] + pub fn lean_uint64_to_nat(n: u64) -> *mut c_void; + + /// Create a Nat from limbs (little-endian u64 array). Uses GMP internally. + #[link_name = "c_lean_nat_from_limbs"] + pub fn lean_nat_from_limbs( + num_limbs: usize, + limbs: *const u64, + ) -> *mut c_void; +} + +/// Box a scalar value into a Lean object pointer. +/// ```c +/// lean_object * lean_box(size_t n) { return (lean_object*)(((size_t)(n) << 1) | 1); } +/// ``` +#[inline] +pub fn lean_box_fn(n: usize) -> *mut c_void { + ((n << 1) | 1) as *mut c_void +} diff --git a/src/lean/ctor.rs b/src/lean/ctor.rs index de1d8db6..4e17f439 100644 --- a/src/lean/ctor.rs +++ b/src/lean/ctor.rs @@ -1,3 +1,5 @@ +//! Lean constructor object layout and field access. 
+ use std::{ffi::c_void, ptr}; use super::{CArray, object::LeanObject}; @@ -33,4 +35,30 @@ impl LeanCtorObject { pub fn set_objs(&mut self, data: &[*const c_void]) { self.m_objs.copy_from_slice(data); } + + /// Read a u64 scalar field from the constructor. + /// `num_objs` is the number of object fields (pointers) in this constructor. + /// `scalar_offset` is the byte offset within the scalar area. + /// Scalar fields are stored after the object fields in memory. + #[inline] + pub fn get_scalar_u64(&self, num_objs: usize, scalar_offset: usize) -> u64 { + // Scalar area starts after: header (8 bytes) + object pointers (8 bytes each) + let base_ptr = (self as *const Self).cast::(); + let scalar_area = unsafe { base_ptr.add(8 + num_objs * 8 + scalar_offset) }; + unsafe { ptr::read_unaligned(scalar_area.cast::()) } + } + + /// Read a u8 scalar field from the constructor. + #[inline] + pub fn get_scalar_u8(&self, num_objs: usize, scalar_offset: usize) -> u8 { + let base_ptr = (self as *const Self).cast::(); + let scalar_area = unsafe { base_ptr.add(8 + num_objs * 8 + scalar_offset) }; + unsafe { *scalar_area } + } + + /// Read a bool scalar field from the constructor. + #[inline] + pub fn get_scalar_bool(&self, num_objs: usize, scalar_offset: usize) -> bool { + self.get_scalar_u8(num_objs, scalar_offset) != 0 + } } diff --git a/src/lean/ffi/mod.rs b/src/lean/ffi.rs similarity index 58% rename from src/lean/ffi/mod.rs rename to src/lean/ffi.rs index dfb8275f..07003a57 100644 --- a/src/lean/ffi/mod.rs +++ b/src/lean/ffi.rs @@ -1,17 +1,54 @@ pub mod aiur; pub mod byte_array; pub mod iroh; -pub mod ixon; pub mod keccak; pub mod lean_env; +// Modular FFI structure +pub mod builder; // IxEnvBuilder struct +pub mod compile; // Compilation: rs_compile_env_full, rs_compile_phases, etc. 
+pub mod graph; // Graph/SCC: rs_build_ref_graph, rs_compute_sccs +pub mod ix; // Ix types: Name, Level, Expr, ConstantInfo, Environment +pub mod ixon; // Ixon types: Univ, Expr, Constant, metadata +pub mod primitives; // Primitives: rs_roundtrip_nat, rs_roundtrip_string, etc. + use std::ffi::{CStr, CString, c_char, c_void}; use crate::lean::{ - array::LeanArrayObject, as_ref_unsafe, lean_unbox_u32, + array::LeanArrayObject, as_ref_unsafe, lean_io_result_mk_error, + lean_mk_io_user_error, lean_mk_string, lean_unbox_u32, sarray::LeanSArrayObject, }; +/// Guard an FFI function that returns a Lean IO result against panics. +/// On panic, returns a Lean IO error with the panic message instead of +/// unwinding across the `extern "C"` boundary (which is undefined behavior). +pub(crate) fn ffi_io_guard(f: F) -> *mut c_void +where + F: FnOnce() -> *mut c_void + std::panic::UnwindSafe, +{ + match std::panic::catch_unwind(f) { + Ok(result) => result, + Err(panic_info) => { + let msg = if let Some(s) = panic_info.downcast_ref::<&str>() { + format!("FFI panic: {s}") + } else if let Some(s) = panic_info.downcast_ref::() { + format!("FFI panic: {s}") + } else { + "FFI panic: unknown".to_string() + }; + let c_msg = CString::new(msg).unwrap_or_else(|_| { + CString::new("FFI panic: (invalid message)").unwrap() + }); + unsafe { + let lean_msg = lean_mk_string(c_msg.as_ptr()); + let lean_err = lean_mk_io_user_error(lean_msg); + lean_io_result_mk_error(lean_err) + } + }, + } +} + /// ```c /// typedef struct { /// bool is_ok; diff --git a/src/lean/ffi/aiur/mod.rs b/src/lean/ffi/aiur.rs similarity index 100% rename from src/lean/ffi/aiur/mod.rs rename to src/lean/ffi/aiur.rs diff --git a/src/lean/ffi/builder.rs b/src/lean/ffi/builder.rs new file mode 100644 index 00000000..fe0d80af --- /dev/null +++ b/src/lean/ffi/builder.rs @@ -0,0 +1,40 @@ +//! LeanBuildCache struct for constructing Lean Ix types with caching. 
+ +use std::ffi::c_void; + +use blake3::Hash; +use rustc_hash::FxHashMap; + +/// Cache for constructing Lean Ix types with deduplication. +/// +/// This struct maintains caches for names, levels, and expressions to avoid +/// rebuilding the same Lean objects multiple times during environment construction. +pub struct LeanBuildCache { + pub(crate) names: FxHashMap, + pub(crate) levels: FxHashMap, + pub(crate) exprs: FxHashMap, +} + +impl LeanBuildCache { + pub fn new() -> Self { + Self { + names: FxHashMap::default(), + levels: FxHashMap::default(), + exprs: FxHashMap::default(), + } + } + + pub fn with_capacity(cap: usize) -> Self { + Self { + names: FxHashMap::with_capacity_and_hasher(cap, Default::default()), + levels: FxHashMap::with_capacity_and_hasher(cap, Default::default()), + exprs: FxHashMap::with_capacity_and_hasher(cap * 10, Default::default()), + } + } +} + +impl Default for LeanBuildCache { + fn default() -> Self { + Self::new() + } +} diff --git a/src/lean/ffi/compile.rs b/src/lean/ffi/compile.rs new file mode 100644 index 00000000..41c0a7a2 --- /dev/null +++ b/src/lean/ffi/compile.rs @@ -0,0 +1,1599 @@ +//! FFI bridge between Lean and Rust for the Ixon compilation/decompilation pipeline. +//! +//! Provides `extern "C"` functions callable from Lean via `@[extern]`: +//! - `rs_compile_env_full` / `rs_compile_env`: compile a Lean environment to Ixon +//! - `rs_compile_phases`: run individual pipeline phases (canon, condense, graph, compile) +//! - `rs_decompile_env`: decompile Ixon back to Lean environment +//! - `rs_roundtrip_*`: roundtrip FFI tests for Lean↔Rust type conversions +//! - `build_*` / `decode_*`: convert between Lean constructor layouts and Rust types +//! +//! ## Lean object layout conventions +//! +//! Lean constructors are allocated via `lean_alloc_ctor(tag, num_objs, scalar_size)`: +//! - Object fields are accessed with `lean_ctor_get(obj, i)` (0-indexed) +//! - Scalar fields follow objects at byte offset `8 + num_objs * 8` +//! 
- Scalar fields are accessed via pointer arithmetic on the object base + +use std::collections::HashMap; +use std::ffi::{CString, c_void}; +use std::sync::Arc; + +use super::ffi_io_guard; +use crate::ix::address::Address; +use crate::ix::compile::{CompileState, compile_env}; +use crate::ix::condense::compute_sccs; +use crate::ix::decompile::decompile_env; +use crate::ix::env::Name; +use crate::ix::graph::build_ref_graph; +use crate::ix::ixon::constant::{Constant as IxonConstant, ConstantInfo}; +use crate::ix::ixon::expr::Expr as IxonExpr; +use crate::ix::ixon::serialize::put_expr; +use crate::ix::ixon::{Comm, ConstantMeta}; +use crate::lean::nat::Nat; +use crate::lean::sarray::LeanSArrayObject; +use crate::lean::string::LeanStringObject; +use crate::lean::{ + as_ref_unsafe, lean_alloc_array, lean_alloc_ctor, lean_alloc_sarray, + lean_array_set_core, lean_ctor_get, lean_ctor_set, lean_ctor_set_uint8, + lean_ctor_set_uint64, lean_inc, lean_io_result_mk_error, + lean_io_result_mk_ok, lean_mk_io_user_error, lean_mk_string, lean_obj_tag, + lean_sarray_cptr, lean_uint64_to_nat, +}; + +use dashmap::DashMap; +use dashmap::DashSet; + +use super::builder::LeanBuildCache; +use super::graph::build_condensed_blocks; +use super::ix::constant::build_constant_info; +use super::ix::env::build_raw_environment; +use super::ix::name::build_name; +use super::ixon::constant::{ + build_address_from_ixon, build_ixon_constant, decode_ixon_address, +}; +use super::ixon::env::{ + build_raw_env, build_raw_name_entry, decode_raw_env, decoded_to_ixon_env, +}; +use super::ixon::meta::{build_constant_meta, build_ixon_comm}; +use super::lean_env::{GlobalCache, lean_ptr_to_env, lean_ptr_to_name}; + +// ============================================================================= +// Raw* Builder Functions for Compile FFI +// ============================================================================= + +/// Build RawConst: { addr : Address, const : Ixon.Constant } +pub fn build_raw_const(addr: 
&Address, constant: &IxonConstant) -> *mut c_void { + unsafe { + let addr_obj = build_address_from_ixon(addr); + let const_obj = build_ixon_constant(constant); + let obj = lean_alloc_ctor(0, 2, 0); + lean_ctor_set(obj, 0, addr_obj); + lean_ctor_set(obj, 1, const_obj); + obj + } +} + +/// Build RawNamed: { name : Ix.Name, addr : Address, constMeta : Ixon.ConstantMeta } +pub fn build_raw_named( + cache: &mut LeanBuildCache, + name: &Name, + addr: &Address, + meta: &ConstantMeta, +) -> *mut c_void { + unsafe { + let name_obj = build_name(cache, name); + let addr_obj = build_address_from_ixon(addr); + let meta_obj = build_constant_meta(meta); + let obj = lean_alloc_ctor(0, 3, 0); + lean_ctor_set(obj, 0, name_obj); + lean_ctor_set(obj, 1, addr_obj); + lean_ctor_set(obj, 2, meta_obj); + obj + } +} + +/// Build RawBlob: { addr : Address, bytes : ByteArray } +pub fn build_raw_blob(addr: &Address, bytes: &[u8]) -> *mut c_void { + unsafe { + let addr_obj = build_address_from_ixon(addr); + let ba = lean_alloc_sarray(1, bytes.len(), bytes.len()); + let ba_data = lean_sarray_cptr(ba); + std::ptr::copy_nonoverlapping(bytes.as_ptr(), ba_data, bytes.len()); + let obj = lean_alloc_ctor(0, 2, 0); + lean_ctor_set(obj, 0, addr_obj); + lean_ctor_set(obj, 1, ba); + obj + } +} + +/// Build RawComm: { addr : Address, comm : Ixon.Comm } +pub fn build_raw_comm(addr: &Address, comm: &Comm) -> *mut c_void { + unsafe { + let addr_obj = build_address_from_ixon(addr); + let comm_obj = build_ixon_comm(comm); + let obj = lean_alloc_ctor(0, 2, 0); + lean_ctor_set(obj, 0, addr_obj); + lean_ctor_set(obj, 1, comm_obj); + obj + } +} + +// ============================================================================= +// RustCondensedBlocks roundtrip FFI +// ============================================================================= + +/// Round-trip a RustCondensedBlocks structure. 
+#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_rust_condensed_blocks( + ptr: *const c_void, +) -> *mut c_void { + unsafe { + let low_links = lean_ctor_get(ptr as *mut _, 0) as *mut c_void; + let blocks = lean_ctor_get(ptr as *mut _, 1) as *mut c_void; + let block_refs = lean_ctor_get(ptr as *mut _, 2) as *mut c_void; + + lean_inc(low_links); + lean_inc(blocks); + lean_inc(block_refs); + + let result = lean_alloc_ctor(0, 3, 0); + lean_ctor_set(result, 0, low_links); + lean_ctor_set(result, 1, blocks); + lean_ctor_set(result, 2, block_refs); + result + } +} + +/// Round-trip a RustCompilePhases structure. +#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_rust_compile_phases( + ptr: *const c_void, +) -> *mut c_void { + unsafe { + let raw_env = lean_ctor_get(ptr as *mut _, 0) as *mut c_void; + let condensed = lean_ctor_get(ptr as *mut _, 1) as *mut c_void; + let compile_env = lean_ctor_get(ptr as *mut _, 2) as *mut c_void; + + lean_inc(raw_env); + lean_inc(condensed); + lean_inc(compile_env); + + let result = lean_alloc_ctor(0, 3, 0); + lean_ctor_set(result, 0, raw_env); + lean_ctor_set(result, 1, condensed); + lean_ctor_set(result, 2, compile_env); + result + } +} + +// ============================================================================= +// BlockCompareResult and BlockCompareDetail roundtrip FFI +// ============================================================================= + +/// Round-trip a BlockCompareResult. 
+#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_block_compare_result( + ptr: *const c_void, +) -> *mut c_void { + unsafe { + let tag = lean_obj_tag(ptr as *mut _); + match tag { + 0 => lean_alloc_ctor(0, 0, 0), + 1 => { + let base = ptr.cast::(); + let lean_size = *base.add(8).cast::(); + let rust_size = *base.add(16).cast::(); + let first_diff = *base.add(24).cast::(); + + let obj = lean_alloc_ctor(1, 0, 24); + let out_base = obj.cast::(); + *out_base.add(8).cast::() = lean_size; + *out_base.add(16).cast::() = rust_size; + *out_base.add(24).cast::() = first_diff; + obj + }, + 2 => lean_alloc_ctor(2, 0, 0), + _ => unreachable!("Invalid BlockCompareResult tag: {}", tag), + } + } +} + +/// Round-trip a BlockCompareDetail. +#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_block_compare_detail( + ptr: *const c_void, +) -> *mut c_void { + unsafe { + let result_ptr = lean_ctor_get(ptr as *mut _, 0); + let base = ptr.cast::(); + let lean_sharing_len = *base.add(16).cast::(); + let rust_sharing_len = *base.add(24).cast::(); + + let result_obj = rs_roundtrip_block_compare_result(result_ptr); + + let obj = lean_alloc_ctor(0, 1, 16); + lean_ctor_set(obj, 0, result_obj); + let out_base = obj.cast::(); + *out_base.add(16).cast::() = lean_sharing_len; + *out_base.add(24).cast::() = rust_sharing_len; + obj + } +} + +// ============================================================================= +// Full Compilation FFI +// ============================================================================= + +/// Create a Lean IO error result from a Rust error message. +unsafe fn make_compile_io_error(msg: &str) -> *mut c_void { + unsafe { + let c_msg = CString::new(msg) + .unwrap_or_else(|_| CString::new("compilation error").unwrap()); + let lean_msg = lean_mk_string(c_msg.as_ptr()); + let lean_err = lean_mk_io_user_error(lean_msg); + lean_io_result_mk_error(lean_err) + } +} + +/// FFI function to run the complete compilation pipeline and return all data. 
+#[unsafe(no_mangle)] +pub extern "C" fn rs_compile_env_full( + env_consts_ptr: *const c_void, +) -> *mut c_void { + ffi_io_guard(std::panic::AssertUnwindSafe(|| { + // Phase 1: Decode Lean environment + let rust_env = lean_ptr_to_env(env_consts_ptr); + let env_len = rust_env.len(); + let rust_env = Arc::new(rust_env); + + // Phase 2: Build ref graph and compute SCCs + let ref_graph = build_ref_graph(&rust_env); + let condensed = compute_sccs(&ref_graph.out_refs); + + // Phase 3: Compile + let compile_stt = match compile_env(&rust_env) { + Ok(stt) => stt, + Err(e) => { + let msg = + format!("rs_compile_env_full: Rust compilation failed: {:?}", e); + return unsafe { make_compile_io_error(&msg) }; + }, + }; + + // Phase 4: Build Lean structures + let mut cache = LeanBuildCache::with_capacity(env_len); + + unsafe { + let raw_env = build_raw_environment(&mut cache, &rust_env); + let condensed_obj = build_condensed_blocks(&mut cache, &condensed); + + // Collect blocks + let mut blocks_data: Vec<(Name, Vec, usize)> = Vec::new(); + let mut seen_addrs: std::collections::HashSet
= + std::collections::HashSet::new(); + + for entry in compile_stt.name_to_addr.iter() { + let name = entry.key().clone(); + let addr = entry.value().clone(); + + if seen_addrs.contains(&addr) { + continue; + } + seen_addrs.insert(addr.clone()); + + if let Some(constant) = compile_stt.env.get_const(&addr) { + let mut bytes = Vec::new(); + constant.put(&mut bytes); + let sharing_len = constant.sharing.len(); + blocks_data.push((name, bytes, sharing_len)); + } + } + + // Build blocks array + let blocks_arr = lean_alloc_array(blocks_data.len(), blocks_data.len()); + for (i, (name, bytes, sharing_len)) in blocks_data.iter().enumerate() { + let name_obj = build_name(&mut cache, name); + + let ba = lean_alloc_sarray(1, bytes.len(), bytes.len()); + let ba_data = lean_sarray_cptr(ba); + std::ptr::copy_nonoverlapping(bytes.as_ptr(), ba_data, bytes.len()); + + let block = lean_alloc_ctor(0, 2, 8); + lean_ctor_set(block, 0, name_obj); + lean_ctor_set(block, 1, ba); + let base = block.cast::(); + *base.add(8 + 16).cast::() = *sharing_len as u64; + + lean_array_set_core(blocks_arr, i, block); + } + + // Build nameToAddr array + let name_to_addr_len = compile_stt.name_to_addr.len(); + let name_to_addr_arr = + lean_alloc_array(name_to_addr_len, name_to_addr_len); + for (i, entry) in compile_stt.name_to_addr.iter().enumerate() { + let name = entry.key(); + let addr = entry.value(); + + let name_obj = build_name(&mut cache, name); + + let addr_bytes = addr.as_bytes(); + let addr_ba = lean_alloc_sarray(1, 32, 32); + let addr_data = lean_sarray_cptr(addr_ba); + std::ptr::copy_nonoverlapping(addr_bytes.as_ptr(), addr_data, 32); + + let entry_obj = lean_alloc_ctor(0, 2, 0); + lean_ctor_set(entry_obj, 0, name_obj); + lean_ctor_set(entry_obj, 1, addr_ba); + + lean_array_set_core(name_to_addr_arr, i, entry_obj); + } + + // Build RawCompiledEnv + let compiled_obj = lean_alloc_ctor(0, 2, 0); + lean_ctor_set(compiled_obj, 0, blocks_arr); + lean_ctor_set(compiled_obj, 1, name_to_addr_arr); + 
+ // Build RustCompilationResult + let result = lean_alloc_ctor(0, 3, 0); + lean_ctor_set(result, 0, raw_env); + lean_ctor_set(result, 1, condensed_obj); + lean_ctor_set(result, 2, compiled_obj); + + lean_io_result_mk_ok(result) + } + })) +} + +/// FFI function to compile a Lean environment to serialized Ixon.Env bytes. +#[unsafe(no_mangle)] +pub extern "C" fn rs_compile_env(env_consts_ptr: *const c_void) -> *mut c_void { + ffi_io_guard(std::panic::AssertUnwindSafe(|| { + let rust_env = lean_ptr_to_env(env_consts_ptr); + let rust_env = Arc::new(rust_env); + + let compile_stt = match compile_env(&rust_env) { + Ok(stt) => stt, + Err(e) => { + let msg = format!("rs_compile_env: Rust compilation failed: {:?}", e); + return unsafe { make_compile_io_error(&msg) }; + }, + }; + + // Serialize the compiled Env to bytes + let mut buf = Vec::new(); + if let Err(e) = compile_stt.env.put(&mut buf) { + let msg = format!("rs_compile_env: Env serialization failed: {}", e); + return unsafe { make_compile_io_error(&msg) }; + } + + // Build Lean ByteArray + unsafe { + let ba = lean_alloc_sarray(1, buf.len(), buf.len()); + let ba_data = lean_sarray_cptr(ba); + std::ptr::copy_nonoverlapping(buf.as_ptr(), ba_data, buf.len()); + lean_io_result_mk_ok(ba) + } + })) +} + +/// Round-trip a RawEnv: decode from Lean, re-encode via builder. +/// This performs a full decode/build cycle to verify FFI correctness. +#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_raw_env( + raw_env_ptr: *const c_void, +) -> *mut c_void { + let env = decode_raw_env(raw_env_ptr); + build_raw_env(&env) +} + +/// FFI function to run all compilation phases and return combined results. 
+#[unsafe(no_mangle)] +pub extern "C" fn rs_compile_phases( + env_consts_ptr: *const c_void, +) -> *mut c_void { + ffi_io_guard(std::panic::AssertUnwindSafe(|| { + let rust_env = lean_ptr_to_env(env_consts_ptr); + let env_len = rust_env.len(); + let rust_env = Arc::new(rust_env); + + let mut cache = LeanBuildCache::with_capacity(env_len); + let raw_env = build_raw_environment(&mut cache, &rust_env); + + let ref_graph = build_ref_graph(&rust_env); + + let condensed = compute_sccs(&ref_graph.out_refs); + + let condensed_obj = build_condensed_blocks(&mut cache, &condensed); + + let compile_stt = match compile_env(&rust_env) { + Ok(stt) => stt, + Err(e) => { + let msg = format!("rs_compile_phases: compilation failed: {:?}", e); + return unsafe { make_compile_io_error(&msg) }; + }, + }; + // Build Lean objects from compile results + unsafe { + let consts: Vec<_> = compile_stt + .env + .consts + .iter() + .map(|e| (e.key().clone(), e.value().clone())) + .collect(); + let consts_arr = lean_alloc_array(consts.len(), consts.len()); + for (i, (addr, constant)) in consts.iter().enumerate() { + let raw_const = build_raw_const(addr, constant); + lean_array_set_core(consts_arr, i, raw_const); + } + + let named: Vec<_> = compile_stt + .env + .named + .iter() + .map(|e| (e.key().clone(), e.value().clone())) + .collect(); + let named_arr = lean_alloc_array(named.len(), named.len()); + for (i, (name, n)) in named.iter().enumerate() { + let raw_named = build_raw_named(&mut cache, name, &n.addr, &n.meta); + lean_array_set_core(named_arr, i, raw_named); + } + + let blobs: Vec<_> = compile_stt + .env + .blobs + .iter() + .map(|e| (e.key().clone(), e.value().clone())) + .collect(); + let blobs_arr = lean_alloc_array(blobs.len(), blobs.len()); + for (i, (addr, bytes)) in blobs.iter().enumerate() { + let raw_blob = build_raw_blob(addr, bytes); + lean_array_set_core(blobs_arr, i, raw_blob); + } + + let comms: Vec<_> = compile_stt + .env + .comms + .iter() + .map(|e| (e.key().clone(), 
e.value().clone())) + .collect(); + let comms_arr = lean_alloc_array(comms.len(), comms.len()); + for (i, (addr, comm)) in comms.iter().enumerate() { + let raw_comm = build_raw_comm(addr, comm); + lean_array_set_core(comms_arr, i, raw_comm); + } + + // Build names array (Address → Ix.Name) + let names: Vec<_> = compile_stt + .env + .names + .iter() + .map(|e| (e.key().clone(), e.value().clone())) + .collect(); + let names_arr = lean_alloc_array(names.len(), names.len()); + for (i, (addr, name)) in names.iter().enumerate() { + let obj = build_raw_name_entry(&mut cache, addr, name); + lean_array_set_core(names_arr, i, obj); + } + + let raw_ixon_env = lean_alloc_ctor(0, 5, 0); + lean_ctor_set(raw_ixon_env, 0, consts_arr); + lean_ctor_set(raw_ixon_env, 1, named_arr); + lean_ctor_set(raw_ixon_env, 2, blobs_arr); + lean_ctor_set(raw_ixon_env, 3, comms_arr); + lean_ctor_set(raw_ixon_env, 4, names_arr); + + let result = lean_alloc_ctor(0, 3, 0); + lean_ctor_set(result, 0, raw_env); + lean_ctor_set(result, 1, condensed_obj); + lean_ctor_set(result, 2, raw_ixon_env); + + lean_io_result_mk_ok(result) + } + })) +} + +/// FFI function to compile a Lean environment to a RawEnv. 
+#[unsafe(no_mangle)] +pub extern "C" fn rs_compile_env_to_ixon( + env_consts_ptr: *const c_void, +) -> *mut c_void { + ffi_io_guard(std::panic::AssertUnwindSafe(|| { + let rust_env = lean_ptr_to_env(env_consts_ptr); + let rust_env = Arc::new(rust_env); + + let compile_stt = match compile_env(&rust_env) { + Ok(stt) => stt, + Err(e) => { + let msg = + format!("rs_compile_env_to_ixon: compilation failed: {:?}", e); + return unsafe { make_compile_io_error(&msg) }; + }, + }; + + let mut cache = LeanBuildCache::with_capacity(rust_env.len()); + + unsafe { + let consts: Vec<_> = compile_stt + .env + .consts + .iter() + .map(|e| (e.key().clone(), e.value().clone())) + .collect(); + let consts_arr = lean_alloc_array(consts.len(), consts.len()); + for (i, (addr, constant)) in consts.iter().enumerate() { + let raw_const = build_raw_const(addr, constant); + lean_array_set_core(consts_arr, i, raw_const); + } + + let named: Vec<_> = compile_stt + .env + .named + .iter() + .map(|e| (e.key().clone(), e.value().clone())) + .collect(); + let named_arr = lean_alloc_array(named.len(), named.len()); + for (i, (name, n)) in named.iter().enumerate() { + let raw_named = build_raw_named(&mut cache, name, &n.addr, &n.meta); + lean_array_set_core(named_arr, i, raw_named); + } + + let blobs: Vec<_> = compile_stt + .env + .blobs + .iter() + .map(|e| (e.key().clone(), e.value().clone())) + .collect(); + let blobs_arr = lean_alloc_array(blobs.len(), blobs.len()); + for (i, (addr, bytes)) in blobs.iter().enumerate() { + let raw_blob = build_raw_blob(addr, bytes); + lean_array_set_core(blobs_arr, i, raw_blob); + } + + let comms: Vec<_> = compile_stt + .env + .comms + .iter() + .map(|e| (e.key().clone(), e.value().clone())) + .collect(); + let comms_arr = lean_alloc_array(comms.len(), comms.len()); + for (i, (addr, comm)) in comms.iter().enumerate() { + let raw_comm = build_raw_comm(addr, comm); + lean_array_set_core(comms_arr, i, raw_comm); + } + + // Build names array (Address → Ix.Name) + let 
names: Vec<_> = compile_stt + .env + .names + .iter() + .map(|e| (e.key().clone(), e.value().clone())) + .collect(); + let names_arr = lean_alloc_array(names.len(), names.len()); + for (i, (addr, name)) in names.iter().enumerate() { + let obj = build_raw_name_entry(&mut cache, addr, name); + lean_array_set_core(names_arr, i, obj); + } + + let result = lean_alloc_ctor(0, 5, 0); + lean_ctor_set(result, 0, consts_arr); + lean_ctor_set(result, 1, named_arr); + lean_ctor_set(result, 2, blobs_arr); + lean_ctor_set(result, 3, comms_arr); + lean_ctor_set(result, 4, names_arr); + lean_io_result_mk_ok(result) + } + })) +} + +/// FFI function to canonicalize environment to Ix.RawEnvironment. +#[unsafe(no_mangle)] +pub extern "C" fn rs_canonicalize_env_to_ix( + env_consts_ptr: *const c_void, +) -> *mut c_void { + ffi_io_guard(std::panic::AssertUnwindSafe(|| { + let rust_env = lean_ptr_to_env(env_consts_ptr); + let mut cache = LeanBuildCache::with_capacity(rust_env.len()); + let raw_env = build_raw_environment(&mut cache, &rust_env); + unsafe { lean_io_result_mk_ok(raw_env) } + })) +} + +// ============================================================================= +// RustCompiledEnv - Holds Rust compilation results for comparison +// ============================================================================= + +/// Rust-compiled environment holding blocks indexed by low-link name. +/// Each block is stored as serialized bytes for comparison with Lean output. +pub struct RustCompiledEnv { + /// Map from low-link name to (serialized constant bytes, sharing vector length) + blocks: HashMap, usize)>, + /// The full compile state for accessing pre-sharing expressions + compile_state: CompileState, +} + +// ============================================================================= +// Block-by-block comparison FFI +// ============================================================================= + +/// FFI: Simple test to verify FFI round-trip works. 
+/// Takes a Lean.Name and returns a magic number to verify the call succeeded. +#[unsafe(no_mangle)] +extern "C" fn rs_test_ffi_roundtrip(name_ptr: *const c_void) -> u64 { + let global_cache = GlobalCache::default(); + let name = lean_ptr_to_name(name_ptr, &global_cache); + + // Return a magic number plus the hash of the name to verify it worked + let hash = name.get_hash(); + let hash_bytes = hash.as_bytes(); + let hash_prefix = + u64::from_le_bytes(hash_bytes[0..8].try_into().unwrap_or([0u8; 8])); + + // Magic number 0xDEADBEEF plus hash prefix + 0xDEAD_BEEF_0000_0000 | (hash_prefix & 0x0000_0000_FFFF_FFFF) +} + +/// FFI: Compile entire environment with Rust, returning a handle to RustCompiledEnv. +/// Takes: +/// - env_consts_ptr: pointer to List (Name x ConstantInfo) from Lean environment +/// +/// Returns: pointer to RustCompiledEnv (or null on failure) +#[unsafe(no_mangle)] +extern "C" fn rs_compile_env_rust_first( + env_consts_ptr: *const c_void, +) -> *mut RustCompiledEnv { + // Decode Lean environment + let lean_env = lean_ptr_to_env(env_consts_ptr); + let lean_env = Arc::new(lean_env); + + // Compile with Rust + let rust_stt = match compile_env(&lean_env) { + Ok(stt) => stt, + Err(_e) => { + return std::ptr::null_mut(); + }, + }; + + // Build block map: lowlink name -> (serialized bytes, sharing len) + let mut blocks: HashMap, usize)> = HashMap::new(); + + // Iterate over all names and their addresses + for entry in rust_stt.name_to_addr.iter() { + let name = entry.key().clone(); + let addr = entry.value().clone(); + + // Skip if we already have this block (multiple names map to same block) + if blocks.contains_key(&name) { + continue; + } + + // Get the compiled constant + if let Some(constant) = rust_stt.env.get_const(&addr) { + let mut bytes = Vec::new(); + constant.put(&mut bytes); + let sharing_len = constant.sharing.len(); + blocks.insert(name, (bytes, sharing_len)); + } + } + + // Return boxed RustCompiledEnv with full compile state for 
pre-sharing access + Box::into_raw(Box::new(RustCompiledEnv { blocks, compile_state: rust_stt })) +} + +/// FFI: Compare a single block and return packed result. +/// Returns a packed u64: high 32 bits = matches (1) or error code (0 = mismatch, 2 = not found) +/// low 32 bits = first diff offset (if mismatch) +#[unsafe(no_mangle)] +extern "C" fn rs_compare_block( + rust_env: *const RustCompiledEnv, + lowlink_name: *const c_void, + lean_bytes: &LeanSArrayObject, +) -> u64 { + if rust_env.is_null() { + return 2u64 << 32; // not found + } + let global_cache = GlobalCache::default(); + let name = lean_ptr_to_name(lowlink_name, &global_cache); + + let rust_env = unsafe { &*rust_env }; + let lean_data = lean_bytes.data(); + + // Look up Rust's compiled block + let rust_bytes = match rust_env.blocks.get(&name) { + Some((bytes, _)) => bytes, + None => { + // Block not found in Rust compilation: code 2 + return 2u64 << 32; + }, + }; + + // Compare bytes + if rust_bytes == lean_data { + // Match: code 1 + return 1u64 << 32; + } + + // Mismatch: find first differing byte + rust_bytes.iter().zip(lean_data.iter()).position(|(a, b)| a != b).map_or_else( + || { + // One is a prefix of the other + rust_bytes.len().min(lean_data.len()) as u64 + }, + |i| i as u64, + ) +} + +/// FFI: Free a RustCompiledEnv. +#[unsafe(no_mangle)] +extern "C" fn rs_free_rust_env(rust_env: *mut RustCompiledEnv) { + if !rust_env.is_null() { + unsafe { + drop(Box::from_raw(rust_env)); + } + } +} + +/// FFI: Get the number of blocks in a RustCompiledEnv. +#[unsafe(no_mangle)] +extern "C" fn rs_get_rust_env_block_count( + rust_env: *const RustCompiledEnv, +) -> u64 { + if rust_env.is_null() { + return 0; + } + let rust_env = unsafe { &*rust_env }; + rust_env.blocks.len() as u64 +} + +/// FFI: Get Rust's compiled bytes length for a block. 
+#[unsafe(no_mangle)] +extern "C" fn rs_get_block_bytes_len( + rust_env: *const RustCompiledEnv, + lowlink_name: *const c_void, +) -> u64 { + if rust_env.is_null() { + return 0; + } + let global_cache = GlobalCache::default(); + let name = lean_ptr_to_name(lowlink_name, &global_cache); + + let rust_env = unsafe { &*rust_env }; + + match rust_env.blocks.get(&name) { + Some((bytes, _)) => bytes.len() as u64, + None => 0, + } +} + +/// FFI: Copy Rust's compiled bytes into a pre-allocated Lean ByteArray. +#[unsafe(no_mangle)] +extern "C" fn rs_copy_block_bytes( + rust_env: *const RustCompiledEnv, + lowlink_name: *const c_void, + dest: *mut c_void, +) { + if rust_env.is_null() { + return; + } + let global_cache = GlobalCache::default(); + let name = lean_ptr_to_name(lowlink_name, &global_cache); + + let rust_env = unsafe { &*rust_env }; + + let bytes = match rust_env.blocks.get(&name) { + Some((bytes, _)) => bytes, + None => return, + }; + + // Copy into the Lean ByteArray + let dest_arr: &mut LeanSArrayObject = unsafe { &mut *dest.cast() }; + dest_arr.set_data(bytes); +} + +/// FFI: Get Rust's sharing vector length for a block. +#[unsafe(no_mangle)] +extern "C" fn rs_get_block_sharing_len( + rust_env: *const RustCompiledEnv, + lowlink_name: *const c_void, +) -> u64 { + if rust_env.is_null() { + return 0; + } + let global_cache = GlobalCache::default(); + let name = lean_ptr_to_name(lowlink_name, &global_cache); + + let rust_env = unsafe { &*rust_env }; + + match rust_env.blocks.get(&name) { + Some((_, sharing_len)) => *sharing_len as u64, + None => 0, + } +} + +// ============================================================================= +// Pre-sharing expression extraction FFI +// ============================================================================= + +/// Frame for iterative unshare traversal. 
+enum UnshareFrame<'a> { + Visit(&'a Arc), + BuildApp, + BuildLam, + BuildAll, + BuildLet(bool), + BuildPrj(u64, u64), +} + +/// Expand Share(idx) references in an expression using the sharing vector. +/// This reconstructs the "pre-sharing" expression from the post-sharing +/// representation. Uses iterative traversal to avoid stack overflow on deep +/// expressions. +#[allow(clippy::cast_possible_truncation)] +fn unshare_expr( + expr: &Arc, + sharing: &[Arc], +) -> Arc { + let mut stack: Vec> = vec![UnshareFrame::Visit(expr)]; + let mut results: Vec> = Vec::new(); + + while let Some(frame) = stack.pop() { + match frame { + UnshareFrame::Visit(e) => match e.as_ref() { + IxonExpr::Share(idx) => { + if (*idx as usize) < sharing.len() { + stack.push(UnshareFrame::Visit(&sharing[*idx as usize])); + } else { + results.push(e.clone()); + } + }, + IxonExpr::App(f, a) => { + stack.push(UnshareFrame::BuildApp); + stack.push(UnshareFrame::Visit(a)); + stack.push(UnshareFrame::Visit(f)); + }, + IxonExpr::Lam(t, b) => { + stack.push(UnshareFrame::BuildLam); + stack.push(UnshareFrame::Visit(b)); + stack.push(UnshareFrame::Visit(t)); + }, + IxonExpr::All(t, b) => { + stack.push(UnshareFrame::BuildAll); + stack.push(UnshareFrame::Visit(b)); + stack.push(UnshareFrame::Visit(t)); + }, + IxonExpr::Let(nd, t, v, b) => { + stack.push(UnshareFrame::BuildLet(*nd)); + stack.push(UnshareFrame::Visit(b)); + stack.push(UnshareFrame::Visit(v)); + stack.push(UnshareFrame::Visit(t)); + }, + IxonExpr::Prj(ti, fi, v) => { + stack.push(UnshareFrame::BuildPrj(*ti, *fi)); + stack.push(UnshareFrame::Visit(v)); + }, + // Leaf nodes - no children to unshare + _ => results.push(e.clone()), + }, + UnshareFrame::BuildApp => { + let a = results.pop().unwrap(); + let f = results.pop().unwrap(); + results.push(Arc::new(IxonExpr::App(f, a))); + }, + UnshareFrame::BuildLam => { + let b = results.pop().unwrap(); + let t = results.pop().unwrap(); + results.push(Arc::new(IxonExpr::Lam(t, b))); + }, + 
UnshareFrame::BuildAll => { + let b = results.pop().unwrap(); + let t = results.pop().unwrap(); + results.push(Arc::new(IxonExpr::All(t, b))); + }, + UnshareFrame::BuildLet(nd) => { + let b = results.pop().unwrap(); + let v = results.pop().unwrap(); + let t = results.pop().unwrap(); + results.push(Arc::new(IxonExpr::Let(nd, t, v, b))); + }, + UnshareFrame::BuildPrj(ti, fi) => { + let v = results.pop().unwrap(); + results.push(Arc::new(IxonExpr::Prj(ti, fi, v))); + }, + } + } + + results.pop().unwrap() +} + +/// FFI: Get the pre-sharing root expressions for a constant. +/// Returns the number of root expressions, and writes serialized expressions to the output buffer. +/// Each expression is serialized without sharing (Share nodes are expanded). +/// +/// Output format: [n_exprs:u64, len1:u64, expr1_bytes..., len2:u64, expr2_bytes..., ...] +#[unsafe(no_mangle)] +extern "C" fn rs_get_pre_sharing_exprs( + rust_env: *const RustCompiledEnv, + lowlink_name: *const c_void, + out_buf: *mut c_void, +) -> u64 { + if rust_env.is_null() { + return 0; + } + let global_cache = GlobalCache::default(); + let name = lean_ptr_to_name(lowlink_name, &global_cache); + + let rust_env = unsafe { &*rust_env }; + + // Look up the address for this name + let addr = match rust_env.compile_state.name_to_addr.get(&name) { + Some(a) => a.clone(), + None => { + return 0; + }, + }; + + // Get the constant (note: contains post-sharing expressions) + let constant = match rust_env.compile_state.env.get_const(&addr) { + Some(c) => c, + None => { + return 0; + }, + }; + + // Extract root expressions from the constant info + let root_exprs: Vec> = match &constant.info { + ConstantInfo::Defn(def) => vec![def.typ.clone(), def.value.clone()], + ConstantInfo::Axio(ax) => vec![ax.typ.clone()], + ConstantInfo::Quot(q) => vec![q.typ.clone()], + ConstantInfo::Recr(rec) => { + let mut exprs = vec![rec.typ.clone()]; + for rule in &rec.rules { + exprs.push(rule.rhs.clone()); + } + exprs + }, + // Projections 
don't contain expressions directly + ConstantInfo::CPrj(_) + | ConstantInfo::RPrj(_) + | ConstantInfo::IPrj(_) + | ConstantInfo::DPrj(_) => { + vec![] + }, + ConstantInfo::Muts(muts) => { + let mut exprs = Vec::new(); + for mc in muts { + match mc { + crate::ix::ixon::constant::MutConst::Defn(def) => { + exprs.push(def.typ.clone()); + exprs.push(def.value.clone()); + }, + crate::ix::ixon::constant::MutConst::Indc(ind) => { + exprs.push(ind.typ.clone()); + for ctor in &ind.ctors { + exprs.push(ctor.typ.clone()); + } + }, + crate::ix::ixon::constant::MutConst::Recr(rec) => { + exprs.push(rec.typ.clone()); + for rule in &rec.rules { + exprs.push(rule.rhs.clone()); + } + }, + } + } + exprs + }, + }; + + // Unshare and serialize each root expression + let mut output_bytes: Vec = Vec::new(); + let n_exprs = root_exprs.len() as u64; + + // Write number of expressions + output_bytes.extend_from_slice(&n_exprs.to_le_bytes()); + + for expr in &root_exprs { + // Unshare the expression + let unshared = unshare_expr(expr, &constant.sharing); + + // Serialize to bytes + let mut expr_bytes: Vec = Vec::new(); + put_expr(&unshared, &mut expr_bytes); + + // Write length and bytes + output_bytes.extend_from_slice(&(expr_bytes.len() as u64).to_le_bytes()); + output_bytes.extend(expr_bytes); + } + + // Write to output buffer + let out_arr: &mut LeanSArrayObject = unsafe { &mut *out_buf.cast() }; + out_arr.set_data(&output_bytes); + + n_exprs +} + +/// FFI: Get the buffer length needed for pre-sharing expressions. 
+#[unsafe(no_mangle)] +extern "C" fn rs_get_pre_sharing_exprs_len( + rust_env: *const RustCompiledEnv, + lowlink_name: *const c_void, +) -> u64 { + if rust_env.is_null() { + return 0; + } + let global_cache = GlobalCache::default(); + let name = lean_ptr_to_name(lowlink_name, &global_cache); + + let rust_env = unsafe { &*rust_env }; + + // Look up the address for this name + let addr = match rust_env.compile_state.name_to_addr.get(&name) { + Some(a) => a.clone(), + None => return 0, + }; + + // Get the constant + let constant = match rust_env.compile_state.env.get_const(&addr) { + Some(c) => c, + None => return 0, + }; + + // Count root expressions + let n_exprs = match &constant.info { + ConstantInfo::Defn(_) => 2, + ConstantInfo::Axio(_) | ConstantInfo::Quot(_) => 1, + ConstantInfo::Recr(rec) => 1 + rec.rules.len(), + // Projections don't contain expressions directly + ConstantInfo::CPrj(_) + | ConstantInfo::RPrj(_) + | ConstantInfo::IPrj(_) + | ConstantInfo::DPrj(_) => 0, + ConstantInfo::Muts(muts) => { + let mut count = 0; + for mc in muts { + match mc { + crate::ix::ixon::constant::MutConst::Defn(_) => count += 2, + crate::ix::ixon::constant::MutConst::Indc(ind) => { + count += 1 + ind.ctors.len() + }, + crate::ix::ixon::constant::MutConst::Recr(rec) => { + count += 1 + rec.rules.len() + }, + } + } + count + }, + }; + + // Estimate: 8 bytes per header + some for expression data + // This is an upper bound estimate + (8 + n_exprs * 1024) as u64 +} + +/// FFI: Look up a constant's compiled address from RustCompiledEnv. +/// Copies the 32-byte blake3 hash into the provided ByteArray. +/// Returns 1 on success, 0 if name not found. 
+#[unsafe(no_mangle)] +extern "C" fn rs_lookup_const_addr( + rust_env: *const RustCompiledEnv, + name_ptr: *const c_void, + out_addr: *mut c_void, +) -> u64 { + if rust_env.is_null() { + return 0; + } + let global_cache = GlobalCache::default(); + let name = lean_ptr_to_name(name_ptr, &global_cache); + + let rust_env = unsafe { &*rust_env }; + + // Look up the address for this name + match rust_env.compile_state.name_to_addr.get(&name) { + Some(addr_ref) => { + // Copy the 32-byte address into the output ByteArray + let out_arr: &mut LeanSArrayObject = unsafe { &mut *out_addr.cast() }; + out_arr.set_data(addr_ref.as_bytes()); + 1 + }, + None => 0, + } +} + +/// FFI: Get the total number of compiled constants in RustCompiledEnv. +#[unsafe(no_mangle)] +extern "C" fn rs_get_compiled_const_count( + rust_env: *const RustCompiledEnv, +) -> u64 { + if rust_env.is_null() { + return 0; + } + let rust_env = unsafe { &*rust_env }; + rust_env.compile_state.name_to_addr.len() as u64 +} + +// ============================================================================= +// Error type FFI builders +// ============================================================================= + +use crate::ix::ixon::error::{CompileError, DecompileError, SerializeError}; + +/// Build a Lean String from a Rust &str. +fn build_lean_string(s: &str) -> *mut c_void { + let cstr = CString::new(s) + .unwrap_or_else(|_| CString::new("(invalid string)").unwrap()); + unsafe { lean_mk_string(cstr.as_ptr()) } +} + +/// Build a Lean Nat from a usize. +fn build_lean_nat_usize(n: usize) -> *mut c_void { + unsafe { lean_uint64_to_nat(n as u64) } +} + +/// Build a Lean Ixon.SerializeError from a Rust SerializeError. 
+/// +/// Tags 0–6: +/// 0: unexpectedEof (expected : String) → 1 obj +/// 1: invalidTag (tag : UInt8) (context : String) → 1 obj + 1 scalar (UInt8) +/// 2: invalidFlag (flag : UInt8) (context : String) → 1 obj + 1 scalar (UInt8) +/// 3: invalidVariant (variant : UInt64) (context : String) → 1 obj + 8 scalar (UInt64) +/// 4: invalidBool (value : UInt8) → 0 obj + 1 scalar (UInt8) +/// 5: addressError → 0 obj + 0 scalar +/// 6: invalidShareIndex (idx : UInt64) (max : Nat) → 1 obj (Nat) + 8 scalar (UInt64) +pub fn build_serialize_error(se: &SerializeError) -> *mut c_void { + unsafe { + match se { + SerializeError::UnexpectedEof { expected } => { + let obj = lean_alloc_ctor(0, 1, 0); + lean_ctor_set(obj, 0, build_lean_string(expected)); + obj + }, + SerializeError::InvalidTag { tag, context } => { + // 1 obj (String) + 1 scalar byte (UInt8) + let obj = lean_alloc_ctor(1, 1, 1); + lean_ctor_set(obj, 0, build_lean_string(context)); + lean_ctor_set_uint8(obj, 8, *tag); + obj + }, + SerializeError::InvalidFlag { flag, context } => { + let obj = lean_alloc_ctor(2, 1, 1); + lean_ctor_set(obj, 0, build_lean_string(context)); + lean_ctor_set_uint8(obj, 8, *flag); + obj + }, + SerializeError::InvalidVariant { variant, context } => { + let obj = lean_alloc_ctor(3, 1, 8); + lean_ctor_set(obj, 0, build_lean_string(context)); + lean_ctor_set_uint64(obj, 8, *variant); + obj + }, + SerializeError::InvalidBool { value } => { + let obj = lean_alloc_ctor(4, 0, 1); + lean_ctor_set_uint8(obj, 0, *value); + obj + }, + SerializeError::AddressError => lean_alloc_ctor(5, 0, 0), + SerializeError::InvalidShareIndex { idx, max } => { + let obj = lean_alloc_ctor(6, 1, 8); + lean_ctor_set(obj, 0, build_lean_nat_usize(*max)); + lean_ctor_set_uint64(obj, 8, *idx); + obj + }, + } + } +} + +/// Decode a Lean Ixon.SerializeError to a Rust SerializeError. 
+pub fn decode_serialize_error(ptr: *const c_void) -> SerializeError { + unsafe { + let tag = lean_obj_tag(ptr as *mut _); + match tag { + 0 => { + let str_ptr = lean_ctor_get(ptr as *mut _, 0); + let expected = + as_ref_unsafe::(str_ptr.cast()).as_string(); + SerializeError::UnexpectedEof { expected } + }, + 1 => { + let str_ptr = lean_ctor_get(ptr as *mut _, 0); + let base = ptr.cast::(); + let tag_val = *base.add(8 + 8); + let context = + as_ref_unsafe::(str_ptr.cast()).as_string(); + SerializeError::InvalidTag { tag: tag_val, context } + }, + 2 => { + let str_ptr = lean_ctor_get(ptr as *mut _, 0); + let base = ptr.cast::(); + let flag = *base.add(8 + 8); + let context = + as_ref_unsafe::(str_ptr.cast()).as_string(); + SerializeError::InvalidFlag { flag, context } + }, + 3 => { + let str_ptr = lean_ctor_get(ptr as *mut _, 0); + let base = ptr.cast::(); + let variant = *base.add(8 + 8).cast::(); + let context = + as_ref_unsafe::(str_ptr.cast()).as_string(); + SerializeError::InvalidVariant { variant, context } + }, + 4 => { + let base = ptr.cast::(); + let value = *base.add(8); + SerializeError::InvalidBool { value } + }, + 5 => SerializeError::AddressError, + 6 => { + let nat_ptr = lean_ctor_get(ptr as *mut _, 0); + let base = ptr.cast::(); + let idx = *base.add(8 + 8).cast::(); + let max = Nat::from_ptr(nat_ptr) + .to_u64() + .and_then(|x| usize::try_from(x).ok()) + .unwrap_or(0); + SerializeError::InvalidShareIndex { idx, max } + }, + _ => unreachable!("Invalid SerializeError tag: {}", tag), + } + } +} + +/// Build a Lean DecompileError from a Rust DecompileError. 
+/// +/// Layout for index variants (tags 0–4): +/// `(idx : UInt64) (len/max : Nat) (constant : String)` +/// → 2 object fields (Nat, String) + 8 scalar bytes (UInt64) +/// → `lean_alloc_ctor(tag, 2, 8)` +/// → obj[0] = Nat, obj[1] = String, scalar[0] = UInt64 +pub fn build_decompile_error(err: &DecompileError) -> *mut c_void { + unsafe { + match err { + DecompileError::InvalidRefIndex { idx, refs_len, constant } => { + let obj = lean_alloc_ctor(0, 2, 8); + lean_ctor_set(obj, 0, build_lean_nat_usize(*refs_len)); + lean_ctor_set(obj, 1, build_lean_string(constant)); + lean_ctor_set_uint64(obj, 2 * 8, *idx); + obj + }, + DecompileError::InvalidUnivIndex { idx, univs_len, constant } => { + let obj = lean_alloc_ctor(1, 2, 8); + lean_ctor_set(obj, 0, build_lean_nat_usize(*univs_len)); + lean_ctor_set(obj, 1, build_lean_string(constant)); + lean_ctor_set_uint64(obj, 2 * 8, *idx); + obj + }, + DecompileError::InvalidShareIndex { idx, max, constant } => { + let obj = lean_alloc_ctor(2, 2, 8); + lean_ctor_set(obj, 0, build_lean_nat_usize(*max)); + lean_ctor_set(obj, 1, build_lean_string(constant)); + lean_ctor_set_uint64(obj, 2 * 8, *idx); + obj + }, + DecompileError::InvalidRecIndex { idx, ctx_size, constant } => { + let obj = lean_alloc_ctor(3, 2, 8); + lean_ctor_set(obj, 0, build_lean_nat_usize(*ctx_size)); + lean_ctor_set(obj, 1, build_lean_string(constant)); + lean_ctor_set_uint64(obj, 2 * 8, *idx); + obj + }, + DecompileError::InvalidUnivVarIndex { idx, max, constant } => { + let obj = lean_alloc_ctor(4, 2, 8); + lean_ctor_set(obj, 0, build_lean_nat_usize(*max)); + lean_ctor_set(obj, 1, build_lean_string(constant)); + lean_ctor_set_uint64(obj, 2 * 8, *idx); + obj + }, + DecompileError::MissingAddress(addr) => { + // tag 5, 1 object (Address = ByteArray) + let obj = lean_alloc_ctor(5, 1, 0); + lean_ctor_set(obj, 0, build_address_from_ixon(addr)); + obj + }, + DecompileError::MissingMetadata(addr) => { + // tag 6, 1 object (Address = ByteArray) + let obj = 
lean_alloc_ctor(6, 1, 0); + lean_ctor_set(obj, 0, build_address_from_ixon(addr)); + obj + }, + DecompileError::BlobNotFound(addr) => { + // tag 7, 1 object (Address = ByteArray) + let obj = lean_alloc_ctor(7, 1, 0); + lean_ctor_set(obj, 0, build_address_from_ixon(addr)); + obj + }, + DecompileError::BadBlobFormat { addr, expected } => { + // tag 8, 2 objects (Address, String) + let obj = lean_alloc_ctor(8, 2, 0); + lean_ctor_set(obj, 0, build_address_from_ixon(addr)); + lean_ctor_set(obj, 1, build_lean_string(expected)); + obj + }, + DecompileError::BadConstantFormat { msg } => { + // tag 9, 1 object (String) + let obj = lean_alloc_ctor(9, 1, 0); + lean_ctor_set(obj, 0, build_lean_string(msg)); + obj + }, + DecompileError::Serialize(se) => { + // tag 10, 1 object (SerializeError) + let obj = lean_alloc_ctor(10, 1, 0); + lean_ctor_set(obj, 0, build_serialize_error(se)); + obj + }, + } + } +} + +/// Decode a Lean DecompileError to a Rust DecompileError. +pub fn decode_decompile_error(ptr: *const c_void) -> DecompileError { + unsafe { + let tag = lean_obj_tag(ptr as *mut _); + match tag { + 0 => { + let nat_ptr = lean_ctor_get(ptr as *mut _, 0); + let str_ptr = lean_ctor_get(ptr as *mut _, 1); + let base = ptr.cast::(); + let idx = *base.add(8 + 2 * 8).cast::(); + let refs_len = Nat::from_ptr(nat_ptr) + .to_u64() + .and_then(|x| usize::try_from(x).ok()) + .unwrap_or(0); + let constant = + as_ref_unsafe::(str_ptr.cast()).as_string().clone(); + DecompileError::InvalidRefIndex { idx, refs_len, constant } + }, + 1 => { + let nat_ptr = lean_ctor_get(ptr as *mut _, 0); + let str_ptr = lean_ctor_get(ptr as *mut _, 1); + let base = ptr.cast::(); + let idx = *base.add(8 + 2 * 8).cast::(); + let univs_len = Nat::from_ptr(nat_ptr) + .to_u64() + .and_then(|x| usize::try_from(x).ok()) + .unwrap_or(0); + let constant = + as_ref_unsafe::(str_ptr.cast()).as_string().clone(); + DecompileError::InvalidUnivIndex { idx, univs_len, constant } + }, + 2 => { + let nat_ptr = 
lean_ctor_get(ptr as *mut _, 0); + let str_ptr = lean_ctor_get(ptr as *mut _, 1); + let base = ptr.cast::(); + let idx = *base.add(8 + 2 * 8).cast::(); + let max = Nat::from_ptr(nat_ptr) + .to_u64() + .and_then(|x| usize::try_from(x).ok()) + .unwrap_or(0); + let constant = + as_ref_unsafe::(str_ptr.cast()).as_string().clone(); + DecompileError::InvalidShareIndex { idx, max, constant } + }, + 3 => { + let nat_ptr = lean_ctor_get(ptr as *mut _, 0); + let str_ptr = lean_ctor_get(ptr as *mut _, 1); + let base = ptr.cast::(); + let idx = *base.add(8 + 2 * 8).cast::(); + let ctx_size = Nat::from_ptr(nat_ptr) + .to_u64() + .and_then(|x| usize::try_from(x).ok()) + .unwrap_or(0); + let constant = + as_ref_unsafe::(str_ptr.cast()).as_string().clone(); + DecompileError::InvalidRecIndex { idx, ctx_size, constant } + }, + 4 => { + let nat_ptr = lean_ctor_get(ptr as *mut _, 0); + let str_ptr = lean_ctor_get(ptr as *mut _, 1); + let base = ptr.cast::(); + let idx = *base.add(8 + 2 * 8).cast::(); + let max = Nat::from_ptr(nat_ptr) + .to_u64() + .and_then(|x| usize::try_from(x).ok()) + .unwrap_or(0); + let constant = + as_ref_unsafe::(str_ptr.cast()).as_string().clone(); + DecompileError::InvalidUnivVarIndex { idx, max, constant } + }, + 5 => { + let addr_ptr = lean_ctor_get(ptr as *mut _, 0); + DecompileError::MissingAddress(decode_ixon_address(addr_ptr)) + }, + 6 => { + let addr_ptr = lean_ctor_get(ptr as *mut _, 0); + DecompileError::MissingMetadata(decode_ixon_address(addr_ptr)) + }, + 7 => { + let addr_ptr = lean_ctor_get(ptr as *mut _, 0); + DecompileError::BlobNotFound(decode_ixon_address(addr_ptr)) + }, + 8 => { + let addr_ptr = lean_ctor_get(ptr as *mut _, 0); + let str_ptr = lean_ctor_get(ptr as *mut _, 1); + let addr = decode_ixon_address(addr_ptr); + let expected = + as_ref_unsafe::(str_ptr.cast()).as_string().clone(); + DecompileError::BadBlobFormat { addr, expected } + }, + 9 => { + let str_ptr = lean_ctor_get(ptr as *mut _, 0); + let msg = + 
as_ref_unsafe::(str_ptr.cast()).as_string().clone(); + DecompileError::BadConstantFormat { msg } + }, + 10 => { + let se_ptr = lean_ctor_get(ptr as *mut _, 0); + DecompileError::Serialize(decode_serialize_error(se_ptr)) + }, + _ => unreachable!("Invalid DecompileError tag: {}", tag), + } + } +} + +/// Build a Lean CompileError from a Rust CompileError. +/// +/// Tags 0–5: +/// 0: missingConstant (name : String) → 1 obj +/// 1: missingAddress (addr : Address) → 1 obj +/// 2: invalidMutualBlock (reason : String) → 1 obj +/// 3: unsupportedExpr (desc : String) → 1 obj +/// 4: unknownUnivParam (curr param : String) → 2 obj +/// 5: serializeError (msg : String) → 1 obj +pub fn build_compile_error(err: &CompileError) -> *mut c_void { + unsafe { + match err { + CompileError::MissingConstant { name } => { + let obj = lean_alloc_ctor(0, 1, 0); + lean_ctor_set(obj, 0, build_lean_string(name)); + obj + }, + CompileError::MissingAddress(addr) => { + let obj = lean_alloc_ctor(1, 1, 0); + lean_ctor_set(obj, 0, build_address_from_ixon(addr)); + obj + }, + CompileError::InvalidMutualBlock { reason } => { + let obj = lean_alloc_ctor(2, 1, 0); + lean_ctor_set(obj, 0, build_lean_string(reason)); + obj + }, + CompileError::UnsupportedExpr { desc } => { + let obj = lean_alloc_ctor(3, 1, 0); + lean_ctor_set(obj, 0, build_lean_string(desc)); + obj + }, + CompileError::UnknownUnivParam { curr, param } => { + let obj = lean_alloc_ctor(4, 2, 0); + lean_ctor_set(obj, 0, build_lean_string(curr)); + lean_ctor_set(obj, 1, build_lean_string(param)); + obj + }, + CompileError::Serialize(se) => { + let obj = lean_alloc_ctor(5, 1, 0); + lean_ctor_set(obj, 0, build_serialize_error(se)); + obj + }, + } + } +} + +/// Decode a Lean CompileError to a Rust CompileError. 
+pub fn decode_compile_error(ptr: *const c_void) -> CompileError { + unsafe { + let tag = lean_obj_tag(ptr as *mut _); + match tag { + 0 => { + let str_ptr = lean_ctor_get(ptr as *mut _, 0); + let name = + as_ref_unsafe::(str_ptr.cast()).as_string().clone(); + CompileError::MissingConstant { name } + }, + 1 => { + let addr_ptr = lean_ctor_get(ptr as *mut _, 0); + CompileError::MissingAddress(decode_ixon_address(addr_ptr)) + }, + 2 => { + let str_ptr = lean_ctor_get(ptr as *mut _, 0); + let reason = + as_ref_unsafe::(str_ptr.cast()).as_string().clone(); + CompileError::InvalidMutualBlock { reason } + }, + 3 => { + let str_ptr = lean_ctor_get(ptr as *mut _, 0); + let desc = + as_ref_unsafe::(str_ptr.cast()).as_string().clone(); + CompileError::UnsupportedExpr { desc } + }, + 4 => { + let str0 = lean_ctor_get(ptr as *mut _, 0); + let str1 = lean_ctor_get(ptr as *mut _, 1); + let curr = + as_ref_unsafe::(str0.cast()).as_string().clone(); + let param = + as_ref_unsafe::(str1.cast()).as_string().clone(); + CompileError::UnknownUnivParam { curr, param } + }, + 5 => { + let se_ptr = lean_ctor_get(ptr as *mut _, 0); + CompileError::Serialize(decode_serialize_error(se_ptr)) + }, + _ => unreachable!("Invalid CompileError tag: {}", tag), + } + } +} + +/// FFI: Round-trip a DecompileError: Lean → Rust → Lean. +#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_decompile_error( + ptr: *const c_void, +) -> *mut c_void { + let err = decode_decompile_error(ptr); + build_decompile_error(&err) +} + +/// FFI: Round-trip a CompileError: Lean → Rust → Lean. +#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_compile_error( + ptr: *const c_void, +) -> *mut c_void { + let err = decode_compile_error(ptr); + build_compile_error(&err) +} + +/// FFI: Round-trip a SerializeError: Lean → Rust → Lean. 
+#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_serialize_error( + ptr: *const c_void, +) -> *mut c_void { + let err = decode_serialize_error(ptr); + build_serialize_error(&err) +} + +// ============================================================================= +// Decompilation FFI +// ============================================================================= + +/// FFI: Decompile an Ixon.RawEnv → Except DecompileError (Array (Ix.Name × Ix.ConstantInfo)). Pure. +#[unsafe(no_mangle)] +pub extern "C" fn rs_decompile_env(raw_env_ptr: *const c_void) -> *mut c_void { + let decoded = decode_raw_env(raw_env_ptr); + let env = decoded_to_ixon_env(&decoded); + + // Wrap in CompileState (decompile_env only uses .env) + let stt = CompileState { + env, + name_to_addr: DashMap::new(), + blocks: DashSet::new(), + block_stats: DashMap::new(), + }; + + match decompile_env(&stt) { + Ok(dstt) => { + let entries: Vec<_> = dstt.env.into_iter().collect(); + let mut cache = LeanBuildCache::with_capacity(entries.len()); + unsafe { + let arr = lean_alloc_array(entries.len(), entries.len()); + for (i, (name, info)) in entries.iter().enumerate() { + let name_obj = build_name(&mut cache, name); + let info_obj = build_constant_info(&mut cache, info); + let pair = lean_alloc_ctor(0, 2, 0); + lean_ctor_set(pair, 0, name_obj); + lean_ctor_set(pair, 1, info_obj); + lean_array_set_core(arr, i, pair); + } + // Except.ok (tag 1) + let obj = lean_alloc_ctor(1, 1, 0); + lean_ctor_set(obj, 0, arr); + obj + } + }, + Err(e) => { + // Except.error (tag 0) — build DecompileError directly + unsafe { + let err_obj = build_decompile_error(&e); + let obj = lean_alloc_ctor(0, 1, 0); + lean_ctor_set(obj, 0, err_obj); + obj + } + }, + } +} diff --git a/src/lean/ffi/graph.rs b/src/lean/ffi/graph.rs new file mode 100644 index 00000000..5258c35d --- /dev/null +++ b/src/lean/ffi/graph.rs @@ -0,0 +1,136 @@ +//! Graph and SCC FFI functions. 
+ +use std::ffi::c_void; +use std::sync::Arc; + +use super::ffi_io_guard; +use crate::ix::condense::compute_sccs; +use crate::ix::graph::build_ref_graph; +use crate::lean::{ + lean_alloc_array, lean_alloc_ctor, lean_array_set_core, lean_ctor_set, + lean_io_result_mk_ok, +}; + +use super::builder::LeanBuildCache; +use super::ix::name::build_name; +use super::lean_env::lean_ptr_to_env; + +/// Build an Array (Ix.Name × Array Ix.Name) from a RefMap. +pub fn build_ref_graph_array( + cache: &mut LeanBuildCache, + refs: &crate::ix::graph::RefMap, +) -> *mut c_void { + unsafe { + let arr = lean_alloc_array(refs.len(), refs.len()); + for (i, (name, ref_set)) in refs.iter().enumerate() { + let name_obj = build_name(cache, name); + + let refs_arr = lean_alloc_array(ref_set.len(), ref_set.len()); + for (j, ref_name) in ref_set.iter().enumerate() { + let ref_name_obj = build_name(cache, ref_name); + lean_array_set_core(refs_arr, j, ref_name_obj); + } + + let pair = lean_alloc_ctor(0, 2, 0); + lean_ctor_set(pair, 0, name_obj); + lean_ctor_set(pair, 1, refs_arr); + + lean_array_set_core(arr, i, pair); + } + arr + } +} + +/// Build a RustCondensedBlocks structure. 
+pub fn build_condensed_blocks( + cache: &mut LeanBuildCache, + condensed: &crate::ix::condense::CondensedBlocks, +) -> *mut c_void { + unsafe { + // Build lowLinks: Array (Ix.Name × Ix.Name) + let low_links_arr = + lean_alloc_array(condensed.low_links.len(), condensed.low_links.len()); + for (i, (name, low_link)) in condensed.low_links.iter().enumerate() { + let name_obj = build_name(cache, name); + let low_link_obj = build_name(cache, low_link); + let pair = lean_alloc_ctor(0, 2, 0); + lean_ctor_set(pair, 0, name_obj); + lean_ctor_set(pair, 1, low_link_obj); + lean_array_set_core(low_links_arr, i, pair); + } + + // Build blocks: Array (Ix.Name × Array Ix.Name) + let blocks_arr = + lean_alloc_array(condensed.blocks.len(), condensed.blocks.len()); + for (i, (name, block_set)) in condensed.blocks.iter().enumerate() { + let name_obj = build_name(cache, name); + let block_names_arr = lean_alloc_array(block_set.len(), block_set.len()); + for (j, block_name) in block_set.iter().enumerate() { + let block_name_obj = build_name(cache, block_name); + lean_array_set_core(block_names_arr, j, block_name_obj); + } + let pair = lean_alloc_ctor(0, 2, 0); + lean_ctor_set(pair, 0, name_obj); + lean_ctor_set(pair, 1, block_names_arr); + lean_array_set_core(blocks_arr, i, pair); + } + + // Build blockRefs: Array (Ix.Name × Array Ix.Name) + let block_refs_arr = + lean_alloc_array(condensed.block_refs.len(), condensed.block_refs.len()); + for (i, (name, ref_set)) in condensed.block_refs.iter().enumerate() { + let name_obj = build_name(cache, name); + let refs_arr = lean_alloc_array(ref_set.len(), ref_set.len()); + for (j, ref_name) in ref_set.iter().enumerate() { + let ref_name_obj = build_name(cache, ref_name); + lean_array_set_core(refs_arr, j, ref_name_obj); + } + let pair = lean_alloc_ctor(0, 2, 0); + lean_ctor_set(pair, 0, name_obj); + lean_ctor_set(pair, 1, refs_arr); + lean_array_set_core(block_refs_arr, i, pair); + } + + // Build RustCondensedBlocks structure (3 fields) + let 
result = lean_alloc_ctor(0, 3, 0); + lean_ctor_set(result, 0, low_links_arr); + lean_ctor_set(result, 1, blocks_arr); + lean_ctor_set(result, 2, block_refs_arr); + result + } +} + +// ============================================================================= +// FFI Exports +// ============================================================================= + +/// FFI function to build a reference graph from a Lean environment. +#[unsafe(no_mangle)] +pub extern "C" fn rs_build_ref_graph( + env_consts_ptr: *const c_void, +) -> *mut c_void { + ffi_io_guard(std::panic::AssertUnwindSafe(|| { + let rust_env = lean_ptr_to_env(env_consts_ptr); + let rust_env = Arc::new(rust_env); + let ref_graph = build_ref_graph(&rust_env); + let mut cache = LeanBuildCache::with_capacity(rust_env.len()); + let result = build_ref_graph_array(&mut cache, &ref_graph.out_refs); + unsafe { lean_io_result_mk_ok(result) } + })) +} + +/// FFI function to compute SCCs from a Lean environment. +#[unsafe(no_mangle)] +pub extern "C" fn rs_compute_sccs( + env_consts_ptr: *const c_void, +) -> *mut c_void { + ffi_io_guard(std::panic::AssertUnwindSafe(|| { + let rust_env = lean_ptr_to_env(env_consts_ptr); + let rust_env = Arc::new(rust_env); + let ref_graph = build_ref_graph(&rust_env); + let condensed = compute_sccs(&ref_graph.out_refs); + let mut cache = LeanBuildCache::with_capacity(rust_env.len()); + let result = build_condensed_blocks(&mut cache, &condensed); + unsafe { lean_io_result_mk_ok(result) } + })) +} diff --git a/src/lean/ffi/ix.rs b/src/lean/ffi/ix.rs new file mode 100644 index 00000000..c205d33e --- /dev/null +++ b/src/lean/ffi/ix.rs @@ -0,0 +1,26 @@ +//! FFI for Ix types (canonical types with embedded hashes). +//! +//! This module provides build/decode/roundtrip functions for Ix types: +//! - Ix.Address - Blake3 hash wrapper +//! - Ix.Name - anonymous, str, num +//! - Ix.Level - zero, succ, max, imax, param, mvar +//! - Ix.Expr - 12 constructors +//! - Ix.ConstantInfo - 8 variants +//! 
- Ix.DataValue, Ix.Syntax, Ix.SourceInfo +//! - Ix.Environment + +pub mod address; +pub mod constant; +pub mod data; +pub mod env; +pub mod expr; +pub mod level; +pub mod name; + +pub use address::*; +pub use constant::*; +pub use data::*; +pub use env::*; +pub use expr::*; +pub use level::*; +pub use name::*; diff --git a/src/lean/ffi/ix/address.rs b/src/lean/ffi/ix/address.rs new file mode 100644 index 00000000..9b35abf8 --- /dev/null +++ b/src/lean/ffi/ix/address.rs @@ -0,0 +1,41 @@ +//! Ix.Address build/decode/roundtrip FFI. +//! +//! Address = { hash : ByteArray } - ByteArray wrapper for blake3 Hash + +use std::ffi::c_void; + +use crate::lean::{ + as_ref_unsafe, lean_alloc_sarray, lean_sarray_cptr, sarray::LeanSArrayObject, +}; + +/// Build a Ix.Address from a blake3::Hash. +/// Address = { hash : ByteArray } - single field struct, so UNBOXED to ByteArray +pub fn build_address(hash: &blake3::Hash) -> *mut c_void { + unsafe { + let bytes = hash.as_bytes(); + let ba = lean_alloc_sarray(1, bytes.len(), bytes.len()); + let data_ptr = lean_sarray_cptr(ba); + std::ptr::copy_nonoverlapping(bytes.as_ptr(), data_ptr, bytes.len()); + ba // Due to unboxing, ByteArray IS the Address + } +} + +/// Round-trip an Ix.Address: decode ByteArray, re-encode. 
+/// Address = { hash : ByteArray } - single field struct, so UNBOXED to ByteArray directly +#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_ix_address( + addr_ptr: *const c_void, +) -> *mut c_void { + unsafe { + // Address is a single-field struct { hash : ByteArray } + // Due to unboxing, addr_ptr IS the ByteArray directly + let ba: &LeanSArrayObject = as_ref_unsafe(addr_ptr.cast()); + let bytes = ba.data(); + + // Rebuild ByteArray - this IS the Address due to unboxing + let new_ba = lean_alloc_sarray(1, bytes.len(), bytes.len()); + let data_ptr = lean_sarray_cptr(new_ba); + std::ptr::copy_nonoverlapping(bytes.as_ptr(), data_ptr, bytes.len()); + new_ba + } +} diff --git a/src/lean/ffi/ix/constant.rs b/src/lean/ffi/ix/constant.rs new file mode 100644 index 00000000..19ebc7cf --- /dev/null +++ b/src/lean/ffi/ix/constant.rs @@ -0,0 +1,549 @@ +//! Ix.ConstantInfo build/decode/roundtrip FFI. +//! +//! ConstantInfo variants: +//! - Tag 0: axiomInfo (v : AxiomVal) +//! - Tag 1: defnInfo (v : DefinitionVal) +//! - Tag 2: thmInfo (v : TheoremVal) +//! - Tag 3: opaqueInfo (v : OpaqueVal) +//! - Tag 4: quotInfo (v : QuotVal) +//! - Tag 5: inductInfo (v : InductiveVal) +//! - Tag 6: ctorInfo (v : ConstructorVal) +//! 
- Tag 7: recInfo (v : RecursorVal) + +use std::ffi::c_void; + +use crate::ix::env::{ + AxiomVal, ConstantInfo, ConstantVal, ConstructorVal, DefinitionSafety, + DefinitionVal, InductiveVal, Name, OpaqueVal, QuotKind, QuotVal, + RecursorRule, RecursorVal, ReducibilityHints, TheoremVal, +}; +use crate::lean::array::LeanArrayObject; +use crate::lean::nat::Nat; +use crate::lean::{ + as_ref_unsafe, lean_alloc_array, lean_alloc_ctor, lean_array_set_core, + lean_box_fn, lean_ctor_get, lean_ctor_set, lean_ctor_set_uint8, + lean_is_scalar, lean_obj_tag, +}; + +use super::super::builder::LeanBuildCache; +use super::super::primitives::build_nat; +use super::expr::{build_expr, decode_ix_expr}; +use super::name::{ + build_name, build_name_array, decode_ix_name, decode_name_array, +}; + +/// Build a Ix.ConstantVal structure. +pub fn build_constant_val( + cache: &mut LeanBuildCache, + cv: &ConstantVal, +) -> *mut c_void { + unsafe { + // ConstantVal = { name : Name, levelParams : Array Name, type : Expr } + let name_obj = build_name(cache, &cv.name); + let level_params_obj = build_name_array(cache, &cv.level_params); + let type_obj = build_expr(cache, &cv.typ); + + let obj = lean_alloc_ctor(0, 3, 0); + lean_ctor_set(obj, 0, name_obj); + lean_ctor_set(obj, 1, level_params_obj); + lean_ctor_set(obj, 2, type_obj); + obj + } +} + +/// Build ReducibilityHints. +/// NOTE: In Lean 4, 0-field constructors are boxed scalars when the inductive has +/// other constructors with fields. So opaque and abbrev use lean_box_fn. 
+pub fn build_reducibility_hints(hints: &ReducibilityHints) -> *mut c_void { + unsafe { + match hints { + // | opaque -- tag 0, boxed as scalar + ReducibilityHints::Opaque => lean_box_fn(0), + // | abbrev -- tag 1, boxed as scalar + ReducibilityHints::Abbrev => lean_box_fn(1), + // | regular (h : UInt32) -- tag 2, object constructor + ReducibilityHints::Regular(h) => { + // UInt32 is a scalar, stored inline + let obj = lean_alloc_ctor(2, 0, 4); + // Set the uint32 at offset 0 in the scalar area + let ptr = obj.cast::(); + *(ptr.add(8).cast::()) = *h; + obj + }, + } + } +} + +/// Build a Ix.ConstantInfo from a Rust ConstantInfo. +pub fn build_constant_info( + cache: &mut LeanBuildCache, + info: &ConstantInfo, +) -> *mut c_void { + unsafe { + match info { + // | axiomInfo (v : AxiomVal) -- tag 0 + ConstantInfo::AxiomInfo(v) => { + // AxiomVal = { cnst : ConstantVal, isUnsafe : Bool } + let cnst_obj = build_constant_val(cache, &v.cnst); + let axiom_val = lean_alloc_ctor(0, 1, 1); + lean_ctor_set(axiom_val, 0, cnst_obj); + lean_ctor_set_uint8(axiom_val, 8, v.is_unsafe as u8); + + let obj = lean_alloc_ctor(0, 1, 0); + lean_ctor_set(obj, 0, axiom_val); + obj + }, + // | defnInfo (v : DefinitionVal) -- tag 1 + ConstantInfo::DefnInfo(v) => { + // DefinitionVal = { cnst, value, hints, safety, all } + // NOTE: safety (DefinitionSafety) is a small enum stored as SCALAR + // Memory layout: 4 obj fields (cnst, value, hints, all), 1 scalar byte (safety) + let cnst_obj = build_constant_val(cache, &v.cnst); + let value_obj = build_expr(cache, &v.value); + let hints_obj = build_reducibility_hints(&v.hints); + let all_obj = build_name_array(cache, &v.all); + let safety_byte = match v.safety { + DefinitionSafety::Unsafe => 0u8, + DefinitionSafety::Safe => 1u8, + DefinitionSafety::Partial => 2u8, + }; + + let defn_val = lean_alloc_ctor(0, 4, 1); // 4 obj fields, 1 scalar byte + lean_ctor_set(defn_val, 0, cnst_obj); + lean_ctor_set(defn_val, 1, value_obj); + lean_ctor_set(defn_val, 2, 
hints_obj); + lean_ctor_set(defn_val, 3, all_obj); + lean_ctor_set_uint8(defn_val, 4 * 8, safety_byte); + + let obj = lean_alloc_ctor(1, 1, 0); + lean_ctor_set(obj, 0, defn_val); + obj + }, + // | thmInfo (v : TheoremVal) -- tag 2 + ConstantInfo::ThmInfo(v) => { + // TheoremVal = { cnst, value, all } + let cnst_obj = build_constant_val(cache, &v.cnst); + let value_obj = build_expr(cache, &v.value); + let all_obj = build_name_array(cache, &v.all); + + let thm_val = lean_alloc_ctor(0, 3, 0); + lean_ctor_set(thm_val, 0, cnst_obj); + lean_ctor_set(thm_val, 1, value_obj); + lean_ctor_set(thm_val, 2, all_obj); + + let obj = lean_alloc_ctor(2, 1, 0); + lean_ctor_set(obj, 0, thm_val); + obj + }, + // | opaqueInfo (v : OpaqueVal) -- tag 3 + ConstantInfo::OpaqueInfo(v) => { + // OpaqueVal = { cnst, value, isUnsafe, all } + let cnst_obj = build_constant_val(cache, &v.cnst); + let value_obj = build_expr(cache, &v.value); + let all_obj = build_name_array(cache, &v.all); + + let opaque_val = lean_alloc_ctor(0, 3, 1); + lean_ctor_set(opaque_val, 0, cnst_obj); + lean_ctor_set(opaque_val, 1, value_obj); + lean_ctor_set(opaque_val, 2, all_obj); + lean_ctor_set_uint8(opaque_val, 3 * 8, v.is_unsafe as u8); + + let obj = lean_alloc_ctor(3, 1, 0); + lean_ctor_set(obj, 0, opaque_val); + obj + }, + // | quotInfo (v : QuotVal) -- tag 4 + ConstantInfo::QuotInfo(v) => { + // QuotVal = { cnst, kind } + // NOTE: QuotKind is a small enum stored as SCALAR + // Memory layout: 1 obj field (cnst), 1 scalar byte (kind) + let cnst_obj = build_constant_val(cache, &v.cnst); + let kind_byte = match v.kind { + QuotKind::Type => 0u8, + QuotKind::Ctor => 1u8, + QuotKind::Lift => 2u8, + QuotKind::Ind => 3u8, + }; + + let quot_val = lean_alloc_ctor(0, 1, 1); // 1 obj field, 1 scalar byte + lean_ctor_set(quot_val, 0, cnst_obj); + lean_ctor_set_uint8(quot_val, 8, kind_byte); + + let obj = lean_alloc_ctor(4, 1, 0); + lean_ctor_set(obj, 0, quot_val); + obj + }, + // | inductInfo (v : InductiveVal) -- tag 5 + 
ConstantInfo::InductInfo(v) => { + // InductiveVal = { cnst, numParams, numIndices, all, ctors, numNested, isRec, isUnsafe, isReflexive } + let cnst_obj = build_constant_val(cache, &v.cnst); + let num_params_obj = build_nat(&v.num_params); + let num_indices_obj = build_nat(&v.num_indices); + let all_obj = build_name_array(cache, &v.all); + let ctors_obj = build_name_array(cache, &v.ctors); + let num_nested_obj = build_nat(&v.num_nested); + + // 6 object fields, 3 scalar bytes for bools + let induct_val = lean_alloc_ctor(0, 6, 3); + lean_ctor_set(induct_val, 0, cnst_obj); + lean_ctor_set(induct_val, 1, num_params_obj); + lean_ctor_set(induct_val, 2, num_indices_obj); + lean_ctor_set(induct_val, 3, all_obj); + lean_ctor_set(induct_val, 4, ctors_obj); + lean_ctor_set(induct_val, 5, num_nested_obj); + lean_ctor_set_uint8(induct_val, 6 * 8, v.is_rec as u8); + lean_ctor_set_uint8(induct_val, 6 * 8 + 1, v.is_unsafe as u8); + lean_ctor_set_uint8(induct_val, 6 * 8 + 2, v.is_reflexive as u8); + + let obj = lean_alloc_ctor(5, 1, 0); + lean_ctor_set(obj, 0, induct_val); + obj + }, + // | ctorInfo (v : ConstructorVal) -- tag 6 + ConstantInfo::CtorInfo(v) => { + // ConstructorVal = { cnst, induct, cidx, numParams, numFields, isUnsafe } + let cnst_obj = build_constant_val(cache, &v.cnst); + let induct_obj = build_name(cache, &v.induct); + let cidx_obj = build_nat(&v.cidx); + let num_params_obj = build_nat(&v.num_params); + let num_fields_obj = build_nat(&v.num_fields); + + // 5 object fields, 1 scalar byte for bool + let ctor_val = lean_alloc_ctor(0, 5, 1); + lean_ctor_set(ctor_val, 0, cnst_obj); + lean_ctor_set(ctor_val, 1, induct_obj); + lean_ctor_set(ctor_val, 2, cidx_obj); + lean_ctor_set(ctor_val, 3, num_params_obj); + lean_ctor_set(ctor_val, 4, num_fields_obj); + lean_ctor_set_uint8(ctor_val, 5 * 8, v.is_unsafe as u8); + + let obj = lean_alloc_ctor(6, 1, 0); + lean_ctor_set(obj, 0, ctor_val); + obj + }, + // | recInfo (v : RecursorVal) -- tag 7 + ConstantInfo::RecInfo(v) => 
{ + // RecursorVal = { cnst, all, numParams, numIndices, numMotives, numMinors, rules, k, isUnsafe } + let cnst_obj = build_constant_val(cache, &v.cnst); + let all_obj = build_name_array(cache, &v.all); + let num_params_obj = build_nat(&v.num_params); + let num_indices_obj = build_nat(&v.num_indices); + let num_motives_obj = build_nat(&v.num_motives); + let num_minors_obj = build_nat(&v.num_minors); + let rules_obj = build_recursor_rules(cache, &v.rules); + + // 7 object fields, 2 scalar bytes for bools + let rec_val = lean_alloc_ctor(0, 7, 2); + lean_ctor_set(rec_val, 0, cnst_obj); + lean_ctor_set(rec_val, 1, all_obj); + lean_ctor_set(rec_val, 2, num_params_obj); + lean_ctor_set(rec_val, 3, num_indices_obj); + lean_ctor_set(rec_val, 4, num_motives_obj); + lean_ctor_set(rec_val, 5, num_minors_obj); + lean_ctor_set(rec_val, 6, rules_obj); + lean_ctor_set_uint8(rec_val, 7 * 8, v.k as u8); + lean_ctor_set_uint8(rec_val, 7 * 8 + 1, v.is_unsafe as u8); + + let obj = lean_alloc_ctor(7, 1, 0); + lean_ctor_set(obj, 0, rec_val); + obj + }, + } + } +} + +/// Build an Array of RecursorRule. +fn build_recursor_rules( + cache: &mut LeanBuildCache, + rules: &[RecursorRule], +) -> *mut c_void { + unsafe { + let arr = lean_alloc_array(rules.len(), rules.len()); + for (i, rule) in rules.iter().enumerate() { + // RecursorRule = { ctor : Name, nFields : Nat, rhs : Expr } + let ctor_obj = build_name(cache, &rule.ctor); + let n_fields_obj = build_nat(&rule.n_fields); + let rhs_obj = build_expr(cache, &rule.rhs); + + let rule_obj = lean_alloc_ctor(0, 3, 0); + lean_ctor_set(rule_obj, 0, ctor_obj); + lean_ctor_set(rule_obj, 1, n_fields_obj); + lean_ctor_set(rule_obj, 2, rhs_obj); + + lean_array_set_core(arr, i, rule_obj); + } + arr + } +} + +// ============================================================================= +// ConstantInfo Decoders +// ============================================================================= + +/// Decode Ix.ConstantVal from Lean pointer. 
+/// ConstantVal = { name : Name, levelParams : Array Name, type : Expr } +pub fn decode_constant_val(ptr: *const c_void) -> ConstantVal { + unsafe { + let name_ptr = lean_ctor_get(ptr as *mut _, 0); + let level_params_ptr = lean_ctor_get(ptr as *mut _, 1); + let type_ptr = lean_ctor_get(ptr as *mut _, 2); + + let name = decode_ix_name(name_ptr); + + let level_params_obj: &LeanArrayObject = + as_ref_unsafe(level_params_ptr.cast()); + let level_params: Vec = + level_params_obj.data().iter().map(|&p| decode_ix_name(p)).collect(); + + let typ = decode_ix_expr(type_ptr); + + ConstantVal { name, level_params, typ } + } +} + +/// Decode Lean.ReducibilityHints from Lean pointer. +/// | opaque -- tag 0 +/// | abbrev -- tag 1 +/// | regular (h : UInt32) -- tag 2 +/// +/// NOTE: In Lean 4, boxed scalars are `(tag << 1) | 1`: +/// - opaque (tag 0) → scalar value 1 +/// - abbrev (tag 1) → scalar value 3 +pub fn decode_reducibility_hints(ptr: *const c_void) -> ReducibilityHints { + unsafe { + if lean_is_scalar(ptr) { + // Unbox the scalar: tag = (ptr >> 1) + let tag = (ptr as usize) >> 1; + match tag { + 0 => return ReducibilityHints::Opaque, + 1 => return ReducibilityHints::Abbrev, + _ => panic!("Invalid ReducibilityHints scalar tag: {}", tag), + } + } + + let tag = lean_obj_tag(ptr as *mut _); + match tag { + 0 => ReducibilityHints::Opaque, + 1 => ReducibilityHints::Abbrev, + 2 => { + // regular: 0 obj fields, 4 scalar bytes (UInt32) + let ctor_ptr = ptr.cast::(); + let h = *(ctor_ptr.add(8).cast::()); + ReducibilityHints::Regular(h) + }, + _ => panic!("Invalid ReducibilityHints tag: {}", tag), + } + } +} + +/// Decode Ix.RecursorRule from Lean pointer. 
+/// RecursorRule = { ctor : Name, nfields : Nat, rhs : Expr } +fn decode_recursor_rule(ptr: *const c_void) -> RecursorRule { + unsafe { + let ctor_ptr = lean_ctor_get(ptr as *mut _, 0); + let n_fields_ptr = lean_ctor_get(ptr as *mut _, 1); + let rhs_ptr = lean_ctor_get(ptr as *mut _, 2); + + RecursorRule { + ctor: decode_ix_name(ctor_ptr), + n_fields: Nat::from_ptr(n_fields_ptr), + rhs: decode_ix_expr(rhs_ptr), + } + } +} + +/// Decode Ix.ConstantInfo from Lean pointer. +pub fn decode_constant_info(ptr: *const c_void) -> ConstantInfo { + unsafe { + let tag = lean_obj_tag(ptr as *mut _); + let inner_ptr = lean_ctor_get(ptr as *mut _, 0); + + match tag { + 0 => { + // axiomInfo: AxiomVal = { cnst : ConstantVal, isUnsafe : Bool } + // Structure: 1 obj field (cnst), 1 scalar byte (isUnsafe) + let cnst_ptr = lean_ctor_get(inner_ptr as *mut _, 0); + let ctor: &crate::lean::ctor::LeanCtorObject = + as_ref_unsafe(inner_ptr.cast()); + let is_unsafe = ctor.get_scalar_u8(1, 0) != 0; + + ConstantInfo::AxiomInfo(AxiomVal { + cnst: decode_constant_val(cnst_ptr), + is_unsafe, + }) + }, + 1 => { + // defnInfo: DefinitionVal = { cnst, value, hints, safety, all } + // NOTE: safety (DefinitionSafety) is a small enum and is stored as a SCALAR field + // Memory layout: 4 obj fields (cnst, value, hints, all), 1 scalar byte (safety) + let cnst_ptr = lean_ctor_get(inner_ptr as *mut _, 0); + let value_ptr = lean_ctor_get(inner_ptr as *mut _, 1); + let hints_ptr = lean_ctor_get(inner_ptr as *mut _, 2); + let all_ptr = lean_ctor_get(inner_ptr as *mut _, 3); // all is at index 3, not 4! 
+ + // safety is a scalar at offset 4*8 = 32 bytes from start of object fields + let ctor: &crate::lean::ctor::LeanCtorObject = + as_ref_unsafe(inner_ptr.cast()); + let safety_byte = ctor.get_scalar_u8(4, 0); // 4 obj fields, offset 0 in scalar area + let safety = match safety_byte { + 0 => DefinitionSafety::Unsafe, + 1 => DefinitionSafety::Safe, + 2 => DefinitionSafety::Partial, + _ => panic!("Invalid DefinitionSafety: {}", safety_byte), + }; + + ConstantInfo::DefnInfo(DefinitionVal { + cnst: decode_constant_val(cnst_ptr), + value: decode_ix_expr(value_ptr), + hints: decode_reducibility_hints(hints_ptr), + safety, + all: decode_name_array(all_ptr), + }) + }, + 2 => { + // thmInfo: TheoremVal = { cnst, value, all } + let cnst_ptr = lean_ctor_get(inner_ptr as *mut _, 0); + let value_ptr = lean_ctor_get(inner_ptr as *mut _, 1); + let all_ptr = lean_ctor_get(inner_ptr as *mut _, 2); + + ConstantInfo::ThmInfo(TheoremVal { + cnst: decode_constant_val(cnst_ptr), + value: decode_ix_expr(value_ptr), + all: decode_name_array(all_ptr), + }) + }, + 3 => { + // opaqueInfo: OpaqueVal = { cnst, value, isUnsafe, all } + // Structure: 3 obj fields (cnst, value, all), 1 scalar byte (isUnsafe) + let cnst_ptr = lean_ctor_get(inner_ptr as *mut _, 0); + let value_ptr = lean_ctor_get(inner_ptr as *mut _, 1); + let all_ptr = lean_ctor_get(inner_ptr as *mut _, 2); + let ctor: &crate::lean::ctor::LeanCtorObject = + as_ref_unsafe(inner_ptr.cast()); + let is_unsafe = ctor.get_scalar_u8(3, 0) != 0; + + ConstantInfo::OpaqueInfo(OpaqueVal { + cnst: decode_constant_val(cnst_ptr), + value: decode_ix_expr(value_ptr), + is_unsafe, + all: decode_name_array(all_ptr), + }) + }, + 4 => { + // quotInfo: QuotVal = { cnst, kind } + // NOTE: QuotKind is a small enum (4 0-field ctors), stored as SCALAR + // Memory layout: 1 obj field (cnst), 1 scalar byte (kind) + let cnst_ptr = lean_ctor_get(inner_ptr as *mut _, 0); + + let ctor: &crate::lean::ctor::LeanCtorObject = + as_ref_unsafe(inner_ptr.cast()); + let 
kind_byte = ctor.get_scalar_u8(1, 0); // 1 obj field, offset 0 in scalar area + let kind = match kind_byte { + 0 => QuotKind::Type, + 1 => QuotKind::Ctor, + 2 => QuotKind::Lift, + 3 => QuotKind::Ind, + _ => panic!("Invalid QuotKind: {}", kind_byte), + }; + + ConstantInfo::QuotInfo(QuotVal { + cnst: decode_constant_val(cnst_ptr), + kind, + }) + }, + 5 => { + // inductInfo: InductiveVal = { cnst, numParams, numIndices, all, ctors, numNested, isRec, isUnsafe, isReflexive } + // 6 obj fields, 3 scalar bytes + let cnst_ptr = lean_ctor_get(inner_ptr as *mut _, 0); + let num_params_ptr = lean_ctor_get(inner_ptr as *mut _, 1); + let num_indices_ptr = lean_ctor_get(inner_ptr as *mut _, 2); + let all_ptr = lean_ctor_get(inner_ptr as *mut _, 3); + let ctors_ptr = lean_ctor_get(inner_ptr as *mut _, 4); + let num_nested_ptr = lean_ctor_get(inner_ptr as *mut _, 5); + + let ctor: &crate::lean::ctor::LeanCtorObject = + as_ref_unsafe(inner_ptr.cast()); + let is_rec = ctor.get_scalar_u8(6, 0) != 0; + let is_unsafe = ctor.get_scalar_u8(6, 1) != 0; + let is_reflexive = ctor.get_scalar_u8(6, 2) != 0; + + ConstantInfo::InductInfo(InductiveVal { + cnst: decode_constant_val(cnst_ptr), + num_params: Nat::from_ptr(num_params_ptr), + num_indices: Nat::from_ptr(num_indices_ptr), + all: decode_name_array(all_ptr), + ctors: decode_name_array(ctors_ptr), + num_nested: Nat::from_ptr(num_nested_ptr), + is_rec, + is_unsafe, + is_reflexive, + }) + }, + 6 => { + // ctorInfo: ConstructorVal = { cnst, induct, cidx, numParams, numFields, isUnsafe } + // 5 obj fields, 1 scalar byte + let cnst_ptr = lean_ctor_get(inner_ptr as *mut _, 0); + let induct_ptr = lean_ctor_get(inner_ptr as *mut _, 1); + let cidx_ptr = lean_ctor_get(inner_ptr as *mut _, 2); + let num_params_ptr = lean_ctor_get(inner_ptr as *mut _, 3); + let num_fields_ptr = lean_ctor_get(inner_ptr as *mut _, 4); + + let ctor: &crate::lean::ctor::LeanCtorObject = + as_ref_unsafe(inner_ptr.cast()); + let is_unsafe = ctor.get_scalar_u8(5, 0) != 0; + 
+ ConstantInfo::CtorInfo(ConstructorVal { + cnst: decode_constant_val(cnst_ptr), + induct: decode_ix_name(induct_ptr), + cidx: Nat::from_ptr(cidx_ptr), + num_params: Nat::from_ptr(num_params_ptr), + num_fields: Nat::from_ptr(num_fields_ptr), + is_unsafe, + }) + }, + 7 => { + // recInfo: RecursorVal = { cnst, all, numParams, numIndices, numMotives, numMinors, rules, k, isUnsafe } + // 7 obj fields, 2 scalar bytes + let cnst_ptr = lean_ctor_get(inner_ptr as *mut _, 0); + let all_ptr = lean_ctor_get(inner_ptr as *mut _, 1); + let num_params_ptr = lean_ctor_get(inner_ptr as *mut _, 2); + let num_indices_ptr = lean_ctor_get(inner_ptr as *mut _, 3); + let num_motives_ptr = lean_ctor_get(inner_ptr as *mut _, 4); + let num_minors_ptr = lean_ctor_get(inner_ptr as *mut _, 5); + let rules_ptr = lean_ctor_get(inner_ptr as *mut _, 6); + + let ctor: &crate::lean::ctor::LeanCtorObject = + as_ref_unsafe(inner_ptr.cast()); + let k = ctor.get_scalar_u8(7, 0) != 0; + let is_unsafe = ctor.get_scalar_u8(7, 1) != 0; + + let rules_obj: &LeanArrayObject = as_ref_unsafe(rules_ptr.cast()); + let rules: Vec = + rules_obj.data().iter().map(|&p| decode_recursor_rule(p)).collect(); + + ConstantInfo::RecInfo(RecursorVal { + cnst: decode_constant_val(cnst_ptr), + all: decode_name_array(all_ptr), + num_params: Nat::from_ptr(num_params_ptr), + num_indices: Nat::from_ptr(num_indices_ptr), + num_motives: Nat::from_ptr(num_motives_ptr), + num_minors: Nat::from_ptr(num_minors_ptr), + rules, + k, + is_unsafe, + }) + }, + _ => panic!("Invalid ConstantInfo tag: {}", tag), + } + } +} + +/// Round-trip an Ix.ConstantInfo: decode from Lean, re-encode via LeanBuildCache. 
+#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_ix_constant_info( + info_ptr: *const c_void, +) -> *mut c_void { + let info = decode_constant_info(info_ptr); + let mut cache = LeanBuildCache::new(); + build_constant_info(&mut cache, &info) +} diff --git a/src/lean/ffi/ix/data.rs b/src/lean/ffi/ix/data.rs new file mode 100644 index 00000000..e195c74e --- /dev/null +++ b/src/lean/ffi/ix/data.rs @@ -0,0 +1,530 @@ +//! Ix.DataValue, Ix.Syntax, Ix.SourceInfo build/decode/roundtrip FFI. + +use std::ffi::c_void; + +use crate::ix::env::{ + DataValue, Int, Name, SourceInfo, Substring, Syntax, SyntaxPreresolved, +}; +use crate::lean::array::LeanArrayObject; +use crate::lean::nat::Nat; +use crate::lean::string::LeanStringObject; +use crate::lean::{ + as_ref_unsafe, lean_alloc_array, lean_alloc_ctor, lean_array_set_core, + lean_ctor_get, lean_ctor_set, lean_ctor_set_uint8, lean_is_scalar, + lean_mk_string, lean_obj_tag, +}; + +use super::super::builder::LeanBuildCache; +use super::super::primitives::build_nat; +use super::name::{build_name, decode_ix_name}; + +/// Build a Ix.Int (ofNat or negSucc). +pub fn build_int(int: &Int) -> *mut c_void { + unsafe { + match int { + Int::OfNat(n) => { + let obj = lean_alloc_ctor(0, 1, 0); + lean_ctor_set(obj, 0, build_nat(n)); + obj + }, + Int::NegSucc(n) => { + let obj = lean_alloc_ctor(1, 1, 0); + lean_ctor_set(obj, 0, build_nat(n)); + obj + }, + } + } +} + +/// Build a Ix.Substring. +pub fn build_substring(ss: &Substring) -> *mut c_void { + unsafe { + let s_cstr = crate::lean::safe_cstring(ss.str.as_str()); + let obj = lean_alloc_ctor(0, 3, 0); + lean_ctor_set(obj, 0, lean_mk_string(s_cstr.as_ptr())); + lean_ctor_set(obj, 1, build_nat(&ss.start_pos)); + lean_ctor_set(obj, 2, build_nat(&ss.stop_pos)); + obj + } +} + +/// Build a Ix.SourceInfo. 
+pub fn build_source_info(si: &SourceInfo) -> *mut c_void { + unsafe { + match si { + // | original (leading : Substring) (pos : Nat) (trailing : Substring) (endPos : Nat) -- tag 0 + SourceInfo::Original(leading, pos, trailing, end_pos) => { + let obj = lean_alloc_ctor(0, 4, 0); + lean_ctor_set(obj, 0, build_substring(leading)); + lean_ctor_set(obj, 1, build_nat(pos)); + lean_ctor_set(obj, 2, build_substring(trailing)); + lean_ctor_set(obj, 3, build_nat(end_pos)); + obj + }, + // | synthetic (pos : Nat) (endPos : Nat) (canonical : Bool) -- tag 1 + SourceInfo::Synthetic(pos, end_pos, canonical) => { + let obj = lean_alloc_ctor(1, 2, 1); + lean_ctor_set(obj, 0, build_nat(pos)); + lean_ctor_set(obj, 1, build_nat(end_pos)); + lean_ctor_set_uint8(obj, 2 * 8, *canonical as u8); + obj + }, + // | none -- tag 2 + SourceInfo::None => lean_alloc_ctor(2, 0, 0), + } + } +} + +/// Build a Ix.SyntaxPreresolved. +pub fn build_syntax_preresolved( + cache: &mut LeanBuildCache, + sp: &SyntaxPreresolved, +) -> *mut c_void { + unsafe { + match sp { + // | namespace (name : Name) -- tag 0 + SyntaxPreresolved::Namespace(name) => { + let obj = lean_alloc_ctor(0, 1, 0); + lean_ctor_set(obj, 0, build_name(cache, name)); + obj + }, + // | decl (name : Name) (aliases : Array String) -- tag 1 + SyntaxPreresolved::Decl(name, aliases) => { + let name_obj = build_name(cache, name); + let aliases_obj = build_string_array(aliases); + let obj = lean_alloc_ctor(1, 2, 0); + lean_ctor_set(obj, 0, name_obj); + lean_ctor_set(obj, 1, aliases_obj); + obj + }, + } + } +} + +/// Build an Array of Strings. +pub fn build_string_array(strings: &[String]) -> *mut c_void { + unsafe { + let arr = lean_alloc_array(strings.len(), strings.len()); + for (i, s) in strings.iter().enumerate() { + let s_cstr = crate::lean::safe_cstring(s.as_str()); + lean_array_set_core(arr, i, lean_mk_string(s_cstr.as_ptr())); + } + arr + } +} + +/// Build a Ix.Syntax. 
+pub fn build_syntax(cache: &mut LeanBuildCache, syn: &Syntax) -> *mut c_void { + unsafe { + match syn { + // | missing -- tag 0 + Syntax::Missing => lean_alloc_ctor(0, 0, 0), + // | node (info : SourceInfo) (kind : Name) (args : Array Syntax) -- tag 1 + Syntax::Node(info, kind, args) => { + let info_obj = build_source_info(info); + let kind_obj = build_name(cache, kind); + let args_obj = build_syntax_array(cache, args); + let obj = lean_alloc_ctor(1, 3, 0); + lean_ctor_set(obj, 0, info_obj); + lean_ctor_set(obj, 1, kind_obj); + lean_ctor_set(obj, 2, args_obj); + obj + }, + // | atom (info : SourceInfo) (val : String) -- tag 2 + Syntax::Atom(info, val) => { + let info_obj = build_source_info(info); + let val_cstr = crate::lean::safe_cstring(val.as_str()); + let obj = lean_alloc_ctor(2, 2, 0); + lean_ctor_set(obj, 0, info_obj); + lean_ctor_set(obj, 1, lean_mk_string(val_cstr.as_ptr())); + obj + }, + // | ident (info : SourceInfo) (rawVal : Substring) (val : Name) (preresolved : Array SyntaxPreresolved) -- tag 3 + Syntax::Ident(info, raw_val, val, preresolved) => { + let info_obj = build_source_info(info); + let raw_val_obj = build_substring(raw_val); + let val_obj = build_name(cache, val); + let preresolved_obj = + build_syntax_preresolved_array(cache, preresolved); + let obj = lean_alloc_ctor(3, 4, 0); + lean_ctor_set(obj, 0, info_obj); + lean_ctor_set(obj, 1, raw_val_obj); + lean_ctor_set(obj, 2, val_obj); + lean_ctor_set(obj, 3, preresolved_obj); + obj + }, + } + } +} + +/// Build an Array of Syntax. +pub fn build_syntax_array( + cache: &mut LeanBuildCache, + items: &[Syntax], +) -> *mut c_void { + unsafe { + let arr = lean_alloc_array(items.len(), items.len()); + for (i, item) in items.iter().enumerate() { + let item_obj = build_syntax(cache, item); + lean_array_set_core(arr, i, item_obj); + } + arr + } +} + +/// Build an Array of SyntaxPreresolved. 
+pub fn build_syntax_preresolved_array( + cache: &mut LeanBuildCache, + items: &[SyntaxPreresolved], +) -> *mut c_void { + unsafe { + let arr = lean_alloc_array(items.len(), items.len()); + for (i, item) in items.iter().enumerate() { + let item_obj = build_syntax_preresolved(cache, item); + lean_array_set_core(arr, i, item_obj); + } + arr + } +} + +/// Build Ix.DataValue. +pub fn build_data_value( + cache: &mut LeanBuildCache, + dv: &DataValue, +) -> *mut c_void { + unsafe { + match dv { + DataValue::OfString(s) => { + let s_cstr = crate::lean::safe_cstring(s.as_str()); + let obj = lean_alloc_ctor(0, 1, 0); + lean_ctor_set(obj, 0, lean_mk_string(s_cstr.as_ptr())); + obj + }, + DataValue::OfBool(b) => { + // 0 object fields, 1 scalar byte + let obj = lean_alloc_ctor(1, 0, 1); + lean_ctor_set_uint8(obj, 0, *b as u8); + obj + }, + DataValue::OfName(n) => { + let obj = lean_alloc_ctor(2, 1, 0); + lean_ctor_set(obj, 0, build_name(cache, n)); + obj + }, + DataValue::OfNat(n) => { + let obj = lean_alloc_ctor(3, 1, 0); + lean_ctor_set(obj, 0, build_nat(n)); + obj + }, + DataValue::OfInt(i) => { + let obj = lean_alloc_ctor(4, 1, 0); + lean_ctor_set(obj, 0, build_int(i)); + obj + }, + DataValue::OfSyntax(syn) => { + let obj = lean_alloc_ctor(5, 1, 0); + lean_ctor_set(obj, 0, build_syntax(cache, syn)); + obj + }, + } + } +} + +/// Build an Array of (Name × DataValue) for mdata. 
+pub fn build_kvmap( + cache: &mut LeanBuildCache, + data: &[(Name, DataValue)], +) -> *mut c_void { + unsafe { + let arr = lean_alloc_array(data.len(), data.len()); + for (i, (name, dv)) in data.iter().enumerate() { + let name_obj = build_name(cache, name); + let dv_obj = build_data_value(cache, dv); + // Prod (Name × DataValue) + let pair = lean_alloc_ctor(0, 2, 0); + lean_ctor_set(pair, 0, name_obj); + lean_ctor_set(pair, 1, dv_obj); + lean_array_set_core(arr, i, pair); + } + arr + } +} + +// ============================================================================= +// Decode Functions +// ============================================================================= + +/// Decode Ix.Int from Lean pointer. +/// Ix.Int: ofNat (tag 0, 1 field) | negSucc (tag 1, 1 field) +pub fn decode_ix_int(ptr: *const c_void) -> Int { + unsafe { + let tag = lean_obj_tag(ptr as *mut _); + let nat_ptr = lean_ctor_get(ptr as *mut _, 0); + let nat = Nat::from_ptr(nat_ptr); + match tag { + 0 => Int::OfNat(nat), + 1 => Int::NegSucc(nat), + _ => panic!("Invalid Ix.Int tag: {}", tag), + } + } +} + +/// Decode Ix.DataValue from a Lean pointer. 
+pub fn decode_data_value(ptr: *const c_void) -> DataValue { + unsafe { + let tag = lean_obj_tag(ptr as *mut _); + + match tag { + 0 => { + // ofString: 1 object field + let inner_ptr = lean_ctor_get(ptr as *mut _, 0); + let str_obj: &LeanStringObject = as_ref_unsafe(inner_ptr.cast()); + DataValue::OfString(str_obj.as_string()) + }, + 1 => { + // ofBool: 0 object fields, 1 scalar byte + let ctor: &crate::lean::ctor::LeanCtorObject = + as_ref_unsafe(ptr.cast()); + let b = ctor.get_scalar_u8(0, 0) != 0; + DataValue::OfBool(b) + }, + 2 => { + // ofName: 1 object field + let inner_ptr = lean_ctor_get(ptr as *mut _, 0); + DataValue::OfName(decode_ix_name(inner_ptr)) + }, + 3 => { + // ofNat: 1 object field + let inner_ptr = lean_ctor_get(ptr as *mut _, 0); + DataValue::OfNat(Nat::from_ptr(inner_ptr)) + }, + 4 => { + // ofInt: 1 object field + let inner_ptr = lean_ctor_get(ptr as *mut _, 0); + let int_tag = lean_obj_tag(inner_ptr as *mut _); + let nat_ptr = lean_ctor_get(inner_ptr as *mut _, 0); + let nat = Nat::from_ptr(nat_ptr); + match int_tag { + 0 => DataValue::OfInt(Int::OfNat(nat)), + 1 => DataValue::OfInt(Int::NegSucc(nat)), + _ => panic!("Invalid Int tag: {}", int_tag), + } + }, + 5 => { + // ofSyntax: 1 object field + let inner_ptr = lean_ctor_get(ptr as *mut _, 0); + DataValue::OfSyntax(decode_ix_syntax(inner_ptr).into()) + }, + _ => panic!("Invalid DataValue tag: {}", tag), + } + } +} + +/// Decode Ix.Syntax from a Lean pointer. 
+pub fn decode_ix_syntax(ptr: *const c_void) -> Syntax {
+  unsafe {
+    if lean_is_scalar(ptr) {
+      return Syntax::Missing;
+    }
+    let tag = lean_obj_tag(ptr as *mut _);
+    match tag {
+      0 => Syntax::Missing,
+      1 => {
+        // node: info, kind, args
+        let info_ptr = lean_ctor_get(ptr as *mut _, 0);
+        let kind_ptr = lean_ctor_get(ptr as *mut _, 1);
+        let args_ptr = lean_ctor_get(ptr as *mut _, 2);
+
+        let info = decode_ix_source_info(info_ptr);
+        let kind = decode_ix_name(kind_ptr);
+        let args_obj: &LeanArrayObject = as_ref_unsafe(args_ptr.cast());
+        let args: Vec<Syntax> =
+          args_obj.data().iter().map(|&p| decode_ix_syntax(p)).collect();
+
+        Syntax::Node(info, kind, args)
+      },
+      2 => {
+        // atom: info, val
+        let info_ptr = lean_ctor_get(ptr as *mut _, 0);
+        let val_ptr = lean_ctor_get(ptr as *mut _, 1);
+
+        let info = decode_ix_source_info(info_ptr);
+        let val_obj: &LeanStringObject = as_ref_unsafe(val_ptr.cast());
+
+        Syntax::Atom(info, val_obj.as_string())
+      },
+      3 => {
+        // ident: info, rawVal, val, preresolved
+        let info_ptr = lean_ctor_get(ptr as *mut _, 0);
+        let raw_val_ptr = lean_ctor_get(ptr as *mut _, 1);
+        let val_ptr = lean_ctor_get(ptr as *mut _, 2);
+        let preresolved_ptr = lean_ctor_get(ptr as *mut _, 3);
+
+        let info = decode_ix_source_info(info_ptr);
+        let raw_val = decode_substring(raw_val_ptr);
+        let val = decode_ix_name(val_ptr);
+        let preresolved_obj: &LeanArrayObject =
+          as_ref_unsafe(preresolved_ptr.cast());
+        let preresolved: Vec<SyntaxPreresolved> = preresolved_obj
+          .data()
+          .iter()
+          .map(|&p| decode_syntax_preresolved(p))
+          .collect();
+
+        Syntax::Ident(info, raw_val, val, preresolved)
+      },
+      _ => panic!("Invalid Syntax tag: {}", tag),
+    }
+  }
+}
+
+/// Decode Ix.SourceInfo.
+pub fn decode_ix_source_info(ptr: *const c_void) -> SourceInfo { + unsafe { + if lean_is_scalar(ptr) { + return SourceInfo::None; + } + let tag = lean_obj_tag(ptr as *mut _); + match tag { + 0 => { + // original + let leading_ptr = lean_ctor_get(ptr as *mut _, 0); + let pos_ptr = lean_ctor_get(ptr as *mut _, 1); + let trailing_ptr = lean_ctor_get(ptr as *mut _, 2); + let end_pos_ptr = lean_ctor_get(ptr as *mut _, 3); + + SourceInfo::Original( + decode_substring(leading_ptr), + Nat::from_ptr(pos_ptr), + decode_substring(trailing_ptr), + Nat::from_ptr(end_pos_ptr), + ) + }, + 1 => { + // synthetic: 2 obj fields (pos, end_pos), 1 scalar byte (canonical) + let pos_ptr = lean_ctor_get(ptr as *mut _, 0); + let end_pos_ptr = lean_ctor_get(ptr as *mut _, 1); + + let ctor: &crate::lean::ctor::LeanCtorObject = + as_ref_unsafe(ptr.cast()); + let canonical = ctor.get_scalar_u8(2, 0) != 0; + + SourceInfo::Synthetic( + Nat::from_ptr(pos_ptr), + Nat::from_ptr(end_pos_ptr), + canonical, + ) + }, + 2 => SourceInfo::None, + _ => panic!("Invalid SourceInfo tag: {}", tag), + } + } +} + +/// Decode Ix.Substring. +pub fn decode_substring(ptr: *const c_void) -> Substring { + unsafe { + let str_ptr = lean_ctor_get(ptr as *mut _, 0); + let start_ptr = lean_ctor_get(ptr as *mut _, 1); + let stop_ptr = lean_ctor_get(ptr as *mut _, 2); + + let str_obj: &LeanStringObject = as_ref_unsafe(str_ptr.cast()); + Substring { + str: str_obj.as_string(), + start_pos: Nat::from_ptr(start_ptr), + stop_pos: Nat::from_ptr(stop_ptr), + } + } +} + +/// Decode Ix.SyntaxPreresolved. 
+pub fn decode_syntax_preresolved(ptr: *const c_void) -> SyntaxPreresolved {
+  unsafe {
+    let tag = lean_obj_tag(ptr as *mut _);
+    match tag {
+      0 => {
+        // namespace
+        let name_ptr = lean_ctor_get(ptr as *mut _, 0);
+        SyntaxPreresolved::Namespace(decode_ix_name(name_ptr))
+      },
+      1 => {
+        // decl
+        let name_ptr = lean_ctor_get(ptr as *mut _, 0);
+        let aliases_ptr = lean_ctor_get(ptr as *mut _, 1);
+
+        let name = decode_ix_name(name_ptr);
+        let aliases_obj: &LeanArrayObject = as_ref_unsafe(aliases_ptr.cast());
+        let aliases: Vec<String> = aliases_obj
+          .data()
+          .iter()
+          .map(|&p| {
+            let s: &LeanStringObject = as_ref_unsafe(p.cast());
+            s.as_string()
+          })
+          .collect();
+
+        SyntaxPreresolved::Decl(name, aliases)
+      },
+      _ => panic!("Invalid SyntaxPreresolved tag: {}", tag),
+    }
+  }
+}
+
+// =============================================================================
+// FFI Exports
+// =============================================================================
+
+/// Round-trip an Ix.Int: decode from Lean, re-encode.
+#[unsafe(no_mangle)]
+pub extern "C" fn rs_roundtrip_ix_int(int_ptr: *const c_void) -> *mut c_void {
+  let int_val = decode_ix_int(int_ptr);
+  build_int(&int_val)
+}
+
+/// Round-trip an Ix.Substring: decode from Lean, re-encode.
+#[unsafe(no_mangle)]
+pub extern "C" fn rs_roundtrip_ix_substring(
+  sub_ptr: *const c_void,
+) -> *mut c_void {
+  let sub = decode_substring(sub_ptr);
+  build_substring(&sub)
+}
+
+/// Round-trip an Ix.SourceInfo: decode from Lean, re-encode.
+#[unsafe(no_mangle)]
+pub extern "C" fn rs_roundtrip_ix_source_info(
+  si_ptr: *const c_void,
+) -> *mut c_void {
+  let si = decode_ix_source_info(si_ptr);
+  build_source_info(&si)
+}
+
+/// Round-trip an Ix.SyntaxPreresolved.
+#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_ix_syntax_preresolved( + sp_ptr: *const c_void, +) -> *mut c_void { + let sp = decode_syntax_preresolved(sp_ptr); + let mut cache = LeanBuildCache::new(); + build_syntax_preresolved(&mut cache, &sp) +} + +/// Round-trip an Ix.Syntax: decode from Lean, re-encode. +#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_ix_syntax( + syn_ptr: *const c_void, +) -> *mut c_void { + let syn = decode_ix_syntax(syn_ptr); + let mut cache = LeanBuildCache::new(); + build_syntax(&mut cache, &syn) +} + +/// Round-trip an Ix.DataValue: decode from Lean, re-encode. +#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_ix_data_value( + dv_ptr: *const c_void, +) -> *mut c_void { + let dv = decode_data_value(dv_ptr); + let mut cache = LeanBuildCache::new(); + build_data_value(&mut cache, &dv) +} diff --git a/src/lean/ffi/ix/env.rs b/src/lean/ffi/ix/env.rs new file mode 100644 index 00000000..38776728 --- /dev/null +++ b/src/lean/ffi/ix/env.rs @@ -0,0 +1,294 @@ +//! Ix.Environment build/decode/roundtrip FFI. + +use std::ffi::c_void; + +use rustc_hash::FxHashMap; + +use crate::ix::env::{ConstantInfo, Name}; +use crate::lean::array::LeanArrayObject; +use crate::lean::{ + as_ref_unsafe, lean_alloc_array, lean_alloc_ctor, lean_array_set_core, + lean_box_fn, lean_ctor_get, lean_ctor_set, lean_is_scalar, lean_obj_tag, +}; + +use super::super::builder::LeanBuildCache; +use super::constant::{build_constant_info, decode_constant_info}; +use super::name::{build_name, decode_ix_name}; + +// ============================================================================= +// HashMap Building +// ============================================================================= + +/// Build a Lean HashMap from pre-built key-value pairs. 
+///
+/// Lean's Std.HashMap structure (with unboxing):
+/// - HashMap α β unboxes through DHashMap to Raw
+/// - Raw = { size : Nat, buckets : Array (AssocList α β) }
+/// - Field 0 = size (Nat), Field 1 = buckets (Array)
+///
+/// AssocList α β = nil | cons (key : α) (value : β) (tail : AssocList α β)
+pub fn build_hashmap_from_pairs(
+  pairs: Vec<(*mut c_void, *mut c_void, u64)>, // (key_obj, val_obj, hash)
+) -> *mut c_void {
+  let size = pairs.len();
+  let bucket_count = (size * 4 / 3 + 1).next_power_of_two().max(8);
+
+  unsafe {
+    // Create array of AssocLists (initially all nil = boxed 0)
+    let buckets = lean_alloc_array(bucket_count, bucket_count);
+    for i in 0..bucket_count {
+      lean_array_set_core(buckets, i, lean_box_fn(0)); // nil
+    }
+
+    // Insert entries
+    for (key_obj, val_obj, hash) in pairs {
+      let bucket_idx =
+        usize::try_from(hash).expect("hash overflows usize") % bucket_count;
+
+      // Get current bucket (AssocList)
+      let buckets_arr = buckets.cast::<LeanArrayObject>();
+      let current_tail = (*buckets_arr).data()[bucket_idx];
+
+      // cons (key : α) (value : β) (tail : AssocList α β) -- tag 1
+      let cons = lean_alloc_ctor(1, 3, 0);
+      lean_ctor_set(cons, 0, key_obj);
+      lean_ctor_set(cons, 1, val_obj);
+      lean_ctor_set(cons, 2, current_tail as *mut c_void);
+
+      lean_array_set_core(buckets, bucket_idx, cons);
+    }
+
+    // Build Raw { size : Nat, buckets : Array }
+    // Due to unboxing, this IS the HashMap directly
+    // Field 0 = size, Field 1 = buckets (2 object fields, no scalars)
+    let size_obj = if size <= (usize::MAX >> 1) {
+      lean_box_fn(size)
+    } else {
+      crate::lean::lean_uint64_to_nat(size as u64)
+    };
+
+    let raw = lean_alloc_ctor(0, 2, 0);
+    lean_ctor_set(raw, 0, size_obj);
+    lean_ctor_set(raw, 1, buckets);
+    raw
+  }
+}
+
+// =============================================================================
+// Environment Building
+// =============================================================================
+
+/// Build a Ix.RawEnvironment from collected caches.
+/// RawEnvironment has arrays that Lean will convert to HashMaps.
+///
+/// Ix.RawEnvironment = {
+///   consts : Array (Name × ConstantInfo)
+/// }
+///
+/// NOTE: RawEnvironment with a single field is UNBOXED by Lean,
+/// so we return just the array, not a structure containing it.
+pub fn build_raw_environment(
+  cache: &mut LeanBuildCache,
+  consts: &FxHashMap<Name, ConstantInfo>,
+) -> *mut c_void {
+  unsafe {
+    // Build consts array: Array (Name × ConstantInfo)
+    // RawEnvironment is a single-field structure that may be unboxed to just the array
+    let consts_arr = lean_alloc_array(consts.len(), consts.len());
+    for (i, (name, info)) in consts.iter().enumerate() {
+      let key_obj = build_name(cache, name);
+      let val_obj = build_constant_info(cache, info);
+      // Build pair (Name × ConstantInfo)
+      let pair = lean_alloc_ctor(0, 2, 0);
+      lean_ctor_set(pair, 0, key_obj);
+      lean_ctor_set(pair, 1, val_obj);
+      lean_array_set_core(consts_arr, i, pair);
+    }
+
+    consts_arr
+  }
+}
+
+// =============================================================================
+// Environment Decoder
+// =============================================================================
+
+/// Decode a HashMap's AssocList and collect key-value pairs using a custom decoder.
+fn decode_assoc_list<K, V, FK, FV>(
+  list_ptr: *const c_void,
+  decode_key: FK,
+  decode_val: FV,
+) -> Vec<(K, V)>
+where
+  FK: Fn(*const c_void) -> K,
+  FV: Fn(*const c_void) -> V,
+{
+  let mut result = Vec::new();
+  let mut current = list_ptr;
+
+  loop {
+    unsafe {
+      if lean_is_scalar(current) {
+        break;
+      }
+
+      let tag = lean_obj_tag(current as *mut _);
+      if tag == 0 {
+        // AssocList.nil
+        break;
+      }
+
+      // AssocList.cons: 3 fields (key, value, tail)
+      let key_ptr = lean_ctor_get(current as *mut _, 0);
+      let value_ptr = lean_ctor_get(current as *mut _, 1);
+      let tail_ptr = lean_ctor_get(current as *mut _, 2);
+
+      result.push((decode_key(key_ptr), decode_val(value_ptr)));
+      current = tail_ptr;
+    }
+  }
+
+  result
+}
+
+/// Decode a Lean HashMap into a Vec of key-value pairs.
+/// HashMap structure (after unboxing): Raw { size : Nat, buckets : Array (AssocList α β) }
+///
+/// Due to single-field struct unboxing:
+/// - HashMap { inner : DHashMap } unboxes to DHashMap
+/// - DHashMap { inner : Raw, wf : Prop } unboxes to Raw (Prop is erased)
+/// - Raw { size : Nat, buckets : Array } - field 0 = size, field 1 = buckets
+fn decode_hashmap<K, V, FK, FV>(
+  map_ptr: *const c_void,
+  decode_key: FK,
+  decode_val: FV,
+) -> Vec<(K, V)>
+where
+  FK: Fn(*const c_void) -> K + Copy,
+  FV: Fn(*const c_void) -> V + Copy,
+{
+  unsafe {
+    // Raw layout: field 0 = size (Nat), field 1 = buckets (Array)
+    let _size_ptr = lean_ctor_get(map_ptr as *mut _, 0); // unused but needed for layout
+    let buckets_ptr = lean_ctor_get(map_ptr as *mut _, 1);
+
+    let buckets_obj: &LeanArrayObject = as_ref_unsafe(buckets_ptr.cast());
+
+    let mut pairs = Vec::new();
+    for &bucket_ptr in buckets_obj.data() {
+      let bucket_pairs = decode_assoc_list(bucket_ptr, decode_key, decode_val);
+      pairs.extend(bucket_pairs);
+    }
+
+    pairs
+  }
+}
+
+/// Decode Ix.Environment from Lean pointer.
+///
+/// Ix.Environment = {
+///   consts : HashMap Name ConstantInfo
+/// }
+///
+/// NOTE: Environment with a single field is UNBOXED by Lean,
+/// so the pointer IS the HashMap directly, not a structure containing it.
+pub fn decode_ix_environment(
+  ptr: *const c_void,
+) -> FxHashMap<Name, ConstantInfo> {
+  // Environment is unboxed - ptr IS the HashMap directly
+  let consts_pairs = decode_hashmap(ptr, decode_ix_name, decode_constant_info);
+  let mut consts: FxHashMap<Name, ConstantInfo> = FxHashMap::default();
+  for (name, info) in consts_pairs {
+    consts.insert(name, info);
+  }
+  consts
+}
+
+/// Decode Ix.RawEnvironment from Lean pointer into HashMap.
+/// RawEnvironment = { consts : Array (Name × ConstantInfo) }
+/// NOTE: Unboxed to just Array. This version deduplicates by name.
+pub fn decode_ix_raw_environment(
+  ptr: *const c_void,
+) -> FxHashMap<Name, ConstantInfo> {
+  unsafe {
+    // RawEnvironment is a single-field structure that may be unboxed
+    // Try treating ptr as the array directly first
+    let arr_obj: &LeanArrayObject = as_ref_unsafe(ptr.cast());
+    let mut consts: FxHashMap<Name, ConstantInfo> = FxHashMap::default();
+
+    for &pair_ptr in arr_obj.data() {
+      let name_ptr = lean_ctor_get(pair_ptr as *mut _, 0);
+      let info_ptr = lean_ctor_get(pair_ptr as *mut _, 1);
+      let name = decode_ix_name(name_ptr);
+      let info = decode_constant_info(info_ptr);
+      consts.insert(name, info);
+    }
+
+    consts
+  }
+}
+
+/// Decode Ix.RawEnvironment from Lean pointer preserving array structure.
+/// This version preserves all entries including duplicates.
+pub fn decode_ix_raw_environment_vec( + ptr: *const c_void, +) -> Vec<(Name, ConstantInfo)> { + unsafe { + let arr_obj: &LeanArrayObject = as_ref_unsafe(ptr.cast()); + let mut consts = Vec::with_capacity(arr_obj.data().len()); + + for &pair_ptr in arr_obj.data() { + let name_ptr = lean_ctor_get(pair_ptr as *mut _, 0); + let info_ptr = lean_ctor_get(pair_ptr as *mut _, 1); + let name = decode_ix_name(name_ptr); + let info = decode_constant_info(info_ptr); + consts.push((name, info)); + } + + consts + } +} + +/// Build Ix.RawEnvironment from Vec, preserving order and duplicates. +pub fn build_raw_environment_from_vec( + cache: &mut LeanBuildCache, + consts: &[(Name, ConstantInfo)], +) -> *mut c_void { + unsafe { + let consts_arr = lean_alloc_array(consts.len(), consts.len()); + for (i, (name, info)) in consts.iter().enumerate() { + let key_obj = build_name(cache, name); + let val_obj = build_constant_info(cache, info); + let pair = lean_alloc_ctor(0, 2, 0); + lean_ctor_set(pair, 0, key_obj); + lean_ctor_set(pair, 1, val_obj); + lean_array_set_core(consts_arr, i, pair); + } + consts_arr + } +} + +// ============================================================================= +// FFI Exports +// ============================================================================= + +/// Round-trip an Ix.Environment: decode from Lean, re-encode. +#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_ix_environment( + env_ptr: *const c_void, +) -> *mut c_void { + let env = decode_ix_environment(env_ptr); + let mut cache = LeanBuildCache::with_capacity(env.len()); + build_raw_environment(&mut cache, &env) +} + +/// Round-trip an Ix.RawEnvironment: decode from Lean, re-encode. +/// Uses Vec-preserving functions to maintain array structure and order. 
+#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_ix_raw_environment( + env_ptr: *const c_void, +) -> *mut c_void { + let env = decode_ix_raw_environment_vec(env_ptr); + let mut cache = LeanBuildCache::with_capacity(env.len()); + build_raw_environment_from_vec(&mut cache, &env) +} diff --git a/src/lean/ffi/ix/expr.rs b/src/lean/ffi/ix/expr.rs new file mode 100644 index 00000000..598d5a77 --- /dev/null +++ b/src/lean/ffi/ix/expr.rs @@ -0,0 +1,430 @@ +//! Ix.Expr build/decode/roundtrip FFI. +//! +//! Ix.Expr layout (12 constructors): +//! - Tag 0: bvar (idx : Nat) (hash : Address) +//! - Tag 1: fvar (name : Name) (hash : Address) +//! - Tag 2: mvar (name : Name) (hash : Address) +//! - Tag 3: sort (level : Level) (hash : Address) +//! - Tag 4: const (name : Name) (levels : Array Level) (hash : Address) +//! - Tag 5: app (fn arg : Expr) (hash : Address) +//! - Tag 6: lam (name : Name) (ty body : Expr) (bi : BinderInfo) (hash : Address) +//! - Tag 7: forallE (name : Name) (ty body : Expr) (bi : BinderInfo) (hash : Address) +//! - Tag 8: letE (name : Name) (ty val body : Expr) (nonDep : Bool) (hash : Address) +//! - Tag 9: lit (l : Literal) (hash : Address) +//! - Tag 10: mdata (data : Array (Name × DataValue)) (expr : Expr) (hash : Address) +//! 
- Tag 11: proj (typeName : Name) (idx : Nat) (struct : Expr) (hash : Address) + +use std::ffi::c_void; + +use crate::ix::env::{ + BinderInfo, DataValue, Expr, ExprData, Level, Literal, Name, +}; +use crate::lean::array::LeanArrayObject; +use crate::lean::nat::Nat; +use crate::lean::string::LeanStringObject; +use crate::lean::{ + as_ref_unsafe, lean_alloc_array, lean_alloc_ctor, lean_array_set_core, + lean_box_fn, lean_ctor_get, lean_ctor_set, lean_ctor_set_uint8, lean_inc, + lean_mk_string, lean_obj_tag, +}; + +use super::super::builder::LeanBuildCache; +use super::super::primitives::build_nat; +use super::address::build_address; +use super::data::{build_data_value, decode_data_value}; +use super::level::{build_level, build_level_array, decode_ix_level}; +use super::name::{build_name, decode_ix_name}; + +/// Build a Lean Ix.Expr with embedded hash. +/// Uses caching to avoid rebuilding the same expression. +pub fn build_expr(cache: &mut LeanBuildCache, expr: &Expr) -> *mut c_void { + let hash = *expr.get_hash(); + if let Some(&cached) = cache.exprs.get(&hash) { + unsafe { lean_inc(cached) }; + return cached; + } + + let result = unsafe { + match expr.as_data() { + ExprData::Bvar(idx, h) => { + let obj = lean_alloc_ctor(0, 2, 0); + lean_ctor_set(obj, 0, build_nat(idx)); + lean_ctor_set(obj, 1, build_address(h)); + obj + }, + ExprData::Fvar(name, h) => { + let obj = lean_alloc_ctor(1, 2, 0); + lean_ctor_set(obj, 0, build_name(cache, name)); + lean_ctor_set(obj, 1, build_address(h)); + obj + }, + ExprData::Mvar(name, h) => { + let obj = lean_alloc_ctor(2, 2, 0); + lean_ctor_set(obj, 0, build_name(cache, name)); + lean_ctor_set(obj, 1, build_address(h)); + obj + }, + ExprData::Sort(level, h) => { + let obj = lean_alloc_ctor(3, 2, 0); + lean_ctor_set(obj, 0, build_level(cache, level)); + lean_ctor_set(obj, 1, build_address(h)); + obj + }, + ExprData::Const(name, levels, h) => { + let name_obj = build_name(cache, name); + let levels_obj = build_level_array(cache, 
levels); + let obj = lean_alloc_ctor(4, 3, 0); + lean_ctor_set(obj, 0, name_obj); + lean_ctor_set(obj, 1, levels_obj); + lean_ctor_set(obj, 2, build_address(h)); + obj + }, + ExprData::App(fn_expr, arg_expr, h) => { + let fn_obj = build_expr(cache, fn_expr); + let arg_obj = build_expr(cache, arg_expr); + let obj = lean_alloc_ctor(5, 3, 0); + lean_ctor_set(obj, 0, fn_obj); + lean_ctor_set(obj, 1, arg_obj); + lean_ctor_set(obj, 2, build_address(h)); + obj + }, + ExprData::Lam(name, ty, body, bi, h) => { + let name_obj = build_name(cache, name); + let ty_obj = build_expr(cache, ty); + let body_obj = build_expr(cache, body); + let hash_obj = build_address(h); + // 4 object fields, 1 scalar byte for BinderInfo + let obj = lean_alloc_ctor(6, 4, 1); + lean_ctor_set(obj, 0, name_obj); + lean_ctor_set(obj, 1, ty_obj); + lean_ctor_set(obj, 2, body_obj); + lean_ctor_set(obj, 3, hash_obj); + lean_ctor_set_uint8(obj, 4 * 8, binder_info_to_u8(bi)); + obj + }, + ExprData::ForallE(name, ty, body, bi, h) => { + let name_obj = build_name(cache, name); + let ty_obj = build_expr(cache, ty); + let body_obj = build_expr(cache, body); + let hash_obj = build_address(h); + let obj = lean_alloc_ctor(7, 4, 1); + lean_ctor_set(obj, 0, name_obj); + lean_ctor_set(obj, 1, ty_obj); + lean_ctor_set(obj, 2, body_obj); + lean_ctor_set(obj, 3, hash_obj); + lean_ctor_set_uint8(obj, 4 * 8, binder_info_to_u8(bi)); + obj + }, + ExprData::LetE(name, ty, val, body, non_dep, h) => { + let name_obj = build_name(cache, name); + let ty_obj = build_expr(cache, ty); + let val_obj = build_expr(cache, val); + let body_obj = build_expr(cache, body); + let hash_obj = build_address(h); + // 5 object fields, 1 scalar byte for Bool + let obj = lean_alloc_ctor(8, 5, 1); + lean_ctor_set(obj, 0, name_obj); + lean_ctor_set(obj, 1, ty_obj); + lean_ctor_set(obj, 2, val_obj); + lean_ctor_set(obj, 3, body_obj); + lean_ctor_set(obj, 4, hash_obj); + lean_ctor_set_uint8(obj, 5 * 8, *non_dep as u8); + obj + }, + ExprData::Lit(lit, 
h) => { + let lit_obj = build_literal(lit); + let obj = lean_alloc_ctor(9, 2, 0); + lean_ctor_set(obj, 0, lit_obj); + lean_ctor_set(obj, 1, build_address(h)); + obj + }, + ExprData::Mdata(md, inner, h) => { + let md_obj = build_mdata_array(cache, md); + let inner_obj = build_expr(cache, inner); + let obj = lean_alloc_ctor(10, 3, 0); + lean_ctor_set(obj, 0, md_obj); + lean_ctor_set(obj, 1, inner_obj); + lean_ctor_set(obj, 2, build_address(h)); + obj + }, + ExprData::Proj(type_name, idx, struct_expr, h) => { + let name_obj = build_name(cache, type_name); + let idx_obj = build_nat(idx); + let struct_obj = build_expr(cache, struct_expr); + let obj = lean_alloc_ctor(11, 4, 0); + lean_ctor_set(obj, 0, name_obj); + lean_ctor_set(obj, 1, idx_obj); + lean_ctor_set(obj, 2, struct_obj); + lean_ctor_set(obj, 3, build_address(h)); + obj + }, + } + }; + + cache.exprs.insert(hash, result); + result +} + +/// Build an Array of (Name × DataValue) for mdata. +fn build_mdata_array( + cache: &mut LeanBuildCache, + md: &[(Name, DataValue)], +) -> *mut c_void { + unsafe { + let arr = lean_alloc_array(md.len(), md.len()); + for (i, (name, dv)) in md.iter().enumerate() { + let pair = build_name_datavalue_pair(cache, name, dv); + lean_array_set_core(arr, i, pair); + } + arr + } +} + +/// Build a (Name, DataValue) pair (Prod). +fn build_name_datavalue_pair( + cache: &mut LeanBuildCache, + name: &Name, + dv: &DataValue, +) -> *mut c_void { + unsafe { + let name_obj = build_name(cache, name); + let dv_obj = build_data_value(cache, dv); + let pair = lean_alloc_ctor(0, 2, 0); + lean_ctor_set(pair, 0, name_obj); + lean_ctor_set(pair, 1, dv_obj); + pair + } +} + +/// Build a Literal (natVal or strVal). 
+pub fn build_literal(lit: &Literal) -> *mut c_void {
+  unsafe {
+    match lit {
+      Literal::NatVal(n) => {
+        let obj = lean_alloc_ctor(0, 1, 0);
+        lean_ctor_set(obj, 0, build_nat(n));
+        obj
+      },
+      Literal::StrVal(s) => {
+        let s_cstr = crate::lean::safe_cstring(s.as_str());
+        let obj = lean_alloc_ctor(1, 1, 0);
+        lean_ctor_set(obj, 0, lean_mk_string(s_cstr.as_ptr()));
+        obj
+      },
+    }
+  }
+}
+
+/// Build Ix.BinderInfo enum.
+/// BinderInfo is a 4-constructor enum with no fields, stored as boxed scalar.
+pub fn build_binder_info(bi: &BinderInfo) -> *mut c_void {
+  lean_box_fn(binder_info_to_u8(bi) as usize)
+}
+
+/// Convert BinderInfo to u8 tag.
+pub fn binder_info_to_u8(bi: &BinderInfo) -> u8 {
+  match bi {
+    BinderInfo::Default => 0,
+    BinderInfo::Implicit => 1,
+    BinderInfo::StrictImplicit => 2,
+    BinderInfo::InstImplicit => 3,
+  }
+}
+
+/// Decode a Lean Ix.Expr to Rust Expr.
+pub fn decode_ix_expr(ptr: *const c_void) -> Expr {
+  unsafe {
+    let tag = lean_obj_tag(ptr as *mut _);
+    match tag {
+      0 => {
+        // bvar
+        let idx_ptr = lean_ctor_get(ptr as *mut _, 0);
+        let idx = Nat::from_ptr(idx_ptr);
+        Expr::bvar(idx)
+      },
+      1 => {
+        // fvar
+        let name_ptr = lean_ctor_get(ptr as *mut _, 0);
+        let name = decode_ix_name(name_ptr);
+        Expr::fvar(name)
+      },
+      2 => {
+        // mvar
+        let name_ptr = lean_ctor_get(ptr as *mut _, 0);
+        let name = decode_ix_name(name_ptr);
+        Expr::mvar(name)
+      },
+      3 => {
+        // sort
+        let level_ptr = lean_ctor_get(ptr as *mut _, 0);
+        let level = decode_ix_level(level_ptr);
+        Expr::sort(level)
+      },
+      4 => {
+        // const
+        let name_ptr = lean_ctor_get(ptr as *mut _, 0);
+        let levels_ptr = lean_ctor_get(ptr as *mut _, 1);
+
+        let name = decode_ix_name(name_ptr);
+        let levels_obj: &LeanArrayObject = as_ref_unsafe(levels_ptr.cast());
+        let levels: Vec<Level> =
+          levels_obj.data().iter().map(|&p| decode_ix_level(p)).collect();
+
+        Expr::cnst(name, levels)
+      },
+      5 => {
+        // app
+        let fn_ptr = lean_ctor_get(ptr as *mut _, 0);
+        let arg_ptr = lean_ctor_get(ptr as *mut
_, 1); + let fn_expr = decode_ix_expr(fn_ptr); + let arg_expr = decode_ix_expr(arg_ptr); + Expr::app(fn_expr, arg_expr) + }, + 6 => { + // lam: name, ty, body, hash, bi (scalar) + let name_ptr = lean_ctor_get(ptr as *mut _, 0); + let ty_ptr = lean_ctor_get(ptr as *mut _, 1); + let body_ptr = lean_ctor_get(ptr as *mut _, 2); + // hash at field 3 + // bi is a scalar byte at offset 4*8 + + let name = decode_ix_name(name_ptr); + let ty = decode_ix_expr(ty_ptr); + let body = decode_ix_expr(body_ptr); + + // Read BinderInfo scalar (4 obj fields: name, ty, body, hash) + let ctor: &crate::lean::ctor::LeanCtorObject = + as_ref_unsafe(ptr.cast()); + let bi_byte = ctor.get_scalar_u8(4, 0); + let bi = decode_binder_info(bi_byte); + + Expr::lam(name, ty, body, bi) + }, + 7 => { + // forallE: same layout as lam + let name_ptr = lean_ctor_get(ptr as *mut _, 0); + let ty_ptr = lean_ctor_get(ptr as *mut _, 1); + let body_ptr = lean_ctor_get(ptr as *mut _, 2); + + let name = decode_ix_name(name_ptr); + let ty = decode_ix_expr(ty_ptr); + let body = decode_ix_expr(body_ptr); + + // 4 obj fields: name, ty, body, hash + let ctor: &crate::lean::ctor::LeanCtorObject = + as_ref_unsafe(ptr.cast()); + let bi_byte = ctor.get_scalar_u8(4, 0); + let bi = decode_binder_info(bi_byte); + + Expr::all(name, ty, body, bi) + }, + 8 => { + // letE: name, ty, val, body, hash, nonDep (scalar) + let name_ptr = lean_ctor_get(ptr as *mut _, 0); + let ty_ptr = lean_ctor_get(ptr as *mut _, 1); + let val_ptr = lean_ctor_get(ptr as *mut _, 2); + let body_ptr = lean_ctor_get(ptr as *mut _, 3); + // hash at field 4 + // nonDep is scalar byte after 5 obj fields + + let name = decode_ix_name(name_ptr); + let ty = decode_ix_expr(ty_ptr); + let val = decode_ix_expr(val_ptr); + let body = decode_ix_expr(body_ptr); + + // 5 obj fields: name, ty, val, body, hash + let ctor: &crate::lean::ctor::LeanCtorObject = + as_ref_unsafe(ptr.cast()); + let non_dep = ctor.get_scalar_u8(5, 0) != 0; + + Expr::letE(name, ty, val, body, 
non_dep) + }, + 9 => { + // lit + let lit_ptr = lean_ctor_get(ptr as *mut _, 0); + let lit = decode_literal(lit_ptr); + Expr::lit(lit) + }, + 10 => { + // mdata: data, expr, hash + let data_ptr = lean_ctor_get(ptr as *mut _, 0); + let expr_ptr = lean_ctor_get(ptr as *mut _, 1); + + let data_obj: &LeanArrayObject = as_ref_unsafe(data_ptr.cast()); + let data: Vec<(Name, DataValue)> = + data_obj.data().iter().map(|&p| decode_name_data_value(p)).collect(); + + let inner = decode_ix_expr(expr_ptr); + Expr::mdata(data, inner) + }, + 11 => { + // proj: typeName, idx, struct, hash + let type_name_ptr = lean_ctor_get(ptr as *mut _, 0); + let idx_ptr = lean_ctor_get(ptr as *mut _, 1); + let struct_ptr = lean_ctor_get(ptr as *mut _, 2); + + let type_name = decode_ix_name(type_name_ptr); + let idx = Nat::from_ptr(idx_ptr); + let struct_expr = decode_ix_expr(struct_ptr); + + Expr::proj(type_name, idx, struct_expr) + }, + _ => panic!("Invalid Ix.Expr tag: {}", tag), + } + } +} + +/// Decode Lean.Literal from a Lean pointer. +pub fn decode_literal(ptr: *const c_void) -> Literal { + unsafe { + let tag = lean_obj_tag(ptr as *mut _); + match tag { + 0 => { + // natVal + let nat_ptr = lean_ctor_get(ptr as *mut _, 0); + let nat = Nat::from_ptr(nat_ptr); + Literal::NatVal(nat) + }, + 1 => { + // strVal + let str_ptr = lean_ctor_get(ptr as *mut _, 0); + let str_obj: &LeanStringObject = as_ref_unsafe(str_ptr.cast()); + Literal::StrVal(str_obj.as_string()) + }, + _ => panic!("Invalid Literal tag: {}", tag), + } + } +} + +/// Decode a (Name × DataValue) pair for mdata. +fn decode_name_data_value(ptr: *const c_void) -> (Name, DataValue) { + unsafe { + // Prod: ctor 0 with 2 fields + let name_ptr = lean_ctor_get(ptr as *mut _, 0); + let dv_ptr = lean_ctor_get(ptr as *mut _, 1); + + let name = decode_ix_name(name_ptr); + let dv = decode_data_value(dv_ptr); + + (name, dv) + } +} + +/// Decode BinderInfo from byte. 
+pub fn decode_binder_info(bi_byte: u8) -> BinderInfo { + match bi_byte { + 0 => BinderInfo::Default, + 1 => BinderInfo::Implicit, + 2 => BinderInfo::StrictImplicit, + 3 => BinderInfo::InstImplicit, + _ => panic!("Invalid BinderInfo: {}", bi_byte), + } +} + +/// Round-trip an Ix.Expr: decode from Lean, re-encode via LeanBuildCache. +#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_ix_expr(expr_ptr: *const c_void) -> *mut c_void { + let expr = decode_ix_expr(expr_ptr); + let mut cache = LeanBuildCache::new(); + build_expr(&mut cache, &expr) +} diff --git a/src/lean/ffi/ix/level.rs b/src/lean/ffi/ix/level.rs new file mode 100644 index 00000000..cc139a78 --- /dev/null +++ b/src/lean/ffi/ix/level.rs @@ -0,0 +1,155 @@ +//! Ix.Level build/decode/roundtrip FFI. +//! +//! Ix.Level layout: +//! - Tag 0: zero (hash : Address) +//! - Tag 1: succ (x : Level) (hash : Address) +//! - Tag 2: max (x y : Level) (hash : Address) +//! - Tag 3: imax (x y : Level) (hash : Address) +//! - Tag 4: param (n : Name) (hash : Address) +//! - Tag 5: mvar (n : Name) (hash : Address) + +use std::ffi::c_void; + +use crate::ix::env::{Level, LevelData}; +use crate::lean::{ + lean_alloc_array, lean_alloc_ctor, lean_array_set_core, lean_ctor_get, + lean_ctor_set, lean_inc, lean_obj_tag, +}; + +use super::super::builder::LeanBuildCache; +use super::address::build_address; +use super::name::{build_name, decode_ix_name}; + +/// Build a Lean Ix.Level with embedded hash. +/// Uses caching to avoid rebuilding the same level. 
+pub fn build_level(cache: &mut LeanBuildCache, level: &Level) -> *mut c_void { + let hash = *level.get_hash(); + if let Some(&cached) = cache.levels.get(&hash) { + unsafe { lean_inc(cached) }; + return cached; + } + + let result = unsafe { + match level.as_data() { + LevelData::Zero(h) => { + let obj = lean_alloc_ctor(0, 1, 0); + lean_ctor_set(obj, 0, build_address(h)); + obj + }, + LevelData::Succ(x, h) => { + let x_obj = build_level(cache, x); + let obj = lean_alloc_ctor(1, 2, 0); + lean_ctor_set(obj, 0, x_obj); + lean_ctor_set(obj, 1, build_address(h)); + obj + }, + LevelData::Max(x, y, h) => { + let x_obj = build_level(cache, x); + let y_obj = build_level(cache, y); + let obj = lean_alloc_ctor(2, 3, 0); + lean_ctor_set(obj, 0, x_obj); + lean_ctor_set(obj, 1, y_obj); + lean_ctor_set(obj, 2, build_address(h)); + obj + }, + LevelData::Imax(x, y, h) => { + let x_obj = build_level(cache, x); + let y_obj = build_level(cache, y); + let obj = lean_alloc_ctor(3, 3, 0); + lean_ctor_set(obj, 0, x_obj); + lean_ctor_set(obj, 1, y_obj); + lean_ctor_set(obj, 2, build_address(h)); + obj + }, + LevelData::Param(n, h) => { + let n_obj = build_name(cache, n); + let obj = lean_alloc_ctor(4, 2, 0); + lean_ctor_set(obj, 0, n_obj); + lean_ctor_set(obj, 1, build_address(h)); + obj + }, + LevelData::Mvar(n, h) => { + let n_obj = build_name(cache, n); + let obj = lean_alloc_ctor(5, 2, 0); + lean_ctor_set(obj, 0, n_obj); + lean_ctor_set(obj, 1, build_address(h)); + obj + }, + } + }; + + cache.levels.insert(hash, result); + result +} + +/// Build an Array of Levels. +pub fn build_level_array( + cache: &mut LeanBuildCache, + levels: &[Level], +) -> *mut c_void { + unsafe { + let arr = lean_alloc_array(levels.len(), levels.len()); + for (i, level) in levels.iter().enumerate() { + let level_obj = build_level(cache, level); + lean_array_set_core(arr, i, level_obj); + } + arr + } +} + +/// Decode a Lean Ix.Level to Rust Level. 
+pub fn decode_ix_level(ptr: *const c_void) -> Level { + unsafe { + let tag = lean_obj_tag(ptr as *mut _); + match tag { + 0 => Level::zero(), + 1 => { + let x_ptr = lean_ctor_get(ptr as *mut _, 0); + let x = decode_ix_level(x_ptr); + Level::succ(x) + }, + 2 => { + let x_ptr = lean_ctor_get(ptr as *mut _, 0); + let y_ptr = lean_ctor_get(ptr as *mut _, 1); + let x = decode_ix_level(x_ptr); + let y = decode_ix_level(y_ptr); + Level::max(x, y) + }, + 3 => { + let x_ptr = lean_ctor_get(ptr as *mut _, 0); + let y_ptr = lean_ctor_get(ptr as *mut _, 1); + let x = decode_ix_level(x_ptr); + let y = decode_ix_level(y_ptr); + Level::imax(x, y) + }, + 4 => { + let n_ptr = lean_ctor_get(ptr as *mut _, 0); + let n = decode_ix_name(n_ptr); + Level::param(n) + }, + 5 => { + let n_ptr = lean_ctor_get(ptr as *mut _, 0); + let n = decode_ix_name(n_ptr); + Level::mvar(n) + }, + _ => panic!("Invalid Ix.Level tag: {}", tag), + } + } +} + +/// Decode Array of Levels from Lean pointer. +pub fn decode_level_array(ptr: *const c_void) -> Vec { + let arr_obj: &crate::lean::array::LeanArrayObject = + crate::lean::as_ref_unsafe(ptr.cast()); + arr_obj.data().iter().map(|&p| decode_ix_level(p)).collect() +} + +/// Round-trip an Ix.Level: decode from Lean, re-encode via LeanBuildCache. +#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_ix_level( + level_ptr: *const c_void, +) -> *mut c_void { + let level = decode_ix_level(level_ptr); + let mut cache = LeanBuildCache::new(); + build_level(&mut cache, &level) +} diff --git a/src/lean/ffi/ix/name.rs b/src/lean/ffi/ix/name.rs new file mode 100644 index 00000000..052606eb --- /dev/null +++ b/src/lean/ffi/ix/name.rs @@ -0,0 +1,130 @@ +//! Ix.Name build/decode/roundtrip FFI. +//! +//! Ix.Name layout: +//! - Tag 0: anonymous (hash : Address) +//! - Tag 1: str (parent : Name) (s : String) (hash : Address) +//! 
- Tag 2: num (parent : Name) (i : Nat) (hash : Address) + +use std::ffi::c_void; + +use crate::ix::env::{Name, NameData}; +use crate::lean::nat::Nat; +use crate::lean::string::LeanStringObject; +use crate::lean::{ + as_ref_unsafe, lean_alloc_array, lean_alloc_ctor, lean_array_set_core, + lean_ctor_get, lean_ctor_set, lean_inc, lean_mk_string, lean_obj_tag, +}; + +use super::super::builder::LeanBuildCache; +use super::super::primitives::build_nat; +use super::address::build_address; + +/// Build a Lean Ix.Name with embedded hash. +/// Uses caching to avoid rebuilding the same name. +pub fn build_name(cache: &mut LeanBuildCache, name: &Name) -> *mut c_void { + let hash = name.get_hash(); + if let Some(&cached) = cache.names.get(hash) { + unsafe { lean_inc(cached) }; + return cached; + } + + let result = unsafe { + match name.as_data() { + NameData::Anonymous(h) => { + // anonymous: (hash : Address) + let obj = lean_alloc_ctor(0, 1, 0); + lean_ctor_set(obj, 0, build_address(h)); + obj + }, + NameData::Str(parent, s, h) => { + // str: (parent : Name) (s : String) (hash : Address) + let parent_obj = build_name(cache, parent); + let s_cstr = crate::lean::safe_cstring(s.as_str()); + let obj = lean_alloc_ctor(1, 3, 0); + lean_ctor_set(obj, 0, parent_obj); + lean_ctor_set(obj, 1, lean_mk_string(s_cstr.as_ptr())); + lean_ctor_set(obj, 2, build_address(h)); + obj + }, + NameData::Num(parent, n, h) => { + // num: (parent : Name) (i : Nat) (hash : Address) + let parent_obj = build_name(cache, parent); + let n_obj = build_nat(n); + let obj = lean_alloc_ctor(2, 3, 0); + lean_ctor_set(obj, 0, parent_obj); + lean_ctor_set(obj, 1, n_obj); + lean_ctor_set(obj, 2, build_address(h)); + obj + }, + } + }; + + cache.names.insert(*hash, result); + result +} + +/// Build an Array of Names. 
+pub fn build_name_array( + cache: &mut LeanBuildCache, + names: &[Name], +) -> *mut c_void { + unsafe { + let arr = lean_alloc_array(names.len(), names.len()); + for (i, name) in names.iter().enumerate() { + let name_obj = build_name(cache, name); + lean_array_set_core(arr, i, name_obj); + } + arr + } +} + +/// Decode a Lean Ix.Name to Rust Name. +pub fn decode_ix_name(ptr: *const c_void) -> Name { + unsafe { + let tag = lean_obj_tag(ptr as *mut _); + match tag { + 0 => { + // anonymous: just has hash, construct anon Name + Name::anon() + }, + 1 => { + // str: parent, s, hash + let parent_ptr = lean_ctor_get(ptr as *mut _, 0); + let s_ptr = lean_ctor_get(ptr as *mut _, 1); + // hash at field 2 is ignored - Rust recomputes it + + let parent = decode_ix_name(parent_ptr); + let s_obj: &LeanStringObject = as_ref_unsafe(s_ptr.cast()); + let s = s_obj.as_string(); + + Name::str(parent, s) + }, + 2 => { + // num: parent, i, hash + let parent_ptr = lean_ctor_get(ptr as *mut _, 0); + let i_ptr = lean_ctor_get(ptr as *mut _, 1); + // hash at field 2 is ignored + + let parent = decode_ix_name(parent_ptr); + let i = Nat::from_ptr(i_ptr); + + Name::num(parent, i) + }, + _ => panic!("Invalid Ix.Name tag: {}", tag), + } + } +} + +/// Decode Array of Names from Lean pointer. +pub fn decode_name_array(ptr: *const c_void) -> Vec { + let arr_obj: &crate::lean::array::LeanArrayObject = as_ref_unsafe(ptr.cast()); + arr_obj.data().iter().map(|&p| decode_ix_name(p)).collect() +} + +/// Round-trip an Ix.Name: decode from Lean, re-encode via LeanBuildCache. 
+#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_ix_name(name_ptr: *const c_void) -> *mut c_void { + let name = decode_ix_name(name_ptr); + let mut cache = LeanBuildCache::new(); + build_name(&mut cache, &name) +} diff --git a/src/lean/ffi/ixon.rs b/src/lean/ffi/ixon.rs index 934b3781..a2244646 100644 --- a/src/lean/ffi/ixon.rs +++ b/src/lean/ffi/ixon.rs @@ -1,483 +1,24 @@ -use std::ffi::c_void; - -use crate::{ - ix::address::{Address, MetaAddress}, - ix::env::{BinderInfo, DefinitionSafety, QuotKind, ReducibilityHints}, - ix::ixon::{ - Axiom, BuiltIn, CheckClaim, Claim, Comm, Constructor, ConstructorProj, - DataValue, DefKind, Definition, DefinitionProj, Env, EvalClaim, Inductive, - InductiveProj, Ixon, Metadata, Metadatum, MutConst, Proof, Quotient, - Recursor, RecursorProj, RecursorRule, Serialize, - }, - lean::{ - as_ref_unsafe, collect_list, ctor::LeanCtorObject, lean_is_scalar, - nat::Nat, sarray::LeanSArrayObject, - }, - lean_unbox, -}; - -fn lean_ptr_to_address(ptr: *const c_void) -> Address { - let sarray: &LeanSArrayObject = as_ref_unsafe(ptr.cast()); - Address::from_slice(sarray.data()).unwrap() -} - -fn lean_ptr_to_definition(ptr: *const c_void) -> Definition { - let ctor: &LeanCtorObject = as_ref_unsafe(ptr.cast()); - let [lvls, typ, value, mode_safety] = ctor.objs(); - let lvls = Nat::from_ptr(lvls); - let typ = lean_ptr_to_address(typ); - let value = lean_ptr_to_address(value); - let [kind, safety, ..] 
= (mode_safety as usize).to_le_bytes(); - let kind = match kind { - 0 => DefKind::Definition, - 1 => DefKind::Opaque, - 2 => DefKind::Theorem, - _ => unreachable!(), - }; - let safety = match safety { - 0 => DefinitionSafety::Unsafe, - 1 => DefinitionSafety::Safe, - 2 => DefinitionSafety::Partial, - _ => unreachable!(), - }; - Definition { lvls, typ, kind, value, safety } -} - -fn lean_ptr_to_constructor(ptr: *const c_void) -> Constructor { - let ctor: &LeanCtorObject = as_ref_unsafe(ptr.cast()); - let [lvls, cidx, params, fields, typ, is_unsafe] = ctor.objs(); - let lvls = Nat::from_ptr(lvls); - let typ = lean_ptr_to_address(typ); - let cidx = Nat::from_ptr(cidx); - let params = Nat::from_ptr(params); - let fields = Nat::from_ptr(fields); - let is_unsafe = is_unsafe as usize == 1; - Constructor { lvls, typ, cidx, params, fields, is_unsafe } -} - -fn lean_ptr_to_recursor_rule(ptr: *const c_void) -> RecursorRule { - let ctor: &LeanCtorObject = as_ref_unsafe(ptr.cast()); - let [fields, rhs] = ctor.objs(); - let fields = Nat::from_ptr(fields); - let rhs = lean_ptr_to_address(rhs); - RecursorRule { fields, rhs } -} - -fn lean_ptr_to_recursor(ptr: *const c_void) -> Recursor { - let ctor: &LeanCtorObject = as_ref_unsafe(ptr.cast()); - let [lvls, params, indices, motives, minors, typ, rules, k_isunsafe] = - ctor.objs(); - let lvls = Nat::from_ptr(lvls); - let typ = lean_ptr_to_address(typ); - let params = Nat::from_ptr(params); - let indices = Nat::from_ptr(indices); - let motives = Nat::from_ptr(motives); - let minors = Nat::from_ptr(minors); - let rules = collect_list(rules, lean_ptr_to_recursor_rule); - let [k, is_unsafe, ..] 
= (k_isunsafe as usize).to_le_bytes(); - let k = k == 1; - let is_unsafe = is_unsafe == 1; - Recursor { lvls, typ, params, indices, motives, minors, rules, k, is_unsafe } -} - -fn lean_ptr_to_axiom(ptr: *const c_void) -> Axiom { - let ctor: &LeanCtorObject = as_ref_unsafe(ptr.cast()); - let [lvls, typ, is_unsafe] = ctor.objs(); - let lvls = Nat::from_ptr(lvls); - let typ = lean_ptr_to_address(typ); - let is_unsafe = is_unsafe as usize == 1; - Axiom { is_unsafe, lvls, typ } -} - -fn lean_ptr_to_quotient(ptr: *const c_void) -> Quotient { - let ctor: &LeanCtorObject = as_ref_unsafe(ptr.cast()); - let [lvls, typ, kind] = ctor.objs(); - let lvls = Nat::from_ptr(lvls); - let typ = lean_ptr_to_address(typ); - let kind = match kind as usize { - 0 => QuotKind::Type, - 1 => QuotKind::Ctor, - 2 => QuotKind::Lift, - 3 => QuotKind::Ind, - _ => unreachable!(), - }; - Quotient { kind, lvls, typ } -} - -fn lean_ptr_to_constructor_proj(ptr: *const c_void) -> ConstructorProj { - let ctor: &LeanCtorObject = as_ref_unsafe(ptr.cast()); - let [idx, cidx, block] = ctor.objs(); - let [idx, cidx] = [idx, cidx].map(Nat::from_ptr); - let block = lean_ptr_to_address(block); - ConstructorProj { idx, cidx, block } -} - -fn lean_ptr_to_recursor_proj(ptr: *const c_void) -> RecursorProj { - let ctor: &LeanCtorObject = as_ref_unsafe(ptr.cast()); - let [idx, block] = ctor.objs(); - let idx = Nat::from_ptr(idx); - let block = lean_ptr_to_address(block); - RecursorProj { idx, block } -} - -fn lean_ptr_to_inductive_proj(ptr: *const c_void) -> InductiveProj { - let ctor: &LeanCtorObject = as_ref_unsafe(ptr.cast()); - let [idx, block] = ctor.objs(); - let idx = Nat::from_ptr(idx); - let block = lean_ptr_to_address(block); - InductiveProj { idx, block } -} - -fn lean_ptr_to_definition_proj(ptr: *const c_void) -> DefinitionProj { - let ctor: &LeanCtorObject = as_ref_unsafe(ptr.cast()); - let [idx, block] = ctor.objs(); - let idx = Nat::from_ptr(idx); - let block = lean_ptr_to_address(block); - 
DefinitionProj { idx, block } -} - -fn lean_ptr_to_inductive(ptr: *const c_void) -> Inductive { - let ctor: &LeanCtorObject = as_ref_unsafe(ptr.cast()); - let [lvls, params, indices, nested, typ, ctors, recr_refl_isunsafe] = - ctor.objs(); - let lvls = Nat::from_ptr(lvls); - let typ = lean_ptr_to_address(typ); - let params = Nat::from_ptr(params); - let indices = Nat::from_ptr(indices); - let ctors = collect_list(ctors, lean_ptr_to_constructor); - let nested = Nat::from_ptr(nested); - let [recr, refl, is_unsafe, ..] = (recr_refl_isunsafe as usize).to_le_bytes(); - let recr = recr == 1; - let refl = refl == 1; - let is_unsafe = is_unsafe == 1; - Inductive { lvls, typ, params, indices, ctors, nested, recr, refl, is_unsafe } -} - -fn lean_ptr_to_mut_const(ptr: *const c_void) -> MutConst { - let ctor: &LeanCtorObject = as_ref_unsafe(ptr.cast()); - let [inner] = ctor.objs(); - match ctor.tag() { - 0 => MutConst::Defn(lean_ptr_to_definition(inner)), - 1 => MutConst::Indc(lean_ptr_to_inductive(inner)), - 2 => MutConst::Recr(lean_ptr_to_recursor(inner)), - _ => unreachable!(), - } -} - -fn lean_ptr_to_eval_claim(ptr: *const c_void) -> EvalClaim { - let evals: &LeanCtorObject = as_ref_unsafe(ptr.cast()); - let [lvls, typ, input, output] = evals.objs().map(lean_ptr_to_address); - EvalClaim { lvls, typ, input, output } -} - -fn lean_ptr_to_check_claim(ptr: *const c_void) -> CheckClaim { - let checks: &LeanCtorObject = as_ref_unsafe(ptr.cast()); - let [lvls, typ, value] = checks.objs().map(lean_ptr_to_address); - CheckClaim { lvls, typ, value } -} - -fn lean_ptr_to_proof(ptr: *const c_void) -> Proof { - let ctor: &LeanCtorObject = as_ref_unsafe(ptr.cast()); - let [claim_ptr, proof_ptr] = ctor.objs(); - let claim_ctor: &LeanCtorObject = as_ref_unsafe(claim_ptr.cast()); - let [claim_inner] = claim_ctor.objs(); - let claim = match claim_ctor.tag() { - 0 => Claim::Evals(lean_ptr_to_eval_claim(claim_inner)), - 1 => Claim::Checks(lean_ptr_to_check_claim(claim_inner)), - _ => 
unreachable!(), - }; - let proof_sarray: &LeanSArrayObject = as_ref_unsafe(proof_ptr.cast()); - let proof = proof_sarray.data().to_vec(); - Proof { claim, proof } -} - -fn lean_ptr_to_comm(ptr: *const c_void) -> Comm { - let ctor: &LeanCtorObject = as_ref_unsafe(ptr.cast()); - let [secret, payload] = ctor.objs().map(lean_ptr_to_address); - Comm { secret, payload } -} - -fn lean_ptr_to_address_pair(ptr: *const c_void) -> (Address, Address) { - let ctor: &LeanCtorObject = as_ref_unsafe(ptr.cast()); - let [fst, snd] = ctor.objs().map(lean_ptr_to_address); - (fst, snd) -} - -fn lean_ptr_to_meta_address(ptr: *const c_void) -> MetaAddress { - let (data, meta) = lean_ptr_to_address_pair(ptr); - MetaAddress { data, meta } -} - -fn lean_ptr_to_env(ptr: *const c_void) -> Env { - let env = collect_list(ptr, lean_ptr_to_meta_address); - Env { env } -} - -fn lean_ptr_to_builtin(ptr: *const c_void) -> BuiltIn { - assert!(lean_is_scalar(ptr)); - match lean_unbox!(u8, ptr) { - 0 => BuiltIn::Obj, - 1 => BuiltIn::Neutral, - 2 => BuiltIn::Unreachable, - _ => unreachable!(), - } -} - -fn lean_ptr_to_data_value(ptr: *const c_void) -> DataValue { - let ctor: &LeanCtorObject = as_ref_unsafe(ptr.cast()); - let [inner_ptr] = ctor.objs(); - match ctor.tag() { - 0 => DataValue::OfString(lean_ptr_to_address(inner_ptr)), - 1 => DataValue::OfBool(inner_ptr as usize == 1), - 2 => DataValue::OfName(lean_ptr_to_address(inner_ptr)), - 3 => DataValue::OfNat(lean_ptr_to_address(inner_ptr)), - 4 => DataValue::OfInt(lean_ptr_to_address(inner_ptr)), - 5 => DataValue::OfSyntax(lean_ptr_to_address(inner_ptr)), - _ => unreachable!(), - } -} - -fn lean_ptr_to_address_data_value_pair( - ptr: *const c_void, -) -> (Address, DataValue) { - let ctor: &LeanCtorObject = as_ref_unsafe(ptr.cast()); - let [address, data_value] = ctor.objs(); - let address = lean_ptr_to_address(address); - let data_value = lean_ptr_to_data_value(data_value); - (address, data_value) -} - -fn lean_ptr_to_metadatum(ptr: *const c_void) -> 
Metadatum { - let ctor: &LeanCtorObject = as_ref_unsafe(ptr.cast()); - match ctor.tag() { - 0 => { - let [addr] = ctor.objs(); - let addr = lean_ptr_to_address(addr); - Metadatum::Link(addr) - }, - 1 => { - let [info] = ctor.objs(); - let info = match info as usize { - 0 => BinderInfo::Default, - 1 => BinderInfo::Implicit, - 2 => BinderInfo::StrictImplicit, - 3 => BinderInfo::InstImplicit, - _ => unreachable!(), - }; - Metadatum::Info(info) - }, - 2 => { - let [hints] = ctor.objs(); - let hints = if lean_is_scalar(hints) { - match lean_unbox!(usize, hints) { - 0 => ReducibilityHints::Opaque, - 1 => ReducibilityHints::Abbrev, - _ => unreachable!(), - } - } else { - let ctor: &LeanCtorObject = as_ref_unsafe(hints.cast()); - let [height] = ctor.objs(); - ReducibilityHints::Regular(height as u32) - }; - Metadatum::Hints(hints) - }, - 3 => { - let [links] = ctor.objs(); - let links = collect_list(links, lean_ptr_to_address); - Metadatum::Links(links) - }, - 4 => { - let [pairs] = ctor.objs(); - let pairs = collect_list(pairs, lean_ptr_to_address_pair); - Metadatum::Map(pairs) - }, - 5 => { - let [kvmap] = ctor.objs(); - let kvmap = collect_list(kvmap, lean_ptr_to_address_data_value_pair); - Metadatum::KVMap(kvmap) - }, - 6 => { - let [muts] = ctor.objs(); - let muts = - collect_list(muts, |ptr| collect_list(ptr, lean_ptr_to_address)); - Metadatum::Muts(muts) - }, - _ => unreachable!(), - } -} - -fn lean_ptr_to_metadata(ptr: *const c_void) -> Metadata { - let nodes = collect_list(ptr, lean_ptr_to_metadatum); - Metadata { nodes } -} - -fn lean_ptr_to_ixon(ptr: *const c_void) -> Ixon { - if lean_is_scalar(ptr) { - return match lean_unbox!(u8, ptr) { - 0 => Ixon::NAnon, - 3 => Ixon::UZero, - _ => unreachable!(), - }; - } - let ctor: &LeanCtorObject = as_ref_unsafe(ptr.cast()); - match ctor.tag() { - 1 => { - let [a, b] = ctor.objs().map(lean_ptr_to_address); - Ixon::NStr(a, b) - }, - 2 => { - let [a, b] = ctor.objs().map(lean_ptr_to_address); - Ixon::NNum(a, b) - }, - 4 => 
{ - let [a] = ctor.objs().map(lean_ptr_to_address); - Ixon::USucc(a) - }, - 5 => { - let [a, b] = ctor.objs().map(lean_ptr_to_address); - Ixon::UMax(a, b) - }, - 6 => { - let [a, b] = ctor.objs().map(lean_ptr_to_address); - Ixon::UIMax(a, b) - }, - 7 => { - let [a] = ctor.objs().map(Nat::from_ptr); - Ixon::UVar(a) - }, - 8 => { - let [a] = ctor.objs().map(Nat::from_ptr); - Ixon::EVar(a) - }, - 9 => { - let [a_ptr, bs_ptr] = ctor.objs(); - let a = lean_ptr_to_address(a_ptr); - let bs = collect_list(bs_ptr, lean_ptr_to_address); - Ixon::ERef(a, bs) - }, - 10 => { - let [a_ptr, bs_ptr] = ctor.objs(); - let a = Nat::from_ptr(a_ptr); - let bs = collect_list(bs_ptr, lean_ptr_to_address); - Ixon::ERec(a, bs) - }, - 11 => { - let [a_ptr, b_ptr, c_ptr] = ctor.objs(); - let a = lean_ptr_to_address(a_ptr); - let b = Nat::from_ptr(b_ptr); - let c = lean_ptr_to_address(c_ptr); - Ixon::EPrj(a, b, c) - }, - 12 => { - let [a] = ctor.objs().map(lean_ptr_to_address); - Ixon::ESort(a) - }, - 13 => { - let [a] = ctor.objs().map(lean_ptr_to_address); - Ixon::EStr(a) - }, - 14 => { - let [a] = ctor.objs().map(lean_ptr_to_address); - Ixon::ENat(a) - }, - 15 => { - let [a, b] = ctor.objs().map(lean_ptr_to_address); - Ixon::EApp(a, b) - }, - 16 => { - let [a, b] = ctor.objs().map(lean_ptr_to_address); - Ixon::ELam(a, b) - }, - 17 => { - let [a, b] = ctor.objs().map(lean_ptr_to_address); - Ixon::EAll(a, b) - }, - 18 => { - let [a_ptr, b_ptr, c_ptr, bool_ptr] = ctor.objs(); - let [a, b, c] = [a_ptr, b_ptr, c_ptr].map(lean_ptr_to_address); - let bool = bool_ptr as usize == 1; - Ixon::ELet(bool, a, b, c) - }, - 19 => { - let [a_ptr] = ctor.objs(); - let sarray: &LeanSArrayObject = as_ref_unsafe(a_ptr.cast()); - Ixon::Blob(sarray.data().to_vec()) - }, - 20 => { - let [a] = ctor.objs().map(lean_ptr_to_definition); - Ixon::Defn(a) - }, - 21 => { - let [a] = ctor.objs().map(lean_ptr_to_recursor); - Ixon::Recr(a) - }, - 22 => { - let [a] = ctor.objs().map(lean_ptr_to_axiom); - Ixon::Axio(a) - }, - 
23 => { - let [a] = ctor.objs().map(lean_ptr_to_quotient); - Ixon::Quot(a) - }, - 24 => { - let [a] = ctor.objs().map(lean_ptr_to_constructor_proj); - Ixon::CPrj(a) - }, - 25 => { - let [a] = ctor.objs().map(lean_ptr_to_recursor_proj); - Ixon::RPrj(a) - }, - 26 => { - let [a] = ctor.objs().map(lean_ptr_to_inductive_proj); - Ixon::IPrj(a) - }, - 27 => { - let [a] = ctor.objs().map(lean_ptr_to_definition_proj); - Ixon::DPrj(a) - }, - 28 => { - let [a] = ctor.objs(); - Ixon::Muts(collect_list(a, lean_ptr_to_mut_const)) - }, - 29 => { - let [a] = ctor.objs().map(lean_ptr_to_proof); - Ixon::Prof(a) - }, - 30 => { - let [a] = ctor.objs().map(lean_ptr_to_eval_claim); - Ixon::Eval(a) - }, - 31 => { - let [a] = ctor.objs().map(lean_ptr_to_check_claim); - Ixon::Chck(a) - }, - 32 => { - let [a] = ctor.objs().map(lean_ptr_to_comm); - Ixon::Comm(a) - }, - 33 => { - let [a] = ctor.objs().map(lean_ptr_to_env); - Ixon::Envn(a) - }, - 34 => { - let [a] = ctor.objs().map(lean_ptr_to_builtin); - Ixon::Prim(a) - }, - 35 => { - let [a] = ctor.objs().map(lean_ptr_to_metadata); - Ixon::Meta(a) - }, - _ => unreachable!(), - } -} - -#[unsafe(no_mangle)] -extern "C" fn rs_eq_lean_rust_serialization( - ixon_ptr: *const c_void, - bytes: &LeanSArrayObject, -) -> bool { - let bytes_data = bytes.data(); - let mut buf = Vec::with_capacity(bytes_data.len()); - lean_ptr_to_ixon(ixon_ptr).put(&mut buf); - buf == bytes_data -} +//! FFI for Ixon types (canonical serialization format). +//! +//! This module provides build/decode/roundtrip functions for Ixon types used in +//! cross-implementation compatibility testing and serialization. 
+ +pub mod compare; +pub mod constant; +pub mod enums; +pub mod env; +pub mod expr; +pub mod meta; +pub mod serialize; +pub mod sharing; +pub mod univ; + +pub use compare::*; +pub use constant::*; +pub use enums::*; +pub use env::*; +pub use expr::*; +pub use meta::*; +pub use serialize::*; +pub use sharing::*; +pub use univ::*; diff --git a/src/lean/ffi/ixon/compare.rs b/src/lean/ffi/ixon/compare.rs new file mode 100644 index 00000000..59232b22 --- /dev/null +++ b/src/lean/ffi/ixon/compare.rs @@ -0,0 +1,232 @@ +//! Cross-implementation compilation comparison FFI. + +use std::collections::HashMap; +use std::ffi::c_void; + +use crate::ix::compile::{BlockCache, CompileState, compile_env, compile_expr}; +use crate::ix::env::Name; +use crate::ix::ixon::serialize::put_expr; +use crate::ix::mutual::MutCtx; +use crate::lean::sarray::LeanSArrayObject; +use crate::lean::{lean_alloc_ctor, lean_ctor_set}; + +use super::super::lean_env::{ + Cache as LeanCache, GlobalCache, lean_ptr_to_expr, lean_ptr_to_name, +}; + +/// Rust-side compiled environment for block comparison. +pub struct RustBlockEnv { + pub blocks: HashMap, usize)>, // (serialized bytes, sharing count) +} + +/// Compare Lean's compiled expression output with Rust's compilation of the same input. +#[unsafe(no_mangle)] +pub extern "C" fn rs_compare_expr_compilation( + lean_expr_ptr: *const c_void, + lean_output: &LeanSArrayObject, + univ_ctx_size: u64, +) -> bool { + // Decode Lean.Expr to Rust's representation + let global_cache = GlobalCache::default(); + let mut cache = LeanCache::new(&global_cache); + let lean_expr = lean_ptr_to_expr(lean_expr_ptr, &mut cache); + + // Create universe params for de Bruijn indexing (u0, u1, u2, ...) 
+ let univ_params: Vec = (0..univ_ctx_size) + .map(|i| Name::str(Name::anon(), format!("u{}", i))) + .collect(); + let mut_ctx = MutCtx::default(); + + // Create minimal compile state (no environment needed for simple exprs) + let compile_stt = CompileState::new_empty(); + let mut block_cache = BlockCache::default(); + + // Compile with Rust + let rust_expr = match compile_expr( + &lean_expr, + &univ_params, + &mut_ctx, + &mut block_cache, + &compile_stt, + ) { + Ok(expr) => expr, + Err(_) => return false, + }; + + // Serialize Rust's output + let mut rust_bytes = Vec::new(); + put_expr(&rust_expr, &mut rust_bytes); + + // Compare byte-for-byte + let lean_bytes = lean_output.data(); + rust_bytes == lean_bytes +} + +/// Build a BlockCompareResult Lean object. +fn build_block_compare_result( + matched: bool, + not_found: bool, + lean_size: u64, + rust_size: u64, + first_diff_offset: u64, +) -> *mut c_void { + unsafe { + if matched { + lean_alloc_ctor(0, 0, 0) // match + } else if not_found { + lean_alloc_ctor(2, 0, 0) // notFound + } else { + // mismatch + let obj = lean_alloc_ctor(1, 0, 24); + let base = obj.cast::(); + *base.add(8).cast::() = lean_size; + *base.add(16).cast::() = rust_size; + *base.add(24).cast::() = first_diff_offset; + obj + } + } +} + +/// Build a BlockCompareDetail Lean object. +fn build_block_compare_detail( + result: *mut c_void, + lean_sharing_len: u64, + rust_sharing_len: u64, +) -> *mut c_void { + unsafe { + let obj = lean_alloc_ctor(0, 1, 16); + lean_ctor_set(obj, 0, result); + let base = obj.cast::(); + *base.add(16).cast::() = lean_sharing_len; + *base.add(24).cast::() = rust_sharing_len; + obj + } +} + +/// Compare a single block by lowlink name. +/// +/// # Safety +/// +/// `rust_env` must be a valid pointer to a `RustBlockEnv`. +/// `lowlink_name` must be a valid Lean object pointer. 
+#[unsafe(no_mangle)] +pub unsafe extern "C" fn rs_compare_block_v2( + rust_env: *const RustBlockEnv, + lowlink_name: *const c_void, + lean_bytes: &LeanSArrayObject, + lean_sharing_len: u64, +) -> *mut c_void { + let global_cache = GlobalCache::default(); + let name = lean_ptr_to_name(lowlink_name, &global_cache); + + let rust_env = unsafe { &*rust_env }; + let lean_data = lean_bytes.data(); + + // Look up Rust's compiled block + let (rust_bytes, rust_sharing_len) = match rust_env.blocks.get(&name) { + Some((bytes, sharing_len)) => (bytes, *sharing_len as u64), + None => { + // Block not found in Rust compilation + let result = + build_block_compare_result(false, true, lean_data.len() as u64, 0, 0); + return build_block_compare_detail(result, lean_sharing_len, 0); + }, + }; + + // Compare bytes + if rust_bytes == lean_data { + // Match + let result = build_block_compare_result( + true, + false, + lean_data.len() as u64, + rust_bytes.len() as u64, + 0, + ); + return build_block_compare_detail( + result, + lean_sharing_len, + rust_sharing_len, + ); + } + + // Mismatch: find first differing byte + let first_diff_offset = rust_bytes + .iter() + .zip(lean_data.iter()) + .position(|(a, b)| a != b) + .map_or_else( + || { + // One is a prefix of the other + rust_bytes.len().min(lean_data.len()) as u64 + }, + |i| i as u64, + ); + + let result = build_block_compare_result( + false, + false, + lean_data.len() as u64, + rust_bytes.len() as u64, + first_diff_offset, + ); + build_block_compare_detail(result, lean_sharing_len, rust_sharing_len) +} + +/// Free a RustBlockEnv pointer. +/// +/// # Safety +/// +/// `ptr` must be a valid pointer returned by `rs_build_compiled_env`, or null. +#[unsafe(no_mangle)] +pub unsafe extern "C" fn rs_free_compiled_env(ptr: *mut RustBlockEnv) { + if !ptr.is_null() { + unsafe { + drop(Box::from_raw(ptr)); + } + } +} + +/// Build a RustBlockEnv from a Lean environment. 
+#[unsafe(no_mangle)] +pub extern "C" fn rs_build_compiled_env( + env_consts_ptr: *const c_void, +) -> *mut RustBlockEnv { + use super::super::lean_env::lean_ptr_to_env; + + // Decode Lean environment + let rust_env = lean_ptr_to_env(env_consts_ptr); + let rust_env = std::sync::Arc::new(rust_env); + + // Compile + let compile_stt = match compile_env(&rust_env) { + Ok(stt) => stt, + Err(_) => { + // Return empty env on error + return Box::into_raw(Box::new(RustBlockEnv { blocks: HashMap::new() })); + }, + }; + + // Collect blocks + let mut blocks = HashMap::new(); + let mut seen_addrs = std::collections::HashSet::new(); + + for entry in compile_stt.name_to_addr.iter() { + let name = entry.key().clone(); + let addr = entry.value().clone(); + + if seen_addrs.contains(&addr) { + continue; + } + seen_addrs.insert(addr.clone()); + + if let Some(constant) = compile_stt.env.get_const(&addr) { + let mut bytes = Vec::new(); + constant.put(&mut bytes); + let sharing_len = constant.sharing.len(); + blocks.insert(name, (bytes, sharing_len)); + } + } + + Box::into_raw(Box::new(RustBlockEnv { blocks })) +} diff --git a/src/lean/ffi/ixon/constant.rs b/src/lean/ffi/ixon/constant.rs new file mode 100644 index 00000000..e7692759 --- /dev/null +++ b/src/lean/ffi/ixon/constant.rs @@ -0,0 +1,808 @@ +//! Ixon constant types build/decode/roundtrip FFI. +//! +//! Includes: Definition, Axiom, Quotient, RecursorRule, Recursor, Constructor, +//! Inductive, InductiveProj, ConstructorProj, RecursorProj, DefinitionProj, +//! 
MutConst, ConstantInfo, Constant + +use std::ffi::c_void; +use std::sync::Arc; + +use crate::ix::address::Address; +use crate::ix::ixon::constant::{ + Axiom as IxonAxiom, Constant as IxonConstant, + ConstantInfo as IxonConstantInfo, Constructor as IxonConstructor, + ConstructorProj, DefKind, Definition as IxonDefinition, DefinitionProj, + Inductive as IxonInductive, InductiveProj, MutConst, + Quotient as IxonQuotient, Recursor as IxonRecursor, RecursorProj, + RecursorRule as IxonRecursorRule, +}; +use crate::lean::sarray::LeanSArrayObject; +use crate::lean::{ + as_ref_unsafe, lean_alloc_array, lean_alloc_ctor, lean_alloc_sarray, + lean_array_set_core, lean_ctor_get, lean_ctor_set, lean_obj_tag, + lean_sarray_cptr, +}; + +use super::expr::{ + build_ixon_expr, build_ixon_expr_array, decode_ixon_expr, + decode_ixon_expr_array, +}; +use super::univ::{build_ixon_univ_array, decode_ixon_univ_array}; + +/// Build Address from Ixon Address type (which is just a [u8; 32]). +pub fn build_address_from_ixon(addr: &Address) -> *mut c_void { + unsafe { + let ba = lean_alloc_sarray(1, 32, 32); + let data_ptr = lean_sarray_cptr(ba); + std::ptr::copy_nonoverlapping(addr.as_bytes().as_ptr(), data_ptr, 32); + ba + } +} + +/// Build an Array of Addresses. +pub fn build_address_array(addrs: &[Address]) -> *mut c_void { + unsafe { + let arr = lean_alloc_array(addrs.len(), addrs.len()); + for (i, addr) in addrs.iter().enumerate() { + let addr_obj = build_address_from_ixon(addr); + lean_array_set_core(arr, i, addr_obj); + } + arr + } +} + +/// Build Ixon.Definition +/// Lean stores scalar fields ordered by size (largest first). 
+/// Layout: header(8) + typ(8) + value(8) + lvls(8) + kind(1) + safety(1) + padding(6) +pub fn build_ixon_definition(def: &IxonDefinition) -> *mut c_void { + unsafe { + let typ_obj = build_ixon_expr(&def.typ); + let value_obj = build_ixon_expr(&def.value); + // 2 obj fields, 16 scalar bytes (lvls(8) + kind(1) + safety(1) + padding(6)) + let obj = lean_alloc_ctor(0, 2, 16); + lean_ctor_set(obj, 0, typ_obj); + lean_ctor_set(obj, 1, value_obj); + let base = obj.cast::(); + let scalar_base = base.add(2 * 8 + 8); // offset 24 + + // lvls at offset 0 (8 bytes) - largest scalar first + *scalar_base.cast::() = def.lvls; + // kind at offset 8 (1 byte) + let kind_val: u8 = match def.kind { + DefKind::Definition => 0, + DefKind::Opaque => 1, + DefKind::Theorem => 2, + }; + *scalar_base.add(8) = kind_val; + // safety at offset 9 (1 byte) + let safety_val: u8 = match def.safety { + crate::ix::env::DefinitionSafety::Unsafe => 0, + crate::ix::env::DefinitionSafety::Safe => 1, + crate::ix::env::DefinitionSafety::Partial => 2, + }; + *scalar_base.add(9) = safety_val; + obj + } +} + +/// Build Ixon.RecursorRule +pub fn build_ixon_recursor_rule(rule: &IxonRecursorRule) -> *mut c_void { + unsafe { + let rhs_obj = build_ixon_expr(&rule.rhs); + // 1 obj field, 8 scalar bytes + let obj = lean_alloc_ctor(0, 1, 8); + lean_ctor_set(obj, 0, rhs_obj); + let base = obj.cast::(); + *base.add(8 + 8).cast::() = rule.fields; + obj + } +} + +/// Build Ixon.Recursor +/// Scalars ordered by size: lvls(8) + params(8) + indices(8) + motives(8) + minors(8) + k(1) + isUnsafe(1) + padding(6) +pub fn build_ixon_recursor(rec: &IxonRecursor) -> *mut c_void { + unsafe { + let typ_obj = build_ixon_expr(&rec.typ); + // Build rules array + let rules_arr = lean_alloc_array(rec.rules.len(), rec.rules.len()); + for (i, rule) in rec.rules.iter().enumerate() { + let rule_obj = build_ixon_recursor_rule(rule); + lean_array_set_core(rules_arr, i, rule_obj); + } + // 2 obj fields (typ, rules), 48 scalar bytes (5×8 + 1 + 
1 + 6 padding) + let obj = lean_alloc_ctor(0, 2, 48); + lean_ctor_set(obj, 0, typ_obj); + lean_ctor_set(obj, 1, rules_arr); + let base = obj.cast::(); + let scalar_base = base.add(2 * 8 + 8); + // u64 fields first + *scalar_base.cast::() = rec.lvls; + *scalar_base.add(8).cast::() = rec.params; + *scalar_base.add(16).cast::() = rec.indices; + *scalar_base.add(24).cast::() = rec.motives; + *scalar_base.add(32).cast::() = rec.minors; + // bool fields last + *scalar_base.add(40) = if rec.k { 1 } else { 0 }; + *scalar_base.add(41) = if rec.is_unsafe { 1 } else { 0 }; + obj + } +} + +/// Build Ixon.Axiom +/// Scalars ordered by size: lvls(8) + isUnsafe(1) + padding(7) +pub fn build_ixon_axiom(ax: &IxonAxiom) -> *mut c_void { + unsafe { + let typ_obj = build_ixon_expr(&ax.typ); + // 1 obj field, 16 scalar bytes (lvls(8) + isUnsafe(1) + padding(7)) + let obj = lean_alloc_ctor(0, 1, 16); + lean_ctor_set(obj, 0, typ_obj); + let base = obj.cast::(); + let scalar_base = base.add(8 + 8); + // lvls at offset 0 + *scalar_base.cast::() = ax.lvls; + // isUnsafe at offset 8 + *scalar_base.add(8) = if ax.is_unsafe { 1 } else { 0 }; + obj + } +} + +/// Build Ixon.Quotient +/// QuotKind is a simple enum stored as scalar u8, not object field. 
+/// Scalars ordered by size: lvls(8) + kind(1) + padding(7) +pub fn build_ixon_quotient(quot: &IxonQuotient) -> *mut c_void { + unsafe { + let typ_obj = build_ixon_expr(&quot.typ); + // 1 obj field (typ), 16 scalar bytes (lvls(8) + kind(1) + padding(7)) + let obj = lean_alloc_ctor(0, 1, 16); + lean_ctor_set(obj, 0, typ_obj); + let base = obj.cast::<u8>(); + let scalar_base = base.add(8 + 8); + // lvls at offset 0 + *scalar_base.cast::<u64>() = quot.lvls; + // kind at offset 8 + let kind_val: u8 = match quot.kind { + crate::ix::env::QuotKind::Type => 0, + crate::ix::env::QuotKind::Ctor => 1, + crate::ix::env::QuotKind::Lift => 2, + crate::ix::env::QuotKind::Ind => 3, + }; + *scalar_base.add(8) = kind_val; + obj + } +} + +/// Build Ixon.Constructor +/// Scalars ordered by size: lvls(8) + cidx(8) + params(8) + fields(8) + isUnsafe(1) + padding(7) +pub fn build_ixon_constructor(ctor: &IxonConstructor) -> *mut c_void { + unsafe { + let typ_obj = build_ixon_expr(&ctor.typ); + // 1 obj field, 40 scalar bytes (4×8 + 1 + 7 padding) + let obj = lean_alloc_ctor(0, 1, 40); + lean_ctor_set(obj, 0, typ_obj); + let base = obj.cast::<u8>(); + let scalar_base = base.add(8 + 8); + // u64 fields first + *scalar_base.cast::<u64>() = ctor.lvls; + *scalar_base.add(8).cast::<u64>() = ctor.cidx; + *scalar_base.add(16).cast::<u64>() = ctor.params; + *scalar_base.add(24).cast::<u64>() = ctor.fields; + // bool field last + *scalar_base.add(32) = if ctor.is_unsafe { 1 } else { 0 }; + obj + } +} + +/// Build Ixon.Inductive +/// Scalars ordered by size: lvls(8) + params(8) + indices(8) + nested(8) + recr(1) + refl(1) + isUnsafe(1) + padding(5) +pub fn build_ixon_inductive(ind: &IxonInductive) -> *mut c_void { + unsafe { + let typ_obj = build_ixon_expr(&ind.typ); + // Build ctors array + let ctors_arr = lean_alloc_array(ind.ctors.len(), ind.ctors.len()); + for (i, ctor) in ind.ctors.iter().enumerate() { + let ctor_obj = build_ixon_constructor(ctor); + lean_array_set_core(ctors_arr, i, ctor_obj); + } + // 2 obj fields, 40 scalar 
bytes (4×8 + 3 + 5 padding) + let obj = lean_alloc_ctor(0, 2, 40); + lean_ctor_set(obj, 0, typ_obj); + lean_ctor_set(obj, 1, ctors_arr); + let base = obj.cast::(); + let scalar_base = base.add(2 * 8 + 8); + // u64 fields first + *scalar_base.cast::() = ind.lvls; + *scalar_base.add(8).cast::() = ind.params; + *scalar_base.add(16).cast::() = ind.indices; + *scalar_base.add(24).cast::() = ind.nested; + // bool fields last + *scalar_base.add(32) = if ind.recr { 1 } else { 0 }; + *scalar_base.add(33) = if ind.refl { 1 } else { 0 }; + *scalar_base.add(34) = if ind.is_unsafe { 1 } else { 0 }; + obj + } +} + +/// Build Ixon.InductiveProj +pub fn build_inductive_proj(proj: &InductiveProj) -> *mut c_void { + unsafe { + let block_obj = build_address_from_ixon(&proj.block); + let obj = lean_alloc_ctor(0, 1, 8); + lean_ctor_set(obj, 0, block_obj); + let base = obj.cast::(); + *base.add(8 + 8).cast::() = proj.idx; + obj + } +} + +/// Build Ixon.ConstructorProj +pub fn build_constructor_proj(proj: &ConstructorProj) -> *mut c_void { + unsafe { + let block_obj = build_address_from_ixon(&proj.block); + let obj = lean_alloc_ctor(0, 1, 16); + lean_ctor_set(obj, 0, block_obj); + let base = obj.cast::(); + *base.add(8 + 8).cast::() = proj.idx; + *base.add(8 + 16).cast::() = proj.cidx; + obj + } +} + +/// Build Ixon.RecursorProj +pub fn build_recursor_proj(proj: &RecursorProj) -> *mut c_void { + unsafe { + let block_obj = build_address_from_ixon(&proj.block); + let obj = lean_alloc_ctor(0, 1, 8); + lean_ctor_set(obj, 0, block_obj); + let base = obj.cast::(); + *base.add(8 + 8).cast::() = proj.idx; + obj + } +} + +/// Build Ixon.DefinitionProj +pub fn build_definition_proj(proj: &DefinitionProj) -> *mut c_void { + unsafe { + let block_obj = build_address_from_ixon(&proj.block); + let obj = lean_alloc_ctor(0, 1, 8); + lean_ctor_set(obj, 0, block_obj); + let base = obj.cast::(); + *base.add(8 + 8).cast::() = proj.idx; + obj + } +} + +/// Build Ixon.MutConst +pub fn build_mut_const(mc: 
&MutConst) -> *mut c_void { + unsafe { + match mc { + MutConst::Defn(def) => { + let def_obj = build_ixon_definition(def); + let obj = lean_alloc_ctor(0, 1, 0); + lean_ctor_set(obj, 0, def_obj); + obj + }, + MutConst::Indc(ind) => { + let ind_obj = build_ixon_inductive(ind); + let obj = lean_alloc_ctor(1, 1, 0); + lean_ctor_set(obj, 0, ind_obj); + obj + }, + MutConst::Recr(rec) => { + let rec_obj = build_ixon_recursor(rec); + let obj = lean_alloc_ctor(2, 1, 0); + lean_ctor_set(obj, 0, rec_obj); + obj + }, + } + } +} + +/// Build Ixon.ConstantInfo (9 constructors) +pub fn build_ixon_constant_info(info: &IxonConstantInfo) -> *mut c_void { + unsafe { + match info { + IxonConstantInfo::Defn(def) => { + let def_obj = build_ixon_definition(def); + let obj = lean_alloc_ctor(0, 1, 0); + lean_ctor_set(obj, 0, def_obj); + obj + }, + IxonConstantInfo::Recr(rec) => { + let rec_obj = build_ixon_recursor(rec); + let obj = lean_alloc_ctor(1, 1, 0); + lean_ctor_set(obj, 0, rec_obj); + obj + }, + IxonConstantInfo::Axio(ax) => { + let ax_obj = build_ixon_axiom(ax); + let obj = lean_alloc_ctor(2, 1, 0); + lean_ctor_set(obj, 0, ax_obj); + obj + }, + IxonConstantInfo::Quot(quot) => { + let quot_obj = build_ixon_quotient(quot); + let obj = lean_alloc_ctor(3, 1, 0); + lean_ctor_set(obj, 0, quot_obj); + obj + }, + IxonConstantInfo::CPrj(proj) => { + let proj_obj = build_constructor_proj(proj); + let obj = lean_alloc_ctor(4, 1, 0); + lean_ctor_set(obj, 0, proj_obj); + obj + }, + IxonConstantInfo::RPrj(proj) => { + let proj_obj = build_recursor_proj(proj); + let obj = lean_alloc_ctor(5, 1, 0); + lean_ctor_set(obj, 0, proj_obj); + obj + }, + IxonConstantInfo::IPrj(proj) => { + let proj_obj = build_inductive_proj(proj); + let obj = lean_alloc_ctor(6, 1, 0); + lean_ctor_set(obj, 0, proj_obj); + obj + }, + IxonConstantInfo::DPrj(proj) => { + let proj_obj = build_definition_proj(proj); + let obj = lean_alloc_ctor(7, 1, 0); + lean_ctor_set(obj, 0, proj_obj); + obj + }, + 
IxonConstantInfo::Muts(muts) => { + let arr = lean_alloc_array(muts.len(), muts.len()); + for (i, mc) in muts.iter().enumerate() { + let mc_obj = build_mut_const(mc); + lean_array_set_core(arr, i, mc_obj); + } + let obj = lean_alloc_ctor(8, 1, 0); + lean_ctor_set(obj, 0, arr); + obj + }, + } + } +} + +/// Build Ixon.Constant +pub fn build_ixon_constant(constant: &IxonConstant) -> *mut c_void { + unsafe { + let info_obj = build_ixon_constant_info(&constant.info); + let sharing_obj = build_ixon_expr_array(&constant.sharing); + let refs_obj = build_address_array(&constant.refs); + let univs_obj = build_ixon_univ_array(&constant.univs); + let obj = lean_alloc_ctor(0, 4, 0); + lean_ctor_set(obj, 0, info_obj); + lean_ctor_set(obj, 1, sharing_obj); + lean_ctor_set(obj, 2, refs_obj); + lean_ctor_set(obj, 3, univs_obj); + obj + } +} + +// ============================================================================= +// Decode Functions +// ============================================================================= + +/// Decode a ByteArray (Address) to Address. +pub fn decode_ixon_address(ptr: *const c_void) -> Address { + let ba: &LeanSArrayObject = as_ref_unsafe(ptr.cast()); + let bytes = ba.data(); + Address::from_slice(&bytes[..32]).expect("Address should be 32 bytes") +} + +/// Decode Array Address. +pub fn decode_ixon_address_array(ptr: *const c_void) -> Vec
{ + let arr: &crate::lean::array::LeanArrayObject = as_ref_unsafe(ptr.cast()); + arr.to_vec(decode_ixon_address) +} + +/// Decode Ixon.Definition. +/// Lean stores scalar fields ordered by size (largest first). +/// Layout: header(8) + typ(8) + value(8) + lvls(8) + kind(1) + safety(1) + padding(6) +pub fn decode_ixon_definition(ptr: *const c_void) -> IxonDefinition { + unsafe { + let typ_ptr = lean_ctor_get(ptr.cast_mut(), 0); + let value_ptr = lean_ctor_get(ptr.cast_mut(), 1); + + let base = ptr.cast::(); + // Scalars start after header (8) + 2 obj fields (16) = offset 24 + let scalar_base = base.add(24); + + // lvls at offset 0 (8 bytes) - largest scalar first + let lvls = *scalar_base.cast::(); + // kind at offset 8 (1 byte) + let kind_val = *scalar_base.add(8); + let kind = match kind_val { + 0 => DefKind::Definition, + 1 => DefKind::Opaque, + 2 => DefKind::Theorem, + _ => panic!("Invalid DefKind: {}", kind_val), + }; + // safety at offset 9 (1 byte) + let safety_val = *scalar_base.add(9); + let safety = match safety_val { + 0 => crate::ix::env::DefinitionSafety::Unsafe, + 1 => crate::ix::env::DefinitionSafety::Safe, + 2 => crate::ix::env::DefinitionSafety::Partial, + _ => panic!("Invalid DefinitionSafety: {}", safety_val), + }; + + IxonDefinition { + kind, + safety, + lvls, + typ: Arc::new(decode_ixon_expr(typ_ptr)), + value: Arc::new(decode_ixon_expr(value_ptr)), + } + } +} + +/// Decode Ixon.RecursorRule. +pub fn decode_ixon_recursor_rule(ptr: *const c_void) -> IxonRecursorRule { + unsafe { + let rhs_ptr = lean_ctor_get(ptr.cast_mut(), 0); + let base = ptr.cast::(); + let fields = *base.add(8 + 8).cast::(); + IxonRecursorRule { fields, rhs: Arc::new(decode_ixon_expr(rhs_ptr)) } + } +} + +/// Decode Ixon.Recursor. 
+/// Scalars ordered by size: lvls(8) + params(8) + indices(8) + motives(8) + minors(8) + k(1) + isUnsafe(1) + padding(6) +pub fn decode_ixon_recursor(ptr: *const c_void) -> IxonRecursor { + unsafe { + let typ_ptr = lean_ctor_get(ptr.cast_mut(), 0); + let rules_ptr = lean_ctor_get(ptr.cast_mut(), 1); + let base = ptr.cast::(); + let scalar_base = base.add(2 * 8 + 8); + // u64 fields first + let lvls = *scalar_base.cast::(); + let params = *scalar_base.add(8).cast::(); + let indices = *scalar_base.add(16).cast::(); + let motives = *scalar_base.add(24).cast::(); + let minors = *scalar_base.add(32).cast::(); + // bool fields last + let k = *scalar_base.add(40) != 0; + let is_unsafe = *scalar_base.add(41) != 0; + + let rules_arr: &crate::lean::array::LeanArrayObject = + as_ref_unsafe(rules_ptr.cast()); + let rules = rules_arr.to_vec(decode_ixon_recursor_rule); + + IxonRecursor { + k, + is_unsafe, + lvls, + params, + indices, + motives, + minors, + typ: Arc::new(decode_ixon_expr(typ_ptr)), + rules, + } + } +} + +/// Decode Ixon.Axiom. +/// Scalars ordered by size: lvls(8) + isUnsafe(1) + padding(7) +pub fn decode_ixon_axiom(ptr: *const c_void) -> IxonAxiom { + unsafe { + let typ_ptr = lean_ctor_get(ptr.cast_mut(), 0); + let base = ptr.cast::(); + let scalar_base = base.add(8 + 8); + // lvls at offset 0 + let lvls = *scalar_base.cast::(); + // isUnsafe at offset 8 + let is_unsafe = *scalar_base.add(8) != 0; + IxonAxiom { is_unsafe, lvls, typ: Arc::new(decode_ixon_expr(typ_ptr)) } + } +} + +/// Decode Ixon.Quotient. +/// QuotKind is a scalar (not object field). 
Scalars: lvls(8) + kind(1) + padding(7) +pub fn decode_ixon_quotient(ptr: *const c_void) -> IxonQuotient { + unsafe { + // typ is the only object field (at index 0) + let typ_ptr = lean_ctor_get(ptr.cast_mut(), 0); + let base = ptr.cast::(); + let scalar_base = base.add(8 + 8); + // lvls at offset 0 + let lvls = *scalar_base.cast::(); + // kind at offset 8 + let kind_val = *scalar_base.add(8); + let kind = match kind_val { + 0 => crate::ix::env::QuotKind::Type, + 1 => crate::ix::env::QuotKind::Ctor, + 2 => crate::ix::env::QuotKind::Lift, + 3 => crate::ix::env::QuotKind::Ind, + _ => panic!("Invalid QuotKind: {}", kind_val), + }; + IxonQuotient { kind, lvls, typ: Arc::new(decode_ixon_expr(typ_ptr)) } + } +} + +/// Decode Ixon.Constructor. +/// Scalars ordered by size: lvls(8) + cidx(8) + params(8) + fields(8) + isUnsafe(1) + padding(7) +pub fn decode_ixon_constructor(ptr: *const c_void) -> IxonConstructor { + unsafe { + let typ_ptr = lean_ctor_get(ptr.cast_mut(), 0); + let base = ptr.cast::(); + let scalar_base = base.add(8 + 8); + // u64 fields first + let lvls = *scalar_base.cast::(); + let cidx = *scalar_base.add(8).cast::(); + let params = *scalar_base.add(16).cast::(); + let fields = *scalar_base.add(24).cast::(); + // bool field last + let is_unsafe = *scalar_base.add(32) != 0; + IxonConstructor { + is_unsafe, + lvls, + cidx, + params, + fields, + typ: Arc::new(decode_ixon_expr(typ_ptr)), + } + } +} + +/// Decode Ixon.Inductive. 
+/// Scalars ordered by size: lvls(8) + params(8) + indices(8) + nested(8) + recr(1) + refl(1) + isUnsafe(1) + padding(5) +pub fn decode_ixon_inductive(ptr: *const c_void) -> IxonInductive { + unsafe { + let typ_ptr = lean_ctor_get(ptr.cast_mut(), 0); + let ctors_ptr = lean_ctor_get(ptr.cast_mut(), 1); + let base = ptr.cast::(); + let scalar_base = base.add(2 * 8 + 8); + // u64 fields first + let lvls = *scalar_base.cast::(); + let params = *scalar_base.add(8).cast::(); + let indices = *scalar_base.add(16).cast::(); + let nested = *scalar_base.add(24).cast::(); + // bool fields last + let recr = *scalar_base.add(32) != 0; + let refl = *scalar_base.add(33) != 0; + let is_unsafe = *scalar_base.add(34) != 0; + + let ctors_arr: &crate::lean::array::LeanArrayObject = + as_ref_unsafe(ctors_ptr.cast()); + let ctors = ctors_arr.to_vec(decode_ixon_constructor); + + IxonInductive { + recr, + refl, + is_unsafe, + lvls, + params, + indices, + nested, + typ: Arc::new(decode_ixon_expr(typ_ptr)), + ctors, + } + } +} + +/// Decode Ixon.InductiveProj. +pub fn decode_ixon_inductive_proj(ptr: *const c_void) -> InductiveProj { + unsafe { + let block_ptr = lean_ctor_get(ptr.cast_mut(), 0); + let base = ptr.cast::(); + let idx = *base.add(8 + 8).cast::(); + InductiveProj { idx, block: decode_ixon_address(block_ptr) } + } +} + +/// Decode Ixon.ConstructorProj. +pub fn decode_ixon_constructor_proj(ptr: *const c_void) -> ConstructorProj { + unsafe { + let block_ptr = lean_ctor_get(ptr.cast_mut(), 0); + let base = ptr.cast::(); + let idx = *base.add(8 + 8).cast::(); + let cidx = *base.add(8 + 16).cast::(); + ConstructorProj { idx, cidx, block: decode_ixon_address(block_ptr) } + } +} + +/// Decode Ixon.RecursorProj. 
+pub fn decode_ixon_recursor_proj(ptr: *const c_void) -> RecursorProj { + unsafe { + let block_ptr = lean_ctor_get(ptr.cast_mut(), 0); + let base = ptr.cast::(); + let idx = *base.add(8 + 8).cast::(); + RecursorProj { idx, block: decode_ixon_address(block_ptr) } + } +} + +/// Decode Ixon.DefinitionProj. +pub fn decode_ixon_definition_proj(ptr: *const c_void) -> DefinitionProj { + unsafe { + let block_ptr = lean_ctor_get(ptr.cast_mut(), 0); + let base = ptr.cast::(); + let idx = *base.add(8 + 8).cast::(); + DefinitionProj { idx, block: decode_ixon_address(block_ptr) } + } +} + +/// Decode Ixon.MutConst. +pub fn decode_ixon_mut_const(ptr: *const c_void) -> MutConst { + unsafe { + let tag = lean_obj_tag(ptr.cast_mut()); + let inner_ptr = lean_ctor_get(ptr.cast_mut(), 0); + match tag { + 0 => MutConst::Defn(decode_ixon_definition(inner_ptr)), + 1 => MutConst::Indc(decode_ixon_inductive(inner_ptr)), + 2 => MutConst::Recr(decode_ixon_recursor(inner_ptr)), + _ => panic!("Invalid Ixon.MutConst tag: {}", tag), + } + } +} + +/// Decode Ixon.ConstantInfo. 
+pub fn decode_ixon_constant_info(ptr: *const c_void) -> IxonConstantInfo { + unsafe { + let tag = lean_obj_tag(ptr.cast_mut()); + let inner_ptr = lean_ctor_get(ptr.cast_mut(), 0); + match tag { + 0 => IxonConstantInfo::Defn(decode_ixon_definition(inner_ptr)), + 1 => IxonConstantInfo::Recr(decode_ixon_recursor(inner_ptr)), + 2 => IxonConstantInfo::Axio(decode_ixon_axiom(inner_ptr)), + 3 => IxonConstantInfo::Quot(decode_ixon_quotient(inner_ptr)), + 4 => IxonConstantInfo::CPrj(decode_ixon_constructor_proj(inner_ptr)), + 5 => IxonConstantInfo::RPrj(decode_ixon_recursor_proj(inner_ptr)), + 6 => IxonConstantInfo::IPrj(decode_ixon_inductive_proj(inner_ptr)), + 7 => IxonConstantInfo::DPrj(decode_ixon_definition_proj(inner_ptr)), + 8 => { + let muts_arr: &crate::lean::array::LeanArrayObject = + as_ref_unsafe(inner_ptr.cast()); + let muts = muts_arr.to_vec(decode_ixon_mut_const); + IxonConstantInfo::Muts(muts) + }, + _ => panic!("Invalid Ixon.ConstantInfo tag: {}", tag), + } + } +} + +/// Decode Ixon.Constant. +pub fn decode_ixon_constant(ptr: *const c_void) -> IxonConstant { + unsafe { + let info_ptr = lean_ctor_get(ptr.cast_mut(), 0); + let sharing_ptr = lean_ctor_get(ptr.cast_mut(), 1); + let refs_ptr = lean_ctor_get(ptr.cast_mut(), 2); + let univs_ptr = lean_ctor_get(ptr.cast_mut(), 3); + + IxonConstant { + info: decode_ixon_constant_info(info_ptr), + sharing: decode_ixon_expr_array(sharing_ptr), + refs: decode_ixon_address_array(refs_ptr), + univs: decode_ixon_univ_array(univs_ptr), + } + } +} + +// ============================================================================= +// FFI Exports +// ============================================================================= + +/// Round-trip Ixon.Definition. +#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_ixon_definition( + ptr: *const c_void, +) -> *mut c_void { + let def = decode_ixon_definition(ptr); + build_ixon_definition(&def) +} + +/// Round-trip Ixon.Recursor. 
+#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_ixon_recursor( + ptr: *const c_void, +) -> *mut c_void { + let rec = decode_ixon_recursor(ptr); + build_ixon_recursor(&rec) +} + +/// Round-trip Ixon.Axiom. +#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_ixon_axiom(ptr: *const c_void) -> *mut c_void { + let ax = decode_ixon_axiom(ptr); + build_ixon_axiom(&ax) +} + +/// Round-trip Ixon.Quotient. +#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_ixon_quotient( + ptr: *const c_void, +) -> *mut c_void { + let quot = decode_ixon_quotient(ptr); + build_ixon_quotient(&quot) +} + +/// Round-trip Ixon.ConstantInfo. +#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_ixon_constant_info( + ptr: *const c_void, +) -> *mut c_void { + let info = decode_ixon_constant_info(ptr); + build_ixon_constant_info(&info) +} + +/// Round-trip Ixon.Constant. +#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_ixon_constant( + ptr: *const c_void, +) -> *mut c_void { + let constant = decode_ixon_constant(ptr); + build_ixon_constant(&constant) +} + +/// Round-trip Ixon.RecursorRule. +#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_ixon_recursor_rule( + ptr: *const c_void, +) -> *mut c_void { + let rule = decode_ixon_recursor_rule(ptr); + build_ixon_recursor_rule(&rule) +} + +/// Round-trip Ixon.Constructor. +#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_ixon_constructor( + ptr: *const c_void, +) -> *mut c_void { + let ctor = decode_ixon_constructor(ptr); + build_ixon_constructor(&ctor) +} + +/// Round-trip Ixon.Inductive. +#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_ixon_inductive( + ptr: *const c_void, +) -> *mut c_void { + let ind = decode_ixon_inductive(ptr); + build_ixon_inductive(&ind) +} + +/// Round-trip Ixon.InductiveProj. 
+#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_ixon_inductive_proj( + ptr: *const c_void, +) -> *mut c_void { + let proj = decode_ixon_inductive_proj(ptr); + build_inductive_proj(&proj) +} + +/// Round-trip Ixon.ConstructorProj. +#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_ixon_constructor_proj( + ptr: *const c_void, +) -> *mut c_void { + let proj = decode_ixon_constructor_proj(ptr); + build_constructor_proj(&proj) +} + +/// Round-trip Ixon.RecursorProj. +#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_ixon_recursor_proj( + ptr: *const c_void, +) -> *mut c_void { + let proj = decode_ixon_recursor_proj(ptr); + build_recursor_proj(&proj) +} + +/// Round-trip Ixon.DefinitionProj. +#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_ixon_definition_proj( + ptr: *const c_void, +) -> *mut c_void { + let proj = decode_ixon_definition_proj(ptr); + build_definition_proj(&proj) +} + +/// Round-trip Ixon.MutConst. +#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_ixon_mut_const( + ptr: *const c_void, +) -> *mut c_void { + let mc = decode_ixon_mut_const(ptr); + build_mut_const(&mc) +} diff --git a/src/lean/ffi/ixon/enums.rs b/src/lean/ffi/ixon/enums.rs new file mode 100644 index 00000000..e8c4b12c --- /dev/null +++ b/src/lean/ffi/ixon/enums.rs @@ -0,0 +1,117 @@ +//! Ixon enum types: DefKind, DefinitionSafety, QuotKind build/decode/roundtrip FFI. + +use std::ffi::c_void; + +use crate::ix::env::{DefinitionSafety, QuotKind}; +use crate::ix::ixon::constant::DefKind; + +/// Build Ixon.DefKind +/// | defn -- tag 0 +/// | opaq -- tag 1 +/// | thm -- tag 2 +/// Simple enums are represented as raw tag values (unboxed scalars). 
+pub fn build_def_kind(kind: &DefKind) -> *mut c_void { + let tag = match kind { + DefKind::Definition => 0, + DefKind::Opaque => 1, + DefKind::Theorem => 2, + }; + tag as *mut c_void +} + +/// Build Ixon.DefinitionSafety +/// | unsaf -- tag 0 +/// | safe -- tag 1 +/// | part -- tag 2 +pub fn build_ixon_definition_safety(safety: &DefinitionSafety) -> *mut c_void { + let tag = match safety { + DefinitionSafety::Unsafe => 0, + DefinitionSafety::Safe => 1, + DefinitionSafety::Partial => 2, + }; + tag as *mut c_void +} + +/// Build Ixon.QuotKind +/// | type -- tag 0 +/// | ctor -- tag 1 +/// | lift -- tag 2 +/// | ind -- tag 3 +pub fn build_ixon_quot_kind(kind: &QuotKind) -> *mut c_void { + let tag = match kind { + QuotKind::Type => 0, + QuotKind::Ctor => 1, + QuotKind::Lift => 2, + QuotKind::Ind => 3, + }; + tag as *mut c_void +} + +// ============================================================================= +// Decode Functions +// ============================================================================= + +/// Decode Ixon.DefKind (simple enum, raw tag value). +pub fn decode_ixon_def_kind(ptr: *const c_void) -> DefKind { + let tag = ptr as usize; + match tag { + 0 => DefKind::Definition, + 1 => DefKind::Opaque, + 2 => DefKind::Theorem, + _ => panic!("Invalid Ixon.DefKind tag: {}", tag), + } +} + +/// Decode Ixon.DefinitionSafety (simple enum, raw tag value). +pub fn decode_ixon_definition_safety(ptr: *const c_void) -> DefinitionSafety { + let tag = ptr as usize; + match tag { + 0 => DefinitionSafety::Unsafe, + 1 => DefinitionSafety::Safe, + 2 => DefinitionSafety::Partial, + _ => panic!("Invalid Ixon.DefinitionSafety tag: {}", tag), + } +} + +/// Decode Ixon.QuotKind (simple enum, raw tag value). 
+pub fn decode_ixon_quot_kind(ptr: *const c_void) -> QuotKind { + let tag = ptr as usize; + match tag { + 0 => QuotKind::Type, + 1 => QuotKind::Ctor, + 2 => QuotKind::Lift, + 3 => QuotKind::Ind, + _ => panic!("Invalid Ixon.QuotKind tag: {}", tag), + } +} + +// ============================================================================= +// FFI Exports +// ============================================================================= + +/// Round-trip Ixon.DefKind. +#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_ixon_def_kind( + ptr: *const c_void, +) -> *mut c_void { + let kind = decode_ixon_def_kind(ptr); + build_def_kind(&kind) +} + +/// Round-trip Ixon.DefinitionSafety. +#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_ixon_definition_safety( + ptr: *const c_void, +) -> *mut c_void { + let safety = decode_ixon_definition_safety(ptr); + build_ixon_definition_safety(&safety) +} + +/// Round-trip Ixon.QuotKind. +#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_ixon_quot_kind( + ptr: *const c_void, +) -> *mut c_void { + let kind = decode_ixon_quot_kind(ptr); + build_ixon_quot_kind(&kind) +} diff --git a/src/lean/ffi/ixon/env.rs b/src/lean/ffi/ixon/env.rs new file mode 100644 index 00000000..68781735 --- /dev/null +++ b/src/lean/ffi/ixon/env.rs @@ -0,0 +1,474 @@ +//! Ixon.RawEnv FFI build/decode/roundtrip functions. +//! +//! Provides full decode/build cycle for RawEnv and its component types: +//! RawConst, RawNamed, RawBlob, RawComm. 
+ +use std::ffi::c_void; + +use crate::ix::address::Address; +use crate::ix::env::Name; +use crate::ix::ixon::comm::Comm; +use crate::ix::ixon::constant::Constant as IxonConstant; +use crate::ix::ixon::env::{Env as IxonEnv, Named as IxonNamed}; +use crate::ix::ixon::metadata::ConstantMeta; +use crate::lean::array::LeanArrayObject; +use crate::lean::sarray::LeanSArrayObject; +use crate::lean::{ + as_ref_unsafe, lean_alloc_array, lean_alloc_ctor, lean_alloc_sarray, + lean_array_set_core, lean_ctor_get, lean_ctor_set, lean_mk_string, + lean_sarray_cptr, +}; + +use super::constant::{ + build_address_from_ixon, build_ixon_constant, decode_ixon_address, + decode_ixon_constant, +}; +use super::meta::{build_constant_meta, decode_constant_meta}; +use crate::lean::ffi::builder::LeanBuildCache; +use crate::lean::ffi::ix::name::{build_name, decode_ix_name}; + +// ============================================================================= +// Comm Type (secret: Address, payload: Address) +// ============================================================================= + +/// Decoded Ixon.Comm +pub struct DecodedComm { + pub secret: Address, + pub payload: Address, +} + +/// Decode Ixon.Comm from Lean pointer. +/// Comm = { secret : Address, payload : Address } +pub fn decode_comm(ptr: *const c_void) -> DecodedComm { + unsafe { + let secret_ptr = lean_ctor_get(ptr as *mut _, 0); + let payload_ptr = lean_ctor_get(ptr as *mut _, 1); + DecodedComm { + secret: decode_ixon_address(secret_ptr), + payload: decode_ixon_address(payload_ptr), + } + } +} + +/// Build Ixon.Comm Lean object. 
+pub fn build_comm(comm: &DecodedComm) -> *mut c_void { + unsafe { + let secret_obj = build_address_from_ixon(&comm.secret); + let payload_obj = build_address_from_ixon(&comm.payload); + let obj = lean_alloc_ctor(0, 2, 0); + lean_ctor_set(obj, 0, secret_obj); + lean_ctor_set(obj, 1, payload_obj); + obj + } +} + +// ============================================================================= +// RawConst (addr: Address, const: Constant) +// ============================================================================= + +/// Decoded Ixon.RawConst +pub struct DecodedRawConst { + pub addr: Address, + pub constant: IxonConstant, +} + +/// Decode Ixon.RawConst from Lean pointer. +pub fn decode_raw_const(ptr: *const c_void) -> DecodedRawConst { + unsafe { + let addr_ptr = lean_ctor_get(ptr as *mut _, 0); + let const_ptr = lean_ctor_get(ptr as *mut _, 1); + DecodedRawConst { + addr: decode_ixon_address(addr_ptr), + constant: decode_ixon_constant(const_ptr), + } + } +} + +/// Build Ixon.RawConst Lean object. +pub fn build_raw_const(rc: &DecodedRawConst) -> *mut c_void { + unsafe { + let addr_obj = build_address_from_ixon(&rc.addr); + let const_obj = build_ixon_constant(&rc.constant); + let obj = lean_alloc_ctor(0, 2, 0); + lean_ctor_set(obj, 0, addr_obj); + lean_ctor_set(obj, 1, const_obj); + obj + } +} + +// ============================================================================= +// RawNamed (name: Ix.Name, addr: Address, constMeta: ConstantMeta) +// ============================================================================= + +/// Decoded Ixon.RawNamed +pub struct DecodedRawNamed { + pub name: Name, + pub addr: Address, + pub const_meta: ConstantMeta, +} + +/// Decode Ixon.RawNamed from Lean pointer. 
+pub fn decode_raw_named(ptr: *const c_void) -> DecodedRawNamed { + unsafe { + let name_ptr = lean_ctor_get(ptr as *mut _, 0); + let addr_ptr = lean_ctor_get(ptr as *mut _, 1); + let meta_ptr = lean_ctor_get(ptr as *mut _, 2); + DecodedRawNamed { + name: decode_ix_name(name_ptr), + addr: decode_ixon_address(addr_ptr), + const_meta: decode_constant_meta(meta_ptr), + } + } +} + +/// Build Ixon.RawNamed Lean object. +pub fn build_raw_named( + cache: &mut LeanBuildCache, + rn: &DecodedRawNamed, +) -> *mut c_void { + unsafe { + let name_obj = build_name(cache, &rn.name); + let addr_obj = build_address_from_ixon(&rn.addr); + let meta_obj = build_constant_meta(&rn.const_meta); + let obj = lean_alloc_ctor(0, 3, 0); + lean_ctor_set(obj, 0, name_obj); + lean_ctor_set(obj, 1, addr_obj); + lean_ctor_set(obj, 2, meta_obj); + obj + } +} + +// ============================================================================= +// RawBlob (addr: Address, bytes: ByteArray) +// ============================================================================= + +/// Decoded Ixon.RawBlob +pub struct DecodedRawBlob { + pub addr: Address, + pub bytes: Vec, +} + +/// Decode Ixon.RawBlob from Lean pointer. +pub fn decode_raw_blob(ptr: *const c_void) -> DecodedRawBlob { + unsafe { + let addr_ptr = lean_ctor_get(ptr as *mut _, 0); + let bytes_ptr = lean_ctor_get(ptr as *mut _, 1); + let bytes_arr: &LeanSArrayObject = as_ref_unsafe(bytes_ptr.cast()); + DecodedRawBlob { + addr: decode_ixon_address(addr_ptr), + bytes: bytes_arr.data().to_vec(), + } + } +} + +/// Build Ixon.RawBlob Lean object. 
+pub fn build_raw_blob(rb: &DecodedRawBlob) -> *mut c_void { + unsafe { + let addr_obj = build_address_from_ixon(&rb.addr); + // Build ByteArray (SArray UInt8) + let len = rb.bytes.len(); + let bytes_obj = lean_alloc_sarray(1, len, len); + let data_ptr = lean_sarray_cptr(bytes_obj); + std::ptr::copy_nonoverlapping(rb.bytes.as_ptr(), data_ptr, len); + + let obj = lean_alloc_ctor(0, 2, 0); + lean_ctor_set(obj, 0, addr_obj); + lean_ctor_set(obj, 1, bytes_obj); + obj + } +} + +// ============================================================================= +// RawComm (addr: Address, comm: Comm) +// ============================================================================= + +/// Decoded Ixon.RawComm +pub struct DecodedRawComm { + pub addr: Address, + pub comm: DecodedComm, +} + +/// Decode Ixon.RawComm from Lean pointer. +pub fn decode_raw_comm(ptr: *const c_void) -> DecodedRawComm { + unsafe { + let addr_ptr = lean_ctor_get(ptr as *mut _, 0); + let comm_ptr = lean_ctor_get(ptr as *mut _, 1); + DecodedRawComm { + addr: decode_ixon_address(addr_ptr), + comm: decode_comm(comm_ptr), + } + } +} + +/// Build Ixon.RawComm Lean object. +pub fn build_raw_comm(rc: &DecodedRawComm) -> *mut c_void { + unsafe { + let addr_obj = build_address_from_ixon(&rc.addr); + let comm_obj = build_comm(&rc.comm); + let obj = lean_alloc_ctor(0, 2, 0); + lean_ctor_set(obj, 0, addr_obj); + lean_ctor_set(obj, 1, comm_obj); + obj + } +} + +// ============================================================================= +// RawNameEntry (addr: Address, name: Ix.Name) +// ============================================================================= + +/// Decoded Ixon.RawNameEntry +pub struct DecodedRawNameEntry { + pub addr: Address, + pub name: Name, +} + +/// Decode Ixon.RawNameEntry from Lean pointer. 
+pub fn decode_raw_name_entry(ptr: *const c_void) -> DecodedRawNameEntry { + unsafe { + let addr_ptr = lean_ctor_get(ptr as *mut _, 0); + let name_ptr = lean_ctor_get(ptr as *mut _, 1); + DecodedRawNameEntry { + addr: decode_ixon_address(addr_ptr), + name: decode_ix_name(name_ptr), + } + } +} + +/// Build Ixon.RawNameEntry Lean object. +pub fn build_raw_name_entry( + cache: &mut LeanBuildCache, + addr: &Address, + name: &Name, +) -> *mut c_void { + unsafe { + let addr_obj = build_address_from_ixon(addr); + let name_obj = build_name(cache, name); + let obj = lean_alloc_ctor(0, 2, 0); + lean_ctor_set(obj, 0, addr_obj); + lean_ctor_set(obj, 1, name_obj); + obj + } +} + +// ============================================================================= +// RawEnv (consts, named, blobs, comms, names) +// ============================================================================= + +/// Decoded Ixon.RawEnv +pub struct DecodedRawEnv { + pub consts: Vec, + pub named: Vec, + pub blobs: Vec, + pub comms: Vec, + pub names: Vec, +} + +/// Decode Ixon.RawEnv from Lean pointer. 
+pub fn decode_raw_env(ptr: *const c_void) -> DecodedRawEnv { + unsafe { + let consts_ptr = lean_ctor_get(ptr as *mut _, 0); + let named_ptr = lean_ctor_get(ptr as *mut _, 1); + let blobs_ptr = lean_ctor_get(ptr as *mut _, 2); + let comms_ptr = lean_ctor_get(ptr as *mut _, 3); + let names_ptr = lean_ctor_get(ptr as *mut _, 4); + + let consts_arr: &LeanArrayObject = as_ref_unsafe(consts_ptr.cast()); + let named_arr: &LeanArrayObject = as_ref_unsafe(named_ptr.cast()); + let blobs_arr: &LeanArrayObject = as_ref_unsafe(blobs_ptr.cast()); + let comms_arr: &LeanArrayObject = as_ref_unsafe(comms_ptr.cast()); + let names_arr: &LeanArrayObject = as_ref_unsafe(names_ptr.cast()); + + DecodedRawEnv { + consts: consts_arr.to_vec(decode_raw_const), + named: named_arr.to_vec(decode_raw_named), + blobs: blobs_arr.to_vec(decode_raw_blob), + comms: comms_arr.to_vec(decode_raw_comm), + names: names_arr.to_vec(decode_raw_name_entry), + } + } +} + +/// Build Ixon.RawEnv Lean object. +pub fn build_raw_env(env: &DecodedRawEnv) -> *mut c_void { + unsafe { + let mut cache = LeanBuildCache::new(); + + // Build consts array + let consts_arr = lean_alloc_array(env.consts.len(), env.consts.len()); + for (i, rc) in env.consts.iter().enumerate() { + let obj = build_raw_const(rc); + lean_array_set_core(consts_arr, i, obj); + } + + // Build named array + let named_arr = lean_alloc_array(env.named.len(), env.named.len()); + for (i, rn) in env.named.iter().enumerate() { + let obj = build_raw_named(&mut cache, rn); + lean_array_set_core(named_arr, i, obj); + } + + // Build blobs array + let blobs_arr = lean_alloc_array(env.blobs.len(), env.blobs.len()); + for (i, rb) in env.blobs.iter().enumerate() { + let obj = build_raw_blob(rb); + lean_array_set_core(blobs_arr, i, obj); + } + + // Build comms array + let comms_arr = lean_alloc_array(env.comms.len(), env.comms.len()); + for (i, rc) in env.comms.iter().enumerate() { + let obj = build_raw_comm(rc); + lean_array_set_core(comms_arr, i, obj); + } + + // 
Build names array + let names_arr = lean_alloc_array(env.names.len(), env.names.len()); + for (i, rn) in env.names.iter().enumerate() { + let obj = build_raw_name_entry(&mut cache, &rn.addr, &rn.name); + lean_array_set_core(names_arr, i, obj); + } + + // Build RawEnv structure + let obj = lean_alloc_ctor(0, 5, 0); + lean_ctor_set(obj, 0, consts_arr); + lean_ctor_set(obj, 1, named_arr); + lean_ctor_set(obj, 2, blobs_arr); + lean_ctor_set(obj, 3, comms_arr); + lean_ctor_set(obj, 4, names_arr); + obj + } +} + +// ============================================================================= +// DecodedRawEnv ↔ IxonEnv Conversion Helpers +// ============================================================================= + +/// Reconstruct a Rust IxonEnv from a DecodedRawEnv. +pub fn decoded_to_ixon_env(decoded: &DecodedRawEnv) -> IxonEnv { + let env = IxonEnv::new(); + for rc in &decoded.consts { + env.store_const(rc.addr.clone(), rc.constant.clone()); + } + for rn in &decoded.names { + env.store_name(rn.addr.clone(), rn.name.clone()); + } + for rn in &decoded.named { + let named = IxonNamed::new(rn.addr.clone(), rn.const_meta.clone()); + env.register_name(rn.name.clone(), named); + } + for rb in &decoded.blobs { + env.blobs.insert(rb.addr.clone(), rb.bytes.clone()); + } + for rc in &decoded.comms { + let comm = + Comm { secret: rc.comm.secret.clone(), payload: rc.comm.payload.clone() }; + env.store_comm(rc.addr.clone(), comm); + } + env +} + +/// Convert a Rust IxonEnv to a DecodedRawEnv. 
+pub fn ixon_env_to_decoded(env: &IxonEnv) -> DecodedRawEnv { + let consts = env + .consts + .iter() + .map(|e| DecodedRawConst { + addr: e.key().clone(), + constant: e.value().clone(), + }) + .collect(); + let named = env + .named + .iter() + .map(|e| DecodedRawNamed { + name: e.key().clone(), + addr: e.value().addr.clone(), + const_meta: e.value().meta.clone(), + }) + .collect(); + let blobs = env + .blobs + .iter() + .map(|e| DecodedRawBlob { addr: e.key().clone(), bytes: e.value().clone() }) + .collect(); + let comms = env + .comms + .iter() + .map(|e| DecodedRawComm { + addr: e.key().clone(), + comm: DecodedComm { + secret: e.value().secret.clone(), + payload: e.value().payload.clone(), + }, + }) + .collect(); + let names = env + .names + .iter() + .map(|e| DecodedRawNameEntry { + addr: e.key().clone(), + name: e.value().clone(), + }) + .collect(); + DecodedRawEnv { consts, named, blobs, comms, names } +} + +// ============================================================================= +// rs_ser_env: Serialize an Ixon.RawEnv to bytes +// ============================================================================= + +/// FFI: Serialize an Ixon.RawEnv → ByteArray via Rust's Env.put. Pure. +#[unsafe(no_mangle)] +pub extern "C" fn rs_ser_env(raw_env_ptr: *const c_void) -> *mut c_void { + let decoded = decode_raw_env(raw_env_ptr); + let env = decoded_to_ixon_env(&decoded); + let mut buf = Vec::new(); + env.put(&mut buf).expect("Env serialization failed"); + + unsafe { + let ba = lean_alloc_sarray(1, buf.len(), buf.len()); + std::ptr::copy_nonoverlapping( + buf.as_ptr(), + lean_sarray_cptr(ba), + buf.len(), + ); + ba + } +} + +// ============================================================================= +// rs_des_env: Deserialize bytes to an Ixon.RawEnv +// ============================================================================= + +/// FFI: Deserialize ByteArray → Except String Ixon.RawEnv via Rust's Env.get. Pure. 
+#[unsafe(no_mangle)] +pub extern "C" fn rs_des_env(bytes_ptr: *const c_void) -> *mut c_void { + let bytes_arr: &LeanSArrayObject = as_ref_unsafe(bytes_ptr.cast()); + let data = bytes_arr.data(); + let mut slice: &[u8] = data; + match IxonEnv::get(&mut slice) { + Ok(env) => { + let decoded = ixon_env_to_decoded(&env); + let raw_env = build_raw_env(&decoded); + // Except.ok (tag 1) + unsafe { + let obj = lean_alloc_ctor(1, 1, 0); + lean_ctor_set(obj, 0, raw_env); + obj + } + }, + Err(e) => { + // Except.error (tag 0) + let msg = std::ffi::CString::new(format!("rs_des_env: {}", e)) + .unwrap_or_else(|_| { + std::ffi::CString::new("rs_des_env: deserialization error").unwrap() + }); + unsafe { + let lean_str = lean_mk_string(msg.as_ptr()); + let obj = lean_alloc_ctor(0, 1, 0); + lean_ctor_set(obj, 0, lean_str); + obj + } + }, + } +} diff --git a/src/lean/ffi/ixon/expr.rs b/src/lean/ffi/ixon/expr.rs new file mode 100644 index 00000000..060d91b2 --- /dev/null +++ b/src/lean/ffi/ixon/expr.rs @@ -0,0 +1,287 @@ +//! Ixon.Expr build/decode/roundtrip FFI. + +use std::ffi::c_void; +use std::sync::Arc; + +use crate::ix::ixon::expr::Expr as IxonExpr; +use crate::lean::{ + as_ref_unsafe, lean_alloc_array, lean_alloc_ctor, lean_array_set_core, + lean_ctor_get, lean_ctor_set, lean_obj_tag, +}; +use crate::lean_unbox; + +/// Build Ixon.Expr (12 constructors). 
+pub fn build_ixon_expr(expr: &IxonExpr) -> *mut c_void { + unsafe { + match expr { + IxonExpr::Sort(idx) => { + let obj = lean_alloc_ctor(0, 0, 8); + let base = obj.cast::(); + *base.add(8).cast::() = *idx; + obj + }, + IxonExpr::Var(idx) => { + let obj = lean_alloc_ctor(1, 0, 8); + let base = obj.cast::(); + *base.add(8).cast::() = *idx; + obj + }, + IxonExpr::Ref(ref_idx, univ_idxs) => { + let arr = lean_alloc_array(univ_idxs.len(), univ_idxs.len()); + for (i, idx) in univ_idxs.iter().enumerate() { + // Build heap-boxed UInt64: ctor with tag 0, 0 obj fields, 8 scalar bytes + let uint64_obj = lean_alloc_ctor(0, 0, 8); + let base = uint64_obj.cast::(); + *base.add(8).cast::() = *idx; + lean_array_set_core(arr, i, uint64_obj); + } + let obj = lean_alloc_ctor(2, 1, 8); + lean_ctor_set(obj, 0, arr); + let base = obj.cast::(); + *base.add(8 + 8).cast::() = *ref_idx; + obj + }, + IxonExpr::Rec(rec_idx, univ_idxs) => { + let arr = lean_alloc_array(univ_idxs.len(), univ_idxs.len()); + for (i, idx) in univ_idxs.iter().enumerate() { + let uint64_obj = lean_alloc_ctor(0, 0, 8); + let base = uint64_obj.cast::(); + *base.add(8).cast::() = *idx; + lean_array_set_core(arr, i, uint64_obj); + } + let obj = lean_alloc_ctor(3, 1, 8); + lean_ctor_set(obj, 0, arr); + let base = obj.cast::(); + *base.add(8 + 8).cast::() = *rec_idx; + obj + }, + IxonExpr::Prj(type_ref_idx, field_idx, val) => { + let val_obj = build_ixon_expr(val); + let obj = lean_alloc_ctor(4, 1, 16); + lean_ctor_set(obj, 0, val_obj); + let base = obj.cast::(); + *base.add(8 + 8).cast::() = *type_ref_idx; + *base.add(8 + 16).cast::() = *field_idx; + obj + }, + IxonExpr::Str(ref_idx) => { + let obj = lean_alloc_ctor(5, 0, 8); + let base = obj.cast::(); + *base.add(8).cast::() = *ref_idx; + obj + }, + IxonExpr::Nat(ref_idx) => { + let obj = lean_alloc_ctor(6, 0, 8); + let base = obj.cast::(); + *base.add(8).cast::() = *ref_idx; + obj + }, + IxonExpr::App(fun, arg) => { + let fun_obj = build_ixon_expr(fun); + let arg_obj 
= build_ixon_expr(arg); + let obj = lean_alloc_ctor(7, 2, 0); + lean_ctor_set(obj, 0, fun_obj); + lean_ctor_set(obj, 1, arg_obj); + obj + }, + IxonExpr::Lam(ty, body) => { + let ty_obj = build_ixon_expr(ty); + let body_obj = build_ixon_expr(body); + let obj = lean_alloc_ctor(8, 2, 0); + lean_ctor_set(obj, 0, ty_obj); + lean_ctor_set(obj, 1, body_obj); + obj + }, + IxonExpr::All(ty, body) => { + let ty_obj = build_ixon_expr(ty); + let body_obj = build_ixon_expr(body); + let obj = lean_alloc_ctor(9, 2, 0); + lean_ctor_set(obj, 0, ty_obj); + lean_ctor_set(obj, 1, body_obj); + obj + }, + IxonExpr::Let(non_dep, ty, val, body) => { + let ty_obj = build_ixon_expr(ty); + let val_obj = build_ixon_expr(val); + let body_obj = build_ixon_expr(body); + let obj = lean_alloc_ctor(10, 3, 1); + lean_ctor_set(obj, 0, ty_obj); + lean_ctor_set(obj, 1, val_obj); + lean_ctor_set(obj, 2, body_obj); + let base = obj.cast::(); + *base.add(3 * 8 + 8) = if *non_dep { 1 } else { 0 }; + obj + }, + IxonExpr::Share(idx) => { + let obj = lean_alloc_ctor(11, 0, 8); + let base = obj.cast::(); + *base.add(8).cast::() = *idx; + obj + }, + } + } +} + +/// Build an Array of Ixon.Expr. +pub fn build_ixon_expr_array(exprs: &[Arc]) -> *mut c_void { + unsafe { + let arr = lean_alloc_array(exprs.len(), exprs.len()); + for (i, expr) in exprs.iter().enumerate() { + let expr_obj = build_ixon_expr(expr); + lean_array_set_core(arr, i, expr_obj); + } + arr + } +} + +// ============================================================================= +// Decode Functions +// ============================================================================= + +/// Decode Array UInt64 from Lean. 
+/// UInt64 values in arrays are stored as: +/// - Scalars (odd pointers) for small values: use lean_unbox +/// - Heap objects (even pointers) with the u64 value at offset 8 +fn decode_u64_array(ptr: *const c_void) -> Vec { + use crate::lean::lean_is_scalar; + + let arr: &crate::lean::array::LeanArrayObject = as_ref_unsafe(ptr.cast()); + arr.to_vec(|elem| { + if lean_is_scalar(elem) { + // Small scalar value + lean_unbox!(u64, elem) + } else { + // Heap-boxed UInt64: value is at offset 8 (after 8-byte header) + unsafe { + let base = elem.cast::(); + *base.add(8).cast::() + } + } + }) +} + +/// Decode Ixon.Expr (12 constructors). +pub fn decode_ixon_expr(ptr: *const c_void) -> IxonExpr { + unsafe { + let tag = lean_obj_tag(ptr.cast_mut()); + match tag { + 0 => { + // sort (idx : UInt64) + let base = ptr.cast::(); + let idx = *base.add(8).cast::(); + IxonExpr::Sort(idx) + }, + 1 => { + // var (idx : UInt64) + let base = ptr.cast::(); + let idx = *base.add(8).cast::(); + IxonExpr::Var(idx) + }, + 2 => { + // ref (refIdx : UInt64) (univIdxs : Array UInt64) + let arr_ptr = lean_ctor_get(ptr.cast_mut(), 0); + let base = ptr.cast::(); + let ref_idx = *base.add(8 + 8).cast::(); + let univ_idxs = decode_u64_array(arr_ptr); + IxonExpr::Ref(ref_idx, univ_idxs) + }, + 3 => { + // recur (recIdx : UInt64) (univIdxs : Array UInt64) + let arr_ptr = lean_ctor_get(ptr.cast_mut(), 0); + let base = ptr.cast::(); + let rec_idx = *base.add(8 + 8).cast::(); + let univ_idxs = decode_u64_array(arr_ptr); + IxonExpr::Rec(rec_idx, univ_idxs) + }, + 4 => { + // prj (typeRefIdx : UInt64) (fieldIdx : UInt64) (val : Expr) + let val_ptr = lean_ctor_get(ptr.cast_mut(), 0); + let base = ptr.cast::(); + let type_ref_idx = *base.add(8 + 8).cast::(); + let field_idx = *base.add(8 + 16).cast::(); + IxonExpr::Prj( + type_ref_idx, + field_idx, + Arc::new(decode_ixon_expr(val_ptr)), + ) + }, + 5 => { + // str (refIdx : UInt64) + let base = ptr.cast::(); + let ref_idx = *base.add(8).cast::(); + 
IxonExpr::Str(ref_idx) + }, + 6 => { + // nat (refIdx : UInt64) + let base = ptr.cast::(); + let ref_idx = *base.add(8).cast::(); + IxonExpr::Nat(ref_idx) + }, + 7 => { + // app (f a : Expr) + let f_ptr = lean_ctor_get(ptr.cast_mut(), 0); + let a_ptr = lean_ctor_get(ptr.cast_mut(), 1); + IxonExpr::App( + Arc::new(decode_ixon_expr(f_ptr)), + Arc::new(decode_ixon_expr(a_ptr)), + ) + }, + 8 => { + // lam (ty body : Expr) + let ty_ptr = lean_ctor_get(ptr.cast_mut(), 0); + let body_ptr = lean_ctor_get(ptr.cast_mut(), 1); + IxonExpr::Lam( + Arc::new(decode_ixon_expr(ty_ptr)), + Arc::new(decode_ixon_expr(body_ptr)), + ) + }, + 9 => { + // all (ty body : Expr) + let ty_ptr = lean_ctor_get(ptr.cast_mut(), 0); + let body_ptr = lean_ctor_get(ptr.cast_mut(), 1); + IxonExpr::All( + Arc::new(decode_ixon_expr(ty_ptr)), + Arc::new(decode_ixon_expr(body_ptr)), + ) + }, + 10 => { + // letE (nonDep : Bool) (ty val body : Expr) + let ty_ptr = lean_ctor_get(ptr.cast_mut(), 0); + let val_ptr = lean_ctor_get(ptr.cast_mut(), 1); + let body_ptr = lean_ctor_get(ptr.cast_mut(), 2); + let base = ptr.cast::(); + let non_dep = *base.add(3 * 8 + 8) != 0; + IxonExpr::Let( + non_dep, + Arc::new(decode_ixon_expr(ty_ptr)), + Arc::new(decode_ixon_expr(val_ptr)), + Arc::new(decode_ixon_expr(body_ptr)), + ) + }, + 11 => { + // share (idx : UInt64) + let base = ptr.cast::(); + let idx = *base.add(8).cast::(); + IxonExpr::Share(idx) + }, + _ => panic!("Invalid Ixon.Expr tag: {}", tag), + } + } +} + +/// Decode Array Ixon.Expr. +pub fn decode_ixon_expr_array(ptr: *const c_void) -> Vec> { + let arr: &crate::lean::array::LeanArrayObject = as_ref_unsafe(ptr.cast()); + arr.to_vec(|e| Arc::new(decode_ixon_expr(e))) +} + +// ============================================================================= +// FFI Exports +// ============================================================================= + +/// Round-trip Ixon.Expr. 
+#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_ixon_expr(ptr: *const c_void) -> *mut c_void { + let expr = decode_ixon_expr(ptr); + build_ixon_expr(&expr) +} diff --git a/src/lean/ffi/ixon/meta.rs b/src/lean/ffi/ixon/meta.rs new file mode 100644 index 00000000..dafe11f7 --- /dev/null +++ b/src/lean/ffi/ixon/meta.rs @@ -0,0 +1,677 @@ +//! Ixon metadata types build/decode/roundtrip FFI. +//! +//! Includes: DataValue, KVMap, ExprMetaData, ExprMetaArena, ConstantMeta, Named, Comm + +use std::ffi::c_void; + +use crate::ix::address::Address; +use crate::ix::env::BinderInfo; +use crate::ix::ixon::Comm; +use crate::ix::ixon::env::Named; +use crate::ix::ixon::metadata::{ + ConstantMeta, DataValue as IxonDataValue, ExprMeta, ExprMetaData, KVMap, +}; +use crate::lean::array::LeanArrayObject; +use crate::lean::ctor::LeanCtorObject; +use crate::lean::{ + as_ref_unsafe, lean_alloc_array, lean_alloc_ctor, lean_array_set_core, + lean_box_fn, lean_ctor_get, lean_ctor_set, lean_ctor_set_uint8, + lean_ctor_set_uint64, lean_is_scalar, lean_obj_tag, +}; + +use super::constant::{ + build_address_array, build_address_from_ixon, decode_ixon_address, +}; +use crate::lean::ffi::ix::constant::{ + build_reducibility_hints, decode_reducibility_hints, +}; +use crate::lean::ffi::ix::expr::binder_info_to_u8; + +// ============================================================================= +// DataValue Build/Decode +// ============================================================================= + +/// Build Ixon.DataValue (for metadata) +pub fn build_ixon_data_value(dv: &IxonDataValue) -> *mut c_void { + unsafe { + match dv { + IxonDataValue::OfString(addr) => { + let addr_obj = build_address_from_ixon(addr); + let obj = lean_alloc_ctor(0, 1, 0); + lean_ctor_set(obj, 0, addr_obj); + obj + }, + IxonDataValue::OfBool(b) => { + let obj = lean_alloc_ctor(1, 0, 1); + lean_ctor_set_uint8(obj, 0, if *b { 1 } else { 0 }); + obj + }, + IxonDataValue::OfName(addr) => { + let addr_obj = 
build_address_from_ixon(addr); + let obj = lean_alloc_ctor(2, 1, 0); + lean_ctor_set(obj, 0, addr_obj); + obj + }, + IxonDataValue::OfNat(addr) => { + let addr_obj = build_address_from_ixon(addr); + let obj = lean_alloc_ctor(3, 1, 0); + lean_ctor_set(obj, 0, addr_obj); + obj + }, + IxonDataValue::OfInt(addr) => { + let addr_obj = build_address_from_ixon(addr); + let obj = lean_alloc_ctor(4, 1, 0); + lean_ctor_set(obj, 0, addr_obj); + obj + }, + IxonDataValue::OfSyntax(addr) => { + let addr_obj = build_address_from_ixon(addr); + let obj = lean_alloc_ctor(5, 1, 0); + lean_ctor_set(obj, 0, addr_obj); + obj + }, + } + } +} + +/// Decode Ixon.DataValue. +pub fn decode_ixon_data_value(ptr: *const c_void) -> IxonDataValue { + unsafe { + let tag = lean_obj_tag(ptr as *mut _); + match tag { + 0 => { + let addr_ptr = lean_ctor_get(ptr as *mut _, 0); + IxonDataValue::OfString(decode_ixon_address(addr_ptr)) + }, + 1 => { + let ctor: &LeanCtorObject = as_ref_unsafe(ptr.cast()); + let b = ctor.get_scalar_u8(0, 0) != 0; + IxonDataValue::OfBool(b) + }, + 2 => { + let addr_ptr = lean_ctor_get(ptr as *mut _, 0); + IxonDataValue::OfName(decode_ixon_address(addr_ptr)) + }, + 3 => { + let addr_ptr = lean_ctor_get(ptr as *mut _, 0); + IxonDataValue::OfNat(decode_ixon_address(addr_ptr)) + }, + 4 => { + let addr_ptr = lean_ctor_get(ptr as *mut _, 0); + IxonDataValue::OfInt(decode_ixon_address(addr_ptr)) + }, + 5 => { + let addr_ptr = lean_ctor_get(ptr as *mut _, 0); + IxonDataValue::OfSyntax(decode_ixon_address(addr_ptr)) + }, + _ => panic!("Invalid Ixon.DataValue tag: {}", tag), + } + } +} + +// ============================================================================= +// KVMap Build/Decode +// ============================================================================= + +/// Build an Ixon.KVMap (Array (Address × DataValue)). 
+pub fn build_ixon_kvmap(kvmap: &KVMap) -> *mut c_void { + unsafe { + let arr = lean_alloc_array(kvmap.len(), kvmap.len()); + for (i, (addr, dv)) in kvmap.iter().enumerate() { + let addr_obj = build_address_from_ixon(addr); + let dv_obj = build_ixon_data_value(dv); + let pair = lean_alloc_ctor(0, 2, 0); + lean_ctor_set(pair, 0, addr_obj); + lean_ctor_set(pair, 1, dv_obj); + lean_array_set_core(arr, i, pair); + } + arr + } +} + +/// Build Array KVMap. +pub fn build_kvmap_array(kvmaps: &[KVMap]) -> *mut c_void { + unsafe { + let arr = lean_alloc_array(kvmaps.len(), kvmaps.len()); + for (i, kvmap) in kvmaps.iter().enumerate() { + let kvmap_obj = build_ixon_kvmap(kvmap); + lean_array_set_core(arr, i, kvmap_obj); + } + arr + } +} + +/// Decode KVMap (Array (Address × DataValue)). +pub fn decode_ixon_kvmap(ptr: *const c_void) -> KVMap { + let arr: &LeanArrayObject = as_ref_unsafe(ptr.cast()); + arr.to_vec(|pair| unsafe { + let addr_ptr = lean_ctor_get(pair as *mut _, 0); + let dv_ptr = lean_ctor_get(pair as *mut _, 1); + (decode_ixon_address(addr_ptr), decode_ixon_data_value(dv_ptr)) + }) +} + +/// Decode Array KVMap. +fn decode_kvmap_array(ptr: *const c_void) -> Vec { + let arr: &LeanArrayObject = as_ref_unsafe(ptr.cast()); + arr.to_vec(decode_ixon_kvmap) +} + +// ============================================================================= +// Address Array Helpers +// ============================================================================= + +/// Decode Array Address. +fn decode_address_array(ptr: *const c_void) -> Vec
{ + let arr: &LeanArrayObject = as_ref_unsafe(ptr.cast()); + arr.to_vec(decode_ixon_address) +} + +/// Build Array UInt64. +fn build_u64_array(vals: &[u64]) -> *mut c_void { + unsafe { + let arr = lean_alloc_array(vals.len(), vals.len()); + for (i, &v) in vals.iter().enumerate() { + let obj = crate::lean::lean_box_u64(v); + lean_array_set_core(arr, i, obj); + } + arr + } +} + +/// Decode Array UInt64. +fn decode_u64_array(ptr: *const c_void) -> Vec { + let arr: &LeanArrayObject = as_ref_unsafe(ptr.cast()); + arr.to_vec(crate::lean::lean_unbox_u64) +} + +// ============================================================================= +// ExprMetaData Build/Decode +// ============================================================================= + +/// Build Ixon.ExprMetaData Lean object. +/// +/// | Variant | Tag | Obj fields | Scalar bytes | +/// |------------|-----|------------------------|--------------------------| +/// | leaf | 0 | 0 | 0 | +/// | app | 1 | 0 | 16 (2× u64) | +/// | binder | 2 | 1 (name: Address) | 17 (info: u8, 2× u64) | +/// | letBinder | 3 | 1 (name: Address) | 24 (3× u64) | +/// | ref | 4 | 1 (name: Address) | 0 | +/// | prj | 5 | 1 (structName: Address) | 8 (1× u64) | +/// | mdata | 6 | 1 (mdata: Array) | 8 (1× u64) | +pub fn build_expr_meta_data(node: &ExprMetaData) -> *mut c_void { + unsafe { + match node { + ExprMetaData::Leaf => lean_box_fn(0), + + ExprMetaData::App { children } => { + // Tag 1, 0 obj fields, 16 scalar bytes (2× u64) + let obj = lean_alloc_ctor(1, 0, 16); + lean_ctor_set_uint64(obj, 0, children[0]); + lean_ctor_set_uint64(obj, 8, children[1]); + obj + }, + + ExprMetaData::Binder { name, info, children } => { + // Tag 2, 1 obj field (name), scalar: 2× u64 + u8 (info) + // Lean ABI sorts scalars by size descending: [tyChild: u64 @ 0] [bodyChild: u64 @ 8] [info: u8 @ 16] + let obj = lean_alloc_ctor(2, 1, 17); + lean_ctor_set(obj, 0, build_address_from_ixon(name)); + lean_ctor_set_uint64(obj, 8, children[0]); + 
lean_ctor_set_uint64(obj, 8 + 8, children[1]); + lean_ctor_set_uint8(obj, 8 + 16, binder_info_to_u8(info)); + obj + }, + + ExprMetaData::LetBinder { name, children } => { + // Tag 3, 1 obj field (name), 24 scalar bytes (3× u64) + let obj = lean_alloc_ctor(3, 1, 24); + lean_ctor_set(obj, 0, build_address_from_ixon(name)); + lean_ctor_set_uint64(obj, 8, children[0]); + lean_ctor_set_uint64(obj, 8 + 8, children[1]); + lean_ctor_set_uint64(obj, 8 + 16, children[2]); + obj + }, + + ExprMetaData::Ref { name } => { + // Tag 4, 1 obj field (name), 0 scalar bytes + let obj = lean_alloc_ctor(4, 1, 0); + lean_ctor_set(obj, 0, build_address_from_ixon(name)); + obj + }, + + ExprMetaData::Prj { struct_name, child } => { + // Tag 5, 1 obj field (structName), 8 scalar bytes (1× u64) + let obj = lean_alloc_ctor(5, 1, 8); + lean_ctor_set(obj, 0, build_address_from_ixon(struct_name)); + lean_ctor_set_uint64(obj, 8, *child); + obj + }, + + ExprMetaData::Mdata { mdata, child } => { + // Tag 6, 1 obj field (mdata: Array KVMap), 8 scalar bytes (1× u64) + let mdata_obj = build_kvmap_array(mdata); + let obj = lean_alloc_ctor(6, 1, 8); + lean_ctor_set(obj, 0, mdata_obj); + lean_ctor_set_uint64(obj, 8, *child); + obj + }, + } + } +} + +/// Decode Ixon.ExprMetaData from Lean pointer. 
+pub fn decode_expr_meta_data(ptr: *const c_void) -> ExprMetaData { + unsafe { + // Leaf (tag 0, no fields) is represented as a scalar lean_box(0) + if lean_is_scalar(ptr) { + let tag = (ptr as usize) >> 1; + assert_eq!(tag, 0, "Invalid scalar ExprMetaData tag: {}", tag); + return ExprMetaData::Leaf; + } + let tag = lean_obj_tag(ptr as *mut _); + let ctor: &LeanCtorObject = as_ref_unsafe(ptr.cast()); + match tag { + 1 => { + // app: 0 obj fields, 2× u64 scalar + let fun_ = ctor.get_scalar_u64(0, 0); + let arg = ctor.get_scalar_u64(0, 8); + ExprMetaData::App { children: [fun_, arg] } + }, + + 2 => { + // binder: 1 obj field (name), scalar (Lean ABI: u64s first, then u8): + // [tyChild: u64 @ 0] [bodyChild: u64 @ 8] [info: u8 @ 16] + let name_ptr = lean_ctor_get(ptr as *mut _, 0); + let ty_child = ctor.get_scalar_u64(1, 0); + let body_child = ctor.get_scalar_u64(1, 8); + let info_byte = ctor.get_scalar_u8(1, 16); + let info = match info_byte { + 0 => BinderInfo::Default, + 1 => BinderInfo::Implicit, + 2 => BinderInfo::StrictImplicit, + 3 => BinderInfo::InstImplicit, + _ => panic!("Invalid BinderInfo tag: {}", info_byte), + }; + ExprMetaData::Binder { + name: decode_ixon_address(name_ptr), + info, + children: [ty_child, body_child], + } + }, + + 3 => { + // letBinder: 1 obj field (name), 3× u64 scalar + let name_ptr = lean_ctor_get(ptr as *mut _, 0); + let ty_child = ctor.get_scalar_u64(1, 0); + let val_child = ctor.get_scalar_u64(1, 8); + let body_child = ctor.get_scalar_u64(1, 16); + ExprMetaData::LetBinder { + name: decode_ixon_address(name_ptr), + children: [ty_child, val_child, body_child], + } + }, + + 4 => { + // ref: 1 obj field (name), 0 scalar + let name_ptr = lean_ctor_get(ptr as *mut _, 0); + ExprMetaData::Ref { name: decode_ixon_address(name_ptr) } + }, + + 5 => { + // prj: 1 obj field (structName), 1× u64 scalar + let name_ptr = lean_ctor_get(ptr as *mut _, 0); + let child = ctor.get_scalar_u64(1, 0); + ExprMetaData::Prj { struct_name: 
decode_ixon_address(name_ptr), child } + }, + + 6 => { + // mdata: 1 obj field (mdata: Array KVMap), 1× u64 scalar + let mdata_ptr = lean_ctor_get(ptr as *mut _, 0); + let child = ctor.get_scalar_u64(1, 0); + ExprMetaData::Mdata { mdata: decode_kvmap_array(mdata_ptr), child } + }, + + _ => panic!("Invalid Ixon.ExprMetaData tag: {}", tag), + } + } +} + +// ============================================================================= +// ExprMetaArena Build/Decode +// ============================================================================= + +/// Build Ixon.ExprMetaArena Lean object. +/// ExprMetaArena is a single-field structure (nodes : Array ExprMetaData), +/// which Lean unboxes — the value IS the Array directly. +pub fn build_expr_meta_arena(arena: &ExprMeta) -> *mut c_void { + unsafe { + let arr = lean_alloc_array(arena.nodes.len(), arena.nodes.len()); + for (i, node) in arena.nodes.iter().enumerate() { + lean_array_set_core(arr, i, build_expr_meta_data(node)); + } + arr + } +} + +/// Decode Ixon.ExprMetaArena from Lean pointer. +/// Single-field struct is unboxed — ptr IS the Array directly. +pub fn decode_expr_meta_arena(ptr: *const c_void) -> ExprMeta { + let arr: &LeanArrayObject = as_ref_unsafe(ptr.cast()); + ExprMeta { nodes: arr.to_vec(decode_expr_meta_data) } +} + +// ============================================================================= +// ConstantMeta Build/Decode +// ============================================================================= + +/// Build Ixon.ConstantMeta Lean object. 
+/// +/// | Variant | Tag | Obj fields | Scalar bytes | +/// |---------|-----|-----------|-------------| +/// | empty | 0 | 0 | 0 | +/// | defn | 1 | 6 (name, lvls, hints, all, ctx, arena) | 16 (2× u64) | +/// | axio | 2 | 3 (name, lvls, arena) | 8 (1× u64) | +/// | quot | 3 | 3 (name, lvls, arena) | 8 (1× u64) | +/// | indc | 4 | 6 (name, lvls, ctors, all, ctx, arena) | 8 (1× u64) | +/// | ctor | 5 | 4 (name, lvls, induct, arena) | 8 (1× u64) | +/// | recr | 6 | 7 (name, lvls, rules, all, ctx, arena, ruleRoots) | 8 (1× u64) | +pub fn build_constant_meta(meta: &ConstantMeta) -> *mut c_void { + unsafe { + match meta { + ConstantMeta::Empty => lean_box_fn(0), + + ConstantMeta::Def { + name, + lvls, + hints, + all, + ctx, + arena, + type_root, + value_root, + } => { + let obj = lean_alloc_ctor(1, 6, 16); + lean_ctor_set(obj, 0, build_address_from_ixon(name)); + lean_ctor_set(obj, 1, build_address_array(lvls)); + lean_ctor_set(obj, 2, build_reducibility_hints(hints)); + lean_ctor_set(obj, 3, build_address_array(all)); + lean_ctor_set(obj, 4, build_address_array(ctx)); + lean_ctor_set(obj, 5, build_expr_meta_arena(arena)); + lean_ctor_set_uint64(obj, 6 * 8, *type_root); + lean_ctor_set_uint64(obj, 6 * 8 + 8, *value_root); + obj + }, + + ConstantMeta::Axio { name, lvls, arena, type_root } => { + let obj = lean_alloc_ctor(2, 3, 8); + lean_ctor_set(obj, 0, build_address_from_ixon(name)); + lean_ctor_set(obj, 1, build_address_array(lvls)); + lean_ctor_set(obj, 2, build_expr_meta_arena(arena)); + lean_ctor_set_uint64(obj, 3 * 8, *type_root); + obj + }, + + ConstantMeta::Quot { name, lvls, arena, type_root } => { + let obj = lean_alloc_ctor(3, 3, 8); + lean_ctor_set(obj, 0, build_address_from_ixon(name)); + lean_ctor_set(obj, 1, build_address_array(lvls)); + lean_ctor_set(obj, 2, build_expr_meta_arena(arena)); + lean_ctor_set_uint64(obj, 3 * 8, *type_root); + obj + }, + + ConstantMeta::Indc { name, lvls, ctors, all, ctx, arena, type_root } => { + let obj = lean_alloc_ctor(4, 
6, 8); + lean_ctor_set(obj, 0, build_address_from_ixon(name)); + lean_ctor_set(obj, 1, build_address_array(lvls)); + lean_ctor_set(obj, 2, build_address_array(ctors)); + lean_ctor_set(obj, 3, build_address_array(all)); + lean_ctor_set(obj, 4, build_address_array(ctx)); + lean_ctor_set(obj, 5, build_expr_meta_arena(arena)); + lean_ctor_set_uint64(obj, 6 * 8, *type_root); + obj + }, + + ConstantMeta::Ctor { name, lvls, induct, arena, type_root } => { + let obj = lean_alloc_ctor(5, 4, 8); + lean_ctor_set(obj, 0, build_address_from_ixon(name)); + lean_ctor_set(obj, 1, build_address_array(lvls)); + lean_ctor_set(obj, 2, build_address_from_ixon(induct)); + lean_ctor_set(obj, 3, build_expr_meta_arena(arena)); + lean_ctor_set_uint64(obj, 4 * 8, *type_root); + obj + }, + + ConstantMeta::Rec { + name, + lvls, + rules, + all, + ctx, + arena, + type_root, + rule_roots, + } => { + let obj = lean_alloc_ctor(6, 7, 8); + lean_ctor_set(obj, 0, build_address_from_ixon(name)); + lean_ctor_set(obj, 1, build_address_array(lvls)); + lean_ctor_set(obj, 2, build_address_array(rules)); + lean_ctor_set(obj, 3, build_address_array(all)); + lean_ctor_set(obj, 4, build_address_array(ctx)); + lean_ctor_set(obj, 5, build_expr_meta_arena(arena)); + lean_ctor_set(obj, 6, build_u64_array(rule_roots)); + lean_ctor_set_uint64(obj, 7 * 8, *type_root); + obj + }, + } + } +} + +/// Decode Ixon.ConstantMeta from Lean pointer. 
+pub fn decode_constant_meta(ptr: *const c_void) -> ConstantMeta { + unsafe { + // Empty (tag 0, no fields) is represented as a scalar lean_box(0) + if lean_is_scalar(ptr) { + let tag = (ptr as usize) >> 1; + assert_eq!(tag, 0, "Invalid scalar ConstantMeta tag: {}", tag); + return ConstantMeta::Empty; + } + let tag = lean_obj_tag(ptr as *mut _); + let ctor: &LeanCtorObject = as_ref_unsafe(ptr.cast()); + match tag { + 1 => { + // defn: 6 obj fields, 2× u64 scalar + let name = decode_ixon_address(lean_ctor_get(ptr as *mut _, 0)); + let lvls = decode_address_array(lean_ctor_get(ptr as *mut _, 1)); + let hints = decode_reducibility_hints(lean_ctor_get(ptr as *mut _, 2)); + let all = decode_address_array(lean_ctor_get(ptr as *mut _, 3)); + let ctx = decode_address_array(lean_ctor_get(ptr as *mut _, 4)); + let arena = decode_expr_meta_arena(lean_ctor_get(ptr as *mut _, 5)); + let type_root = ctor.get_scalar_u64(6, 0); + let value_root = ctor.get_scalar_u64(6, 8); + ConstantMeta::Def { + name, + lvls, + hints, + all, + ctx, + arena, + type_root, + value_root, + } + }, + + 2 => { + // axio: 3 obj fields, 1× u64 scalar + let name = decode_ixon_address(lean_ctor_get(ptr as *mut _, 0)); + let lvls = decode_address_array(lean_ctor_get(ptr as *mut _, 1)); + let arena = decode_expr_meta_arena(lean_ctor_get(ptr as *mut _, 2)); + let type_root = ctor.get_scalar_u64(3, 0); + ConstantMeta::Axio { name, lvls, arena, type_root } + }, + + 3 => { + // quot: 3 obj fields, 1× u64 scalar + let name = decode_ixon_address(lean_ctor_get(ptr as *mut _, 0)); + let lvls = decode_address_array(lean_ctor_get(ptr as *mut _, 1)); + let arena = decode_expr_meta_arena(lean_ctor_get(ptr as *mut _, 2)); + let type_root = ctor.get_scalar_u64(3, 0); + ConstantMeta::Quot { name, lvls, arena, type_root } + }, + + 4 => { + // indc: 6 obj fields, 1× u64 scalar + let name = decode_ixon_address(lean_ctor_get(ptr as *mut _, 0)); + let lvls = decode_address_array(lean_ctor_get(ptr as *mut _, 1)); + let ctors = 
decode_address_array(lean_ctor_get(ptr as *mut _, 2)); + let all = decode_address_array(lean_ctor_get(ptr as *mut _, 3)); + let ctx = decode_address_array(lean_ctor_get(ptr as *mut _, 4)); + let arena = decode_expr_meta_arena(lean_ctor_get(ptr as *mut _, 5)); + let type_root = ctor.get_scalar_u64(6, 0); + ConstantMeta::Indc { name, lvls, ctors, all, ctx, arena, type_root } + }, + + 5 => { + // ctor: 4 obj fields, 1× u64 scalar + let name = decode_ixon_address(lean_ctor_get(ptr as *mut _, 0)); + let lvls = decode_address_array(lean_ctor_get(ptr as *mut _, 1)); + let induct = decode_ixon_address(lean_ctor_get(ptr as *mut _, 2)); + let arena = decode_expr_meta_arena(lean_ctor_get(ptr as *mut _, 3)); + let type_root = ctor.get_scalar_u64(4, 0); + ConstantMeta::Ctor { name, lvls, induct, arena, type_root } + }, + + 6 => { + // recr: 7 obj fields, 1× u64 scalar + let name = decode_ixon_address(lean_ctor_get(ptr as *mut _, 0)); + let lvls = decode_address_array(lean_ctor_get(ptr as *mut _, 1)); + let rules = decode_address_array(lean_ctor_get(ptr as *mut _, 2)); + let all = decode_address_array(lean_ctor_get(ptr as *mut _, 3)); + let ctx = decode_address_array(lean_ctor_get(ptr as *mut _, 4)); + let arena = decode_expr_meta_arena(lean_ctor_get(ptr as *mut _, 5)); + let rule_roots = decode_u64_array(lean_ctor_get(ptr as *mut _, 6)); + let type_root = ctor.get_scalar_u64(7, 0); + ConstantMeta::Rec { + name, + lvls, + rules, + all, + ctx, + arena, + type_root, + rule_roots, + } + }, + + _ => panic!("Invalid Ixon.ConstantMeta tag: {}", tag), + } + } +} + +// ============================================================================= +// Named and Comm Build/Decode +// ============================================================================= + +/// Build Ixon.Named { addr : Address, constMeta : ConstantMeta } +pub fn build_named(addr: &Address, meta: &ConstantMeta) -> *mut c_void { + unsafe { + let addr_obj = build_address_from_ixon(addr); + let meta_obj = 
build_constant_meta(meta); + let obj = lean_alloc_ctor(0, 2, 0); + lean_ctor_set(obj, 0, addr_obj); + lean_ctor_set(obj, 1, meta_obj); + obj + } +} + +/// Decode Ixon.Named. +pub fn decode_named(ptr: *const c_void) -> Named { + unsafe { + let addr_ptr = lean_ctor_get(ptr as *mut _, 0); + let meta_ptr = lean_ctor_get(ptr as *mut _, 1); + Named { + addr: decode_ixon_address(addr_ptr), + meta: decode_constant_meta(meta_ptr), + } + } +} + +/// Build Ixon.Comm { secret : Address, payload : Address } +pub fn build_ixon_comm(comm: &Comm) -> *mut c_void { + unsafe { + let secret_obj = build_address_from_ixon(&comm.secret); + let payload_obj = build_address_from_ixon(&comm.payload); + let obj = lean_alloc_ctor(0, 2, 0); + lean_ctor_set(obj, 0, secret_obj); + lean_ctor_set(obj, 1, payload_obj); + obj + } +} + +/// Decode Ixon.Comm. +pub fn decode_ixon_comm(ptr: *const c_void) -> Comm { + unsafe { + let secret_ptr = lean_ctor_get(ptr as *mut _, 0); + let payload_ptr = lean_ctor_get(ptr as *mut _, 1); + Comm { + secret: decode_ixon_address(secret_ptr), + payload: decode_ixon_address(payload_ptr), + } + } +} + +// ============================================================================= +// FFI Exports +// ============================================================================= + +/// Round-trip Ixon.DataValue. +#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_ixon_data_value( + ptr: *const c_void, +) -> *mut c_void { + let dv = decode_ixon_data_value(ptr); + build_ixon_data_value(&dv) +} + +/// Round-trip Ixon.Comm. +#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_ixon_comm(ptr: *const c_void) -> *mut c_void { + let comm = decode_ixon_comm(ptr); + build_ixon_comm(&comm) +} + +/// Round-trip Ixon.ExprMetaData. +#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_ixon_expr_meta_data( + ptr: *const c_void, +) -> *mut c_void { + let node = decode_expr_meta_data(ptr); + build_expr_meta_data(&node) +} + +/// Round-trip Ixon.ExprMetaArena. 
+#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_ixon_expr_meta_arena( + ptr: *const c_void, +) -> *mut c_void { + let arena = decode_expr_meta_arena(ptr); + build_expr_meta_arena(&arena) +} + +/// Round-trip Ixon.ConstantMeta (full arena-based). +#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_ixon_constant_meta( + ptr: *const c_void, +) -> *mut c_void { + let meta = decode_constant_meta(ptr); + build_constant_meta(&meta) +} + +/// Round-trip Ixon.Named (with real metadata). +#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_ixon_named(ptr: *const c_void) -> *mut c_void { + let named = decode_named(ptr); + build_named(&named.addr, &named.meta) +} diff --git a/src/lean/ffi/ixon/serialize.rs b/src/lean/ffi/ixon/serialize.rs new file mode 100644 index 00000000..e9c7eb22 --- /dev/null +++ b/src/lean/ffi/ixon/serialize.rs @@ -0,0 +1,322 @@ +//! Ixon serialization compatibility FFI. +//! +//! Contains FFI functions for comparing Lean and Rust serialization outputs, +//! and Env serialization roundtrip testing. + +use std::ffi::c_void; +use std::sync::Arc; + +use crate::ix::address::Address; +use crate::ix::ixon::expr::Expr as IxonExpr; +use crate::ix::ixon::serialize::put_expr; +use crate::ix::ixon::sharing::hash_expr; +use crate::ix::ixon::univ::{Univ as IxonUniv, put_univ}; +use crate::lean::array::LeanArrayObject; +use crate::lean::ctor::LeanCtorObject; +use crate::lean::sarray::LeanSArrayObject; +use crate::lean::{as_ref_unsafe, lean_is_scalar, lean_unbox_u64}; + +use super::constant::{decode_ixon_address, decode_ixon_constant}; + +/// Unbox a Lean UInt64, handling both scalar and boxed representations. +fn lean_ptr_to_u64(ptr: *const c_void) -> u64 { + if lean_is_scalar(ptr) { + (ptr as usize >> 1) as u64 + } else { + lean_unbox_u64(ptr) + } +} + +/// Decode a Lean `Ixon.Expr` to a Rust `IxonExpr`. 
+pub fn lean_ptr_to_ixon_expr(ptr: *const c_void) -> Arc { + assert!(!lean_is_scalar(ptr), "Ixon.Expr should not be scalar"); + let ctor: &LeanCtorObject = as_ref_unsafe(ptr.cast()); + match ctor.tag() { + 0 => { + let idx = ctor.get_scalar_u64(0, 0); + Arc::new(IxonExpr::Sort(idx)) + }, + 1 => { + let idx = ctor.get_scalar_u64(0, 0); + Arc::new(IxonExpr::Var(idx)) + }, + 2 => { + let [univs_ptr] = ctor.objs(); + let ref_idx = ctor.get_scalar_u64(1, 0); + let univs_arr: &LeanArrayObject = as_ref_unsafe(univs_ptr.cast()); + let univs = univs_arr.to_vec(lean_ptr_to_u64); + Arc::new(IxonExpr::Ref(ref_idx, univs)) + }, + 3 => { + let [univs_ptr] = ctor.objs(); + let rec_idx = ctor.get_scalar_u64(1, 0); + let univs_arr: &LeanArrayObject = as_ref_unsafe(univs_ptr.cast()); + let univs = univs_arr.to_vec(lean_ptr_to_u64); + Arc::new(IxonExpr::Rec(rec_idx, univs)) + }, + 4 => { + let [val_ptr] = ctor.objs(); + let type_idx = ctor.get_scalar_u64(1, 0); + let field_idx = ctor.get_scalar_u64(1, 8); + let val = lean_ptr_to_ixon_expr(val_ptr); + Arc::new(IxonExpr::Prj(type_idx, field_idx, val)) + }, + 5 => { + let idx = ctor.get_scalar_u64(0, 0); + Arc::new(IxonExpr::Str(idx)) + }, + 6 => { + let idx = ctor.get_scalar_u64(0, 0); + Arc::new(IxonExpr::Nat(idx)) + }, + 7 => { + let [fun_ptr, arg_ptr] = ctor.objs(); + let fun_ = lean_ptr_to_ixon_expr(fun_ptr); + let arg = lean_ptr_to_ixon_expr(arg_ptr); + Arc::new(IxonExpr::App(fun_, arg)) + }, + 8 => { + let [ty_ptr, body_ptr] = ctor.objs(); + let ty = lean_ptr_to_ixon_expr(ty_ptr); + let body = lean_ptr_to_ixon_expr(body_ptr); + Arc::new(IxonExpr::Lam(ty, body)) + }, + 9 => { + let [ty_ptr, body_ptr] = ctor.objs(); + let ty = lean_ptr_to_ixon_expr(ty_ptr); + let body = lean_ptr_to_ixon_expr(body_ptr); + Arc::new(IxonExpr::All(ty, body)) + }, + 10 => { + let [ty_ptr, val_ptr, body_ptr] = ctor.objs(); + let base_ptr = (ctor as *const LeanCtorObject).cast::(); + let non_dep = unsafe { *base_ptr.add(8 + 3 * 8) } != 0; + let ty = 
lean_ptr_to_ixon_expr(ty_ptr); + let val = lean_ptr_to_ixon_expr(val_ptr); + let body = lean_ptr_to_ixon_expr(body_ptr); + Arc::new(IxonExpr::Let(non_dep, ty, val, body)) + }, + 11 => { + let idx = ctor.get_scalar_u64(0, 0); + Arc::new(IxonExpr::Share(idx)) + }, + tag => panic!("Unknown Ixon.Expr tag: {}", tag), + } +} + +/// Check if Lean's computed hash matches Rust's computed hash. +#[unsafe(no_mangle)] +pub extern "C" fn rs_expr_hash_matches( + expr_ptr: *const c_void, + expected_hash: *const c_void, +) -> bool { + let expr = lean_ptr_to_ixon_expr(expr_ptr); + let hash = hash_expr(&expr); + let expected = decode_ixon_address(expected_hash); + Address::from_slice(hash.as_bytes()).is_ok_and(|h| h == expected) +} + +/// Decode a Lean `Ixon.Univ` to a Rust `IxonUniv`. +fn lean_ptr_to_ixon_univ(ptr: *const c_void) -> Arc { + if lean_is_scalar(ptr) { + return IxonUniv::zero(); + } + let ctor: &LeanCtorObject = as_ref_unsafe(ptr.cast()); + match ctor.tag() { + 1 => { + let [inner] = ctor.objs(); + IxonUniv::succ(lean_ptr_to_ixon_univ(inner)) + }, + 2 => { + let [a, b] = ctor.objs(); + IxonUniv::max(lean_ptr_to_ixon_univ(a), lean_ptr_to_ixon_univ(b)) + }, + 3 => { + let [a, b] = ctor.objs(); + IxonUniv::imax(lean_ptr_to_ixon_univ(a), lean_ptr_to_ixon_univ(b)) + }, + 4 => IxonUniv::var(ctor.get_scalar_u64(0, 0)), + tag => panic!("Unknown Ixon.Univ tag: {}", tag), + } +} + +/// Check if Lean's Ixon.Univ serialization matches Rust. +#[unsafe(no_mangle)] +pub extern "C" fn rs_eq_univ_serialization( + univ_ptr: *const c_void, + bytes: &LeanSArrayObject, +) -> bool { + let univ = lean_ptr_to_ixon_univ(univ_ptr); + let bytes_data = bytes.data(); + let mut buf = Vec::with_capacity(bytes_data.len()); + put_univ(&univ, &mut buf); + buf == bytes_data +} + +/// Check if Lean's Ixon.Expr serialization matches Rust. 
+#[unsafe(no_mangle)] +pub extern "C" fn rs_eq_expr_serialization( + expr_ptr: *const c_void, + bytes: &LeanSArrayObject, +) -> bool { + let expr = lean_ptr_to_ixon_expr(expr_ptr); + let bytes_data = bytes.data(); + let mut buf = Vec::with_capacity(bytes_data.len()); + put_expr(&expr, &mut buf); + buf == bytes_data +} + +/// Check if Lean's Ixon.Constant serialization matches Rust. +#[unsafe(no_mangle)] +pub extern "C" fn rs_eq_constant_serialization( + constant_ptr: *const c_void, + bytes: &LeanSArrayObject, +) -> bool { + let constant = decode_ixon_constant(constant_ptr); + let bytes_data = bytes.data(); + let mut buf = Vec::with_capacity(bytes_data.len()); + constant.put(&mut buf); + buf == bytes_data +} + +/// Check if Lean's Ixon.Env serialization can be deserialized by Rust and content matches. +/// Due to HashMap ordering differences, we compare deserialized content rather than bytes. +#[unsafe(no_mangle)] +pub extern "C" fn rs_eq_env_serialization( + raw_env_ptr: *const c_void, + bytes: &LeanSArrayObject, +) -> bool { + use super::env::decode_raw_env; + use crate::ix::ixon::env::Env; + + let decoded = decode_raw_env(raw_env_ptr); + let bytes_data = bytes.data(); + + // Deserialize Lean's bytes using Rust's deserializer + let rust_env = match Env::get(&mut &bytes_data[..]) { + Ok(env) => env, + Err(_) => return false, + }; + + // Compare content: check that all items from decoded RawEnv are in the deserialized Env + // Consts + if rust_env.consts.len() != decoded.consts.len() { + return false; + } + for rc in &decoded.consts { + match rust_env.consts.get(&rc.addr) { + Some(c) if *c == rc.constant => {}, + _ => return false, + } + } + + // Blobs + if rust_env.blobs.len() != decoded.blobs.len() { + return false; + } + for rb in &decoded.blobs { + match rust_env.blobs.get(&rb.addr) { + Some(b) if *b == rb.bytes => {}, + _ => return false, + } + } + + // Comms + if rust_env.comms.len() != decoded.comms.len() { + return false; + } + for rc in &decoded.comms { + 
let expected_comm = crate::ix::ixon::comm::Comm { + secret: rc.comm.secret.clone(), + payload: rc.comm.payload.clone(), + }; + match rust_env.comms.get(&rc.addr) { + Some(c) if *c == expected_comm => {}, + _ => return false, + } + } + + // Named: compare by checking all entries exist with matching addresses + if rust_env.named.len() != decoded.named.len() { + return false; + } + for rn in &decoded.named { + match rust_env.named.get(&rn.name) { + Some(named) if named.addr == rn.addr => {}, + _ => return false, + } + } + + true +} + +/// FFI: Test Env serialization roundtrip. +/// Takes: +/// - lean_bytes_ptr: pointer to ByteArray containing serialized Env from Lean +/// +/// Returns: true if Rust can deserialize and re-serialize to the same bytes +#[unsafe(no_mangle)] +extern "C" fn rs_env_serde_roundtrip(lean_bytes_ptr: *const c_void) -> bool { + use crate::ix::ixon::env::Env; + + // Get bytes from Lean ByteArray + let bytes_arr: &LeanSArrayObject = as_ref_unsafe(lean_bytes_ptr.cast()); + let lean_bytes = bytes_arr.data().to_vec(); + + // Try to deserialize with Rust + let mut slice = lean_bytes.as_slice(); + let env = match Env::get(&mut slice) { + Ok(e) => e, + Err(e) => { + eprintln!("Rust Env::get failed: {}", e); + return false; + }, + }; + + // Re-serialize + let mut rust_bytes = Vec::new(); + if let Err(e) = env.put(&mut rust_bytes) { + eprintln!("Rust Env::put failed: {}", e); + return false; + } + + // Compare + if lean_bytes != rust_bytes { + eprintln!("Env roundtrip mismatch:"); + eprintln!(" Input: {} bytes", lean_bytes.len()); + eprintln!(" Output: {} bytes", rust_bytes.len()); + if lean_bytes.len() <= 200 { + eprintln!(" Input bytes: {:?}", lean_bytes); + } + if rust_bytes.len() <= 200 { + eprintln!(" Output bytes: {:?}", rust_bytes); + } + return false; + } + + true +} + +/// FFI: Compare Env serialization between Lean and Rust. 
+/// Takes: +/// - lean_bytes_ptr: pointer to ByteArray containing serialized Env from Lean +/// +/// Returns: true if Rust can deserialize and the counts match +#[unsafe(no_mangle)] +extern "C" fn rs_env_serde_check(lean_bytes_ptr: *const c_void) -> bool { + use crate::ix::ixon::env::Env; + + // Get bytes from Lean ByteArray + let bytes_arr: &LeanSArrayObject = as_ref_unsafe(lean_bytes_ptr.cast()); + let lean_bytes = bytes_arr.data().to_vec(); + + // Try to deserialize with Rust + let mut slice = lean_bytes.as_slice(); + match Env::get(&mut slice) { + Ok(_) => true, + Err(e) => { + eprintln!("Rust Env::get failed: {}", e); + false + }, + } +} diff --git a/src/lean/ffi/ixon/sharing.rs b/src/lean/ffi/ixon/sharing.rs new file mode 100644 index 00000000..955386cb --- /dev/null +++ b/src/lean/ffi/ixon/sharing.rs @@ -0,0 +1,155 @@ +//! Ixon sharing analysis FFI. + +use std::ffi::c_void; +use std::sync::Arc; + +use crate::ix::ixon::expr::Expr as IxonExpr; +use crate::ix::ixon::serialize::put_expr; +use crate::ix::ixon::sharing::{ + analyze_block, build_sharing_vec, decide_sharing, +}; +use crate::lean::array::LeanArrayObject; +use crate::lean::as_ref_unsafe; +use crate::lean::sarray::LeanSArrayObject; + +use super::expr::decode_ixon_expr_array; +use super::serialize::lean_ptr_to_ixon_expr; + +/// FFI: Debug sharing analysis - print usage counts for subterms with usage >= 2. +/// This helps diagnose why Lean and Rust make different sharing decisions. 
+#[unsafe(no_mangle)] +pub extern "C" fn rs_debug_sharing_analysis(exprs_ptr: *const c_void) { + let exprs_arr: &LeanArrayObject = as_ref_unsafe(exprs_ptr.cast()); + let exprs: Vec> = exprs_arr.to_vec(lean_ptr_to_ixon_expr); + + println!("[Rust] Analyzing {} input expressions", exprs.len()); + + let (info_map, _ptr_to_hash) = analyze_block(&exprs, false); + let topo_order = crate::ix::ixon::sharing::topological_sort(&info_map); + let effective_sizes = + crate::ix::ixon::sharing::compute_effective_sizes(&info_map, &topo_order); + + println!("[Rust] Found {} unique subterms", info_map.len()); + + // Collect subterms with usage >= 2 + let mut candidates: Vec<_> = info_map + .iter() + .filter(|(_, info)| info.usage_count >= 2) + .filter_map(|(hash, info)| { + let eff_size = *effective_sizes.get(hash)?; + Some((hash, info, eff_size)) + }) + .collect(); + + // Sort by usage count descending + candidates.sort_by(|a, b| b.1.usage_count.cmp(&a.1.usage_count)); + + println!("[Rust] Subterms with usage >= 2:"); + for (hash, info, eff_size) in candidates { + let n = info.usage_count; + let potential = (n.cast_signed() - 1) * eff_size.cast_signed() + - (n.cast_signed() + eff_size.cast_signed()); + println!( + " usage={} eff_size={} potential={} hash={:.8}", + n, eff_size, potential, hash + ); + println!(" expr={:?}", info.expr); + } +} + +/// FFI: Run Rust's sharing analysis on Lean-provided Ixon.Expr array. +/// Returns the number of shared items Rust would produce. +#[unsafe(no_mangle)] +extern "C" fn rs_analyze_sharing_count(exprs_ptr: *const c_void) -> u64 { + let exprs = decode_ixon_expr_array(exprs_ptr); + + let (info_map, _ptr_to_hash) = analyze_block(&exprs, false); + let shared_hashes = decide_sharing(&info_map); + + shared_hashes.len() as u64 +} + +/// FFI: Run Rust's full sharing pipeline on Lean-provided Ixon.Expr array. +/// Writes the sharing vector and rewritten exprs to output arrays. +/// Returns number of shared items. 
+#[unsafe(no_mangle)] +extern "C" fn rs_run_sharing_analysis( + exprs_ptr: *const c_void, + out_sharing_vec: *mut c_void, + out_rewritten: *mut c_void, +) -> u64 { + let exprs = decode_ixon_expr_array(exprs_ptr); + + let (info_map, ptr_to_hash) = analyze_block(&exprs, false); + let shared_hashes = decide_sharing(&info_map); + let (rewritten_exprs, sharing_vec) = + build_sharing_vec(&exprs, &shared_hashes, &ptr_to_hash, &info_map); + + // Serialize sharing vector to bytes + let mut sharing_bytes: Vec = Vec::new(); + for expr in &sharing_vec { + put_expr(expr, &mut sharing_bytes); + } + + // Serialize rewritten exprs to bytes + let mut rewritten_bytes: Vec = Vec::new(); + for expr in &rewritten_exprs { + put_expr(expr, &mut rewritten_bytes); + } + + // Write to output arrays + let sharing_out: &mut LeanSArrayObject = + unsafe { &mut *out_sharing_vec.cast() }; + sharing_out.set_data(&sharing_bytes); + + let rewritten_out: &mut LeanSArrayObject = + unsafe { &mut *out_rewritten.cast() }; + rewritten_out.set_data(&rewritten_bytes); + + shared_hashes.len() as u64 +} + +/// FFI: Compare Lean's sharing analysis with Rust's on the same input. 
+/// Takes: exprs (Array Expr), lean_sharing (Array Expr), lean_rewritten (Array Expr) +/// Returns packed u64: +/// - bits 0-31: 1 if sharing vectors match, 0 otherwise +/// - bits 32-47: Lean sharing count +/// - bits 48-63: Rust sharing count +#[unsafe(no_mangle)] +extern "C" fn rs_compare_sharing_analysis( + exprs_ptr: *const c_void, + lean_sharing_ptr: *const c_void, + _lean_rewritten_ptr: *const c_void, +) -> u64 { + // Decode input expressions + let exprs = decode_ixon_expr_array(exprs_ptr); + + // Decode Lean's sharing vector + let lean_sharing = decode_ixon_expr_array(lean_sharing_ptr); + + // Run Rust's sharing analysis + let (info_map, ptr_to_hash) = analyze_block(&exprs, false); + let shared_hashes = decide_sharing(&info_map); + let (_rewritten_exprs, rust_sharing) = + build_sharing_vec(&exprs, &shared_hashes, &ptr_to_hash, &info_map); + + // Compare sharing vectors + let lean_count = lean_sharing.len() as u64; + let rust_count = rust_sharing.len() as u64; + + // Serialize both to bytes for comparison + let mut lean_bytes: Vec = Vec::new(); + for expr in &lean_sharing { + put_expr(expr, &mut lean_bytes); + } + + let mut rust_bytes: Vec = Vec::new(); + for expr in &rust_sharing { + put_expr(expr, &mut rust_bytes); + } + + let matches = if lean_bytes == rust_bytes { 1u64 } else { 0u64 }; + + // Pack result: matches | (lean_count << 32) | (rust_count << 48) + matches | (lean_count << 32) | (rust_count << 48) +} diff --git a/src/lean/ffi/ixon/univ.rs b/src/lean/ffi/ixon/univ.rs new file mode 100644 index 00000000..3558c244 --- /dev/null +++ b/src/lean/ffi/ixon/univ.rs @@ -0,0 +1,126 @@ +//! Ixon.Univ build/decode/roundtrip FFI. 
+ +use std::ffi::c_void; +use std::sync::Arc; + +use crate::ix::ixon::univ::Univ as IxonUniv; +use crate::lean::{ + as_ref_unsafe, lean_alloc_array, lean_alloc_ctor, lean_array_set_core, + lean_box_fn, lean_ctor_get, lean_ctor_set, lean_is_scalar, lean_obj_tag, +}; + +/// Build Ixon.Univ +pub fn build_ixon_univ(univ: &IxonUniv) -> *mut c_void { + unsafe { + match univ { + IxonUniv::Zero => lean_box_fn(0), + IxonUniv::Succ(inner) => { + let inner_obj = build_ixon_univ(inner); + let obj = lean_alloc_ctor(1, 1, 0); + lean_ctor_set(obj, 0, inner_obj); + obj + }, + IxonUniv::Max(a, b) => { + let a_obj = build_ixon_univ(a); + let b_obj = build_ixon_univ(b); + let obj = lean_alloc_ctor(2, 2, 0); + lean_ctor_set(obj, 0, a_obj); + lean_ctor_set(obj, 1, b_obj); + obj + }, + IxonUniv::IMax(a, b) => { + let a_obj = build_ixon_univ(a); + let b_obj = build_ixon_univ(b); + let obj = lean_alloc_ctor(3, 2, 0); + lean_ctor_set(obj, 0, a_obj); + lean_ctor_set(obj, 1, b_obj); + obj + }, + IxonUniv::Var(idx) => { + let obj = lean_alloc_ctor(4, 0, 8); + let base = obj.cast::(); + *base.add(8).cast::() = *idx; + obj + }, + } + } +} + +/// Build an Array of Ixon.Univ. +pub fn build_ixon_univ_array(univs: &[Arc]) -> *mut c_void { + unsafe { + let arr = lean_alloc_array(univs.len(), univs.len()); + for (i, univ) in univs.iter().enumerate() { + let univ_obj = build_ixon_univ(univ); + lean_array_set_core(arr, i, univ_obj); + } + arr + } +} + +// ============================================================================= +// Decode Functions +// ============================================================================= + +/// Decode Ixon.Univ (recursive enum). 
+/// | zero -- tag 0 (no fields) +/// | succ (u : Univ) -- tag 1 +/// | max (a b : Univ) -- tag 2 +/// | imax (a b : Univ) -- tag 3 +/// | var (idx : UInt64) -- tag 4 (scalar field) +pub fn decode_ixon_univ(ptr: *const c_void) -> IxonUniv { + unsafe { + // Note: .zero is a nullary constructor with tag 0, represented as lean_box(0) + if lean_is_scalar(ptr) { + return IxonUniv::Zero; + } + let tag = lean_obj_tag(ptr as *mut _); + match tag { + 0 => IxonUniv::Zero, + 1 => { + let inner_ptr = lean_ctor_get(ptr as *mut _, 0); + IxonUniv::Succ(Arc::new(decode_ixon_univ(inner_ptr))) + }, + 2 => { + let a_ptr = lean_ctor_get(ptr as *mut _, 0); + let b_ptr = lean_ctor_get(ptr as *mut _, 1); + IxonUniv::Max( + Arc::new(decode_ixon_univ(a_ptr)), + Arc::new(decode_ixon_univ(b_ptr)), + ) + }, + 3 => { + let a_ptr = lean_ctor_get(ptr as *mut _, 0); + let b_ptr = lean_ctor_get(ptr as *mut _, 1); + IxonUniv::IMax( + Arc::new(decode_ixon_univ(a_ptr)), + Arc::new(decode_ixon_univ(b_ptr)), + ) + }, + 4 => { + // scalar field: UInt64 at offset 8 (after header) + let base = ptr.cast::(); + let idx = *(base.add(8).cast::()); + IxonUniv::Var(idx) + }, + _ => panic!("Invalid Ixon.Univ tag: {}", tag), + } + } +} + +/// Decode Array Ixon.Univ. +pub fn decode_ixon_univ_array(ptr: *const c_void) -> Vec> { + let arr: &crate::lean::array::LeanArrayObject = as_ref_unsafe(ptr.cast()); + arr.to_vec(|u| Arc::new(decode_ixon_univ(u))) +} + +// ============================================================================= +// FFI Exports +// ============================================================================= + +/// Round-trip Ixon.Univ. +#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_ixon_univ(ptr: *const c_void) -> *mut c_void { + let univ = decode_ixon_univ(ptr); + build_ixon_univ(&univ) +} diff --git a/src/lean/ffi/lean_env.rs b/src/lean/ffi/lean_env.rs index 5c9b2ce2..3817e0e4 100644 --- a/src/lean/ffi/lean_env.rs +++ b/src/lean/ffi/lean_env.rs @@ -1,3 +1,17 @@ +//! 
Decode Lean kernel objects from their in-memory C representation. +//! +//! Provides functions to walk Lean object pointers and decode them into +//! the Rust `Name`, `Level`, `Expr`, and `ConstantInfo` types defined in +//! `crate::ix::env`. Used by the compilation pipeline to read the Lean +//! environment before transforming it to Ixon format. +//! +//! Uses a two-level cache (`GlobalCache` + `LocalCache`) to avoid redundant +//! decoding of shared subterms when processing environments in parallel. + +#![allow(clippy::cast_possible_truncation)] +#![allow(clippy::cast_precision_loss)] +#![allow(clippy::cast_possible_wrap)] + use dashmap::DashMap; use rayon::prelude::*; use std::ffi::c_void; @@ -41,7 +55,7 @@ impl SendPtr { /// Global cache for Names, shared across all threads. #[derive(Default)] -struct GlobalCache { +pub struct GlobalCache { names: DashMap<*const c_void, Name>, } @@ -71,13 +85,13 @@ struct LocalCache { unsafe impl Send for LocalCache {} /// Combined cache reference passed to decoding functions. 
-struct Cache<'g> { +pub struct Cache<'g> { global: &'g GlobalCache, local: LocalCache, } impl<'g> Cache<'g> { - fn new(global: &'g GlobalCache) -> Self { + pub fn new(global: &'g GlobalCache) -> Self { Self { global, local: LocalCache::default() } } } @@ -94,7 +108,7 @@ fn collect_list_ptrs(mut ptr: *const c_void) -> Vec<*const c_void> { } // Name decoding with global cache -fn lean_ptr_to_name(ptr: *const c_void, global: &GlobalCache) -> Name { +pub fn lean_ptr_to_name(ptr: *const c_void, global: &GlobalCache) -> Name { // Fast path: check if already cached if let Some(name) = global.names.get(&ptr) { return name.clone(); @@ -290,7 +304,7 @@ fn lean_ptr_to_name_data_value( (name, data_value) } -fn lean_ptr_to_expr(ptr: *const c_void, cache: &mut Cache<'_>) -> Expr { +pub fn lean_ptr_to_expr(ptr: *const c_void, cache: &mut Cache<'_>) -> Expr { if let Some(cached) = cache.local.exprs.get(&ptr) { return cached.clone(); } @@ -446,7 +460,7 @@ fn lean_ptr_to_constant_val( ConstantVal { name, level_params, typ } } -fn lean_ptr_to_constant_info( +pub fn lean_ptr_to_constant_info( ptr: *const c_void, cache: &mut Cache<'_>, ) -> ConstantInfo { @@ -699,41 +713,504 @@ pub fn lean_ptr_to_env_sequential(ptr: *const c_void) -> Env { // env.len() //} +// Debug/analysis entry point invoked via the `rust-compile` test flag in +// `Tests/FFI/Basic.lean`. Exercises the full compile→decompile→check→serialize +// roundtrip and size analysis. Output is intentionally suppressed; re-enable +// individual `eprintln!` lines when debugging locally. 
#[unsafe(no_mangle)] extern "C" fn rs_tmp_decode_const_map(ptr: *const c_void) -> usize { - let start_decoding = std::time::SystemTime::now(); + // Enable hash-consed size tracking for debugging + // TODO: Make this configurable via CLI instead of hardcoded + crate::ix::compile::TRACK_HASH_CONSED_SIZE + .store(true, std::sync::atomic::Ordering::Relaxed); + + // Enable verbose sharing analysis for debugging pathological blocks + // TODO: Make this configurable via CLI instead of hardcoded + crate::ix::compile::ANALYZE_SHARING + .store(false, std::sync::atomic::Ordering::Relaxed); + let env = lean_ptr_to_env(ptr); let env = Arc::new(env); - println!("Decoding: {:.2}s", start_decoding.elapsed().unwrap().as_secs_f32()); - let res = compile_env(&env); - match res { - Ok(stt) => { - println!("Compile OK: {:?}", stt.stats()); - let start_decompiling = std::time::SystemTime::now(); - match decompile_env(&stt) { - Ok(dstt) => { - println!( - "Decompiling: {:.2}s", - start_decompiling.elapsed().unwrap().as_secs_f32() - ); - println!("Decompile OK: {:?}", dstt.stats()); - let start_check = std::time::SystemTime::now(); - match check_decompile(env.as_ref(), &stt, &dstt) { - Ok(()) => { - println!( - "Checking: {:.2}s", - start_check.elapsed().unwrap().as_secs_f32() - ); - println!("Roundtrip OK"); - }, - Err(e) => println!("Roundtrip ERR: {:?}", e), - } - }, - Err(e) => println!("Decompile ERR: {:?}", e), + if let Ok(stt) = compile_env(&env) { + if let Ok(dstt) = decompile_env(&stt) { + let _ = check_decompile(env.as_ref(), &stt, &dstt); + } + + // Measure serialized size (after roundtrip, not counted in total time) + let _ = stt.env.serialized_size_breakdown(); + + // Analyze serialized size of "Nat.add_comm" and its transitive dependencies + analyze_const_size(&stt, "Nat.add_comm"); + + // Analyze hash-consing vs serialization efficiency + analyze_block_size_stats(&stt); + + // Test decompilation from serialized bytes (simulating "over the wire") + let mut serialized = 
Vec::new(); + stt.env.put(&mut serialized).expect("Env serialization failed"); + + // Deserialize to a fresh Env + let mut buf: &[u8] = &serialized; + if let Ok(fresh_env) = crate::ix::ixon::env::Env::get(&mut buf) { + // Build a fresh CompileState from the deserialized Env + let fresh_stt = crate::ix::compile::CompileState { + env: fresh_env, + name_to_addr: DashMap::new(), + blocks: dashmap::DashSet::new(), + block_stats: DashMap::new(), + }; + + // Populate name_to_addr from env.named + for entry in fresh_stt.env.named.iter() { + fresh_stt + .name_to_addr + .insert(entry.key().clone(), entry.value().addr.clone()); } - }, - Err(e) => println!("Compile ERR: {:?}", e), + + // Populate blocks from constants that are mutual blocks + for entry in fresh_stt.env.consts.iter() { + if matches!( + &entry.value().info, + crate::ix::ixon::constant::ConstantInfo::Muts(_) + ) { + fresh_stt.blocks.insert(entry.key().clone()); + } + } + + // Decompile from the fresh state + if let Ok(dstt2) = decompile_env(&fresh_stt) { + // Verify against original environment + let _ = check_decompile(env.as_ref(), &fresh_stt, &dstt2); + } + } } - println!("Total: {:.2}s", start_decoding.elapsed().unwrap().as_secs_f32()); env.as_ref().len() } + +/// Size breakdown for a constant: alpha-invariant vs metadata +#[derive(Default, Clone)] +struct ConstSizeBreakdown { + alpha_size: usize, // Alpha-invariant constant data + meta_size: usize, // Metadata (names, binder info, etc.) +} + +impl ConstSizeBreakdown { + fn total(&self) -> usize { + self.alpha_size + self.meta_size + } +} + +/// Analyze the serialized size of a constant and its transitive dependencies. 
+fn analyze_const_size(stt: &crate::ix::compile::CompileState, name_str: &str) { + use crate::ix::address::Address; + use std::collections::{HashSet, VecDeque}; + + // Build a global name index for metadata serialization + let name_index = build_name_index(stt); + + // Parse the name (e.g., "Nat.add_comm" -> Name::str(Name::str(Name::anon(), "Nat"), "add_comm")) + let name = parse_name(name_str); + + // Look up the constant's address + let addr = match stt.name_to_addr.get(&name) { + Some(a) => a.clone(), + None => { + println!("\n=== Size analysis for {} ===", name_str); + println!(" Constant not found"); + return; + }, + }; + + // Get the constant + let constant = match stt.env.consts.get(&addr) { + Some(c) => c.clone(), + None => { + println!("\n=== Size analysis for {} ===", name_str); + println!(" Constant data not found at address"); + return; + }, + }; + + // Compute direct sizes (alpha-invariant and metadata) + let direct_breakdown = + compute_const_size_breakdown(&constant, &name, stt, &name_index); + + // BFS to collect all transitive dependencies + let mut visited: HashSet
= HashSet::new(); + let mut queue: VecDeque
= VecDeque::new(); + let mut dep_breakdowns: Vec<(String, ConstSizeBreakdown)> = Vec::new(); + + // Start with the constant's refs + visited.insert(addr.clone()); + for dep_addr in &constant.refs { + if !visited.contains(dep_addr) { + queue.push_back(dep_addr.clone()); + visited.insert(dep_addr.clone()); + } + } + + // BFS through all transitive dependencies + while let Some(dep_addr) = queue.pop_front() { + if let Some(dep_const) = stt.env.consts.get(&dep_addr) { + // Get the name for this dependency + let dep_name_opt = stt.env.get_name_by_addr(&dep_addr); + let dep_name_str = dep_name_opt + .as_ref() + .map_or_else(|| format!("{:?}", dep_addr), |n| n.pretty()); + + let breakdown = if let Some(ref dep_name) = dep_name_opt { + compute_const_size_breakdown(&dep_const, dep_name, stt, &name_index) + } else { + ConstSizeBreakdown { + alpha_size: serialized_const_size(&dep_const), + meta_size: 0, + } + }; + + dep_breakdowns.push((dep_name_str, breakdown)); + + // Add this constant's refs to the queue + for ref_addr in &dep_const.refs { + if !visited.contains(ref_addr) { + queue.push_back(ref_addr.clone()); + visited.insert(ref_addr.clone()); + } + } + } + } + + // Sort by total size descending + dep_breakdowns.sort_by(|a, b| b.1.total().cmp(&a.1.total())); + + let total_deps_alpha: usize = + dep_breakdowns.iter().map(|(_, b)| b.alpha_size).sum(); + let total_deps_meta: usize = + dep_breakdowns.iter().map(|(_, b)| b.meta_size).sum(); + let total_deps_size = total_deps_alpha + total_deps_meta; + + let total_alpha = direct_breakdown.alpha_size + total_deps_alpha; + let total_meta = direct_breakdown.meta_size + total_deps_meta; + let total_size = total_alpha + total_meta; + + println!("\n=== Size analysis for {} ===", name_str); + println!( + " Direct alpha-invariant size: {} bytes", + direct_breakdown.alpha_size + ); + println!(" Direct metadata size: {} bytes", direct_breakdown.meta_size); + println!(" Direct total size: {} bytes", direct_breakdown.total()); + 
println!(); + println!(" Transitive dependencies: {} constants", dep_breakdowns.len()); + println!( + " Dependencies alpha-invariant: {} bytes ({:.2} KB)", + total_deps_alpha, + total_deps_alpha as f64 / 1024.0 + ); + println!( + " Dependencies metadata: {} bytes ({:.2} KB)", + total_deps_meta, + total_deps_meta as f64 / 1024.0 + ); + println!( + " Dependencies total: {} bytes ({:.2} KB)", + total_deps_size, + total_deps_size as f64 / 1024.0 + ); + println!(); + println!( + " TOTAL alpha-invariant: {} bytes ({:.2} KB)", + total_alpha, + total_alpha as f64 / 1024.0 + ); + println!( + " TOTAL metadata: {} bytes ({:.2} KB)", + total_meta, + total_meta as f64 / 1024.0 + ); + println!( + " TOTAL size: {} bytes ({:.2} KB)", + total_size, + total_size as f64 / 1024.0 + ); + + // Show top 10 largest dependencies + if !dep_breakdowns.is_empty() { + println!("\n Top 10 largest dependencies (by total size):"); + for (name, breakdown) in dep_breakdowns.iter().take(10) { + println!( + " {} bytes (alpha: {}, meta: {}): {}", + breakdown.total(), + breakdown.alpha_size, + breakdown.meta_size, + name + ); + } + } +} + +/// Build a name index for metadata serialization. +fn build_name_index( + stt: &crate::ix::compile::CompileState, +) -> crate::ix::ixon::metadata::NameIndex { + use crate::ix::address::Address; + use crate::ix::ixon::metadata::NameIndex; + + let mut idx = NameIndex::new(); + let mut counter: u64 = 0; + + // Add all names from the names map + for entry in stt.env.names.iter() { + idx.insert(entry.key().clone(), counter); + counter += 1; + } + + // Add anonymous name + let anon_addr = Address::from_blake3_hash(*Name::anon().get_hash()); + idx.entry(anon_addr).or_insert(counter); + + idx +} + +/// Compute size breakdown for a constant (alpha-invariant vs metadata). 
+fn compute_const_size_breakdown( + constant: &crate::ix::ixon::constant::Constant, + name: &Name, + stt: &crate::ix::compile::CompileState, + name_index: &crate::ix::ixon::metadata::NameIndex, +) -> ConstSizeBreakdown { + // Alpha-invariant size + let alpha_size = serialized_const_size(constant); + + // Metadata size + let meta_size = if let Some(named) = stt.env.named.get(name) { + serialized_meta_size(&named.meta, name_index) + } else { + 0 + }; + + ConstSizeBreakdown { alpha_size, meta_size } +} + +/// Compute the serialized size of constant metadata. +fn serialized_meta_size( + meta: &crate::ix::ixon::metadata::ConstantMeta, + name_index: &crate::ix::ixon::metadata::NameIndex, +) -> usize { + let mut buf = Vec::new(); + meta + .put_indexed(name_index, &mut buf) + .expect("metadata serialization failed"); + buf.len() +} + +/// Parse a dotted name string into a Name. +fn parse_name(s: &str) -> Name { + let parts: Vec<&str> = s.split('.').collect(); + let mut name = Name::anon(); + for part in parts { + name = Name::str(name, part.to_string()); + } + name +} + +/// Compute the serialized size of a constant. +fn serialized_const_size( + constant: &crate::ix::ixon::constant::Constant, +) -> usize { + let mut buf = Vec::new(); + constant.put(&mut buf); + buf.len() +} + +/// Analyze block size statistics: hash-consing vs serialization. 
+fn analyze_block_size_stats(stt: &crate::ix::compile::CompileState) { + use crate::ix::compile::BlockSizeStats; + + // Check if hash-consed size tracking was enabled + let tracking_enabled = crate::ix::compile::TRACK_HASH_CONSED_SIZE + .load(std::sync::atomic::Ordering::Relaxed); + if !tracking_enabled { + println!("\n=== Block Size Analysis ==="); + println!( + " Hash-consed size tracking disabled (set IX_TRACK_HASH_CONSED=1 to enable)" + ); + return; + } + + // Collect all stats into a vector for analysis + let stats: Vec<(String, BlockSizeStats)> = stt + .block_stats + .iter() + .map(|entry| (entry.key().pretty(), entry.value().clone())) + .collect(); + + if stats.is_empty() { + println!("\n=== Block Size Analysis ==="); + println!(" No block statistics collected"); + return; + } + + // Compute totals + let total_hash_consed: usize = + stats.iter().map(|(_, s)| s.hash_consed_size).sum(); + let total_serialized: usize = + stats.iter().map(|(_, s)| s.serialized_size).sum(); + let total_blocks = stats.len(); + let total_consts: usize = stats.iter().map(|(_, s)| s.const_count).sum(); + + // Compute per-block overhead (serialized - hash_consed) + let mut overheads: Vec<(String, isize, f64, usize)> = stats + .iter() + .map(|(name, s)| { + let overhead = s.serialized_size as isize - s.hash_consed_size as isize; + let ratio = if s.hash_consed_size > 0 { + s.serialized_size as f64 / s.hash_consed_size as f64 + } else { + 1.0 + }; + (name.clone(), overhead, ratio, s.const_count) + }) + .collect(); + + // Sort by overhead descending (most bloated first) + overheads.sort_by(|a, b| b.1.cmp(&a.1)); + + // Compute statistics + let avg_ratio = if total_hash_consed > 0 { + total_serialized as f64 / total_hash_consed as f64 + } else { + 1.0 + }; + + // Find blocks with worst ratio (only for blocks with >100 bytes hash-consed) + let mut ratios: Vec<_> = stats + .iter() + .filter(|(_, s)| s.hash_consed_size > 100) + .map(|(name, s)| { + let ratio = s.serialized_size as f64 / 
s.hash_consed_size as f64; + (name.clone(), ratio, s.hash_consed_size, s.serialized_size) + }) + .collect(); + ratios + .sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)); + + println!("\n=== Block Size Analysis (Hash-Consing vs Serialization) ==="); + println!(" Total blocks: {}", total_blocks); + println!(" Total constants: {}", total_consts); + println!(); + println!( + " Total hash-consed size: {} bytes ({:.2} KB)", + total_hash_consed, + total_hash_consed as f64 / 1024.0 + ); + println!( + " Total serialized size: {} bytes ({:.2} KB)", + total_serialized, + total_serialized as f64 / 1024.0 + ); + println!(" Overall ratio: {:.3}x", avg_ratio); + println!( + " Total overhead: {} bytes ({:.2} KB)", + total_serialized as isize - total_hash_consed as isize, + (total_serialized as f64 - total_hash_consed as f64) / 1024.0 + ); + + // Distribution of ratios (more granular buckets for analysis) + let count_in_range = |lo: f64, hi: f64| -> usize { + stats + .iter() + .filter(|(_, s)| { + if s.hash_consed_size == 0 { + return false; + } + let r = s.serialized_size as f64 / s.hash_consed_size as f64; + r >= lo && r < hi + }) + .count() + }; + + let ratio_under_0_05 = count_in_range(0.0, 0.05); + let ratio_0_05_to_0_1 = count_in_range(0.05, 0.1); + let ratio_0_1_to_0_2 = count_in_range(0.1, 0.2); + let ratio_0_2_to_0_5 = count_in_range(0.2, 0.5); + let ratio_0_5_to_1 = count_in_range(0.5, 1.0); + let ratio_1_to_1_5 = count_in_range(1.0, 1.5); + let ratio_1_5_to_2 = count_in_range(1.5, 2.0); + let ratio_over_2 = count_in_range(2.0, f64::INFINITY); + + println!(); + println!(" Ratio distribution (serialized / hash-consed):"); + println!(" < 0.05x (20x+ compression): {} blocks", ratio_under_0_05); + println!(" 0.05-0.1x (10-20x): {} blocks", ratio_0_05_to_0_1); + println!(" 0.1-0.2x (5-10x): {} blocks", ratio_0_1_to_0_2); + println!(" 0.2-0.5x (2-5x): {} blocks", ratio_0_2_to_0_5); + println!(" 0.5-1.0x (1-2x): {} blocks", ratio_0_5_to_1); + 
println!(" 1.0-1.5x (slight bloat): {} blocks", ratio_1_to_1_5); + println!(" 1.5-2.0x: {} blocks", ratio_1_5_to_2); + println!(" >= 2.0x (high bloat): {} blocks", ratio_over_2); + + // Top 10 blocks by absolute overhead + if !overheads.is_empty() { + println!(); + println!(" Top 10 blocks by overhead (serialized - hash_consed):"); + for (name, overhead, ratio, const_count) in overheads.iter().take(10) { + println!( + " {:+} bytes ({:.2}x, {} consts): {}", + overhead, + ratio, + const_count, + truncate_name(name, 50) + ); + } + } + + // Top 10 blocks by worst ratio (with >100 bytes) + if !ratios.is_empty() { + println!(); + println!(" Top 10 blocks by ratio (hash-consed > 100 bytes):"); + for (name, ratio, hc, ser) in ratios.iter().take(10) { + println!( + " {:.2}x ({} -> {} bytes): {}", + ratio, + hc, + ser, + truncate_name(name, 50) + ); + } + } + + // Bottom 10 blocks by ratio (best compression) + let mut best_ratios: Vec<_> = stats + .iter() + .filter(|(_, s)| s.hash_consed_size > 100) + .map(|(name, s)| { + let ratio = s.serialized_size as f64 / s.hash_consed_size as f64; + (name.clone(), ratio, s.hash_consed_size, s.serialized_size) + }) + .collect(); + best_ratios + .sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal)); + + if !best_ratios.is_empty() { + println!(); + println!(" Top 10 blocks by best ratio (most efficient):"); + for (name, ratio, hc, ser) in best_ratios.iter().take(10) { + println!( + " {:.2}x ({} -> {} bytes): {}", + ratio, + hc, + ser, + truncate_name(name, 50) + ); + } + } +} + +/// Truncate a name for display. +fn truncate_name(name: &str, max_len: usize) -> String { + if name.len() <= max_len { + name.to_string() + } else { + format!("...{}", &name[name.len() - max_len + 3..]) + } +} diff --git a/src/lean/ffi/primitives.rs b/src/lean/ffi/primitives.rs new file mode 100644 index 00000000..7dab07a2 --- /dev/null +++ b/src/lean/ffi/primitives.rs @@ -0,0 +1,444 @@ +//! 
Basic Lean type encode/decode/roundtrip operations. +//! +//! This module provides FFI functions for primitive Lean types: +//! - Nat, String, Bool +//! - Option, Pair +//! - List, Array, ByteArray +//! - AssocList, HashMap + +use std::ffi::c_void; + +use crate::lean::array::LeanArrayObject; +use crate::lean::nat::Nat; +use crate::lean::sarray::LeanSArrayObject; +use crate::lean::string::LeanStringObject; +use crate::lean::{ + as_ref_unsafe, lean_alloc_array, lean_alloc_ctor, lean_alloc_sarray, + lean_array_get_core, lean_array_set_core, lean_box_fn, lean_ctor_get, + lean_ctor_set, lean_is_scalar, lean_mk_string, lean_obj_tag, + lean_sarray_cptr, lean_uint64_to_nat, +}; + +// ============================================================================= +// Nat Building +// ============================================================================= + +/// Build a Lean Nat from a Rust Nat. +pub fn build_nat(n: &Nat) -> *mut c_void { + // Try to get as u64 first + if let Some(val) = n.to_u64() { + // For small values that fit in a boxed scalar (max value is usize::MAX >> 1) + if val <= (usize::MAX >> 1) as u64 { + #[allow(clippy::cast_possible_truncation)] + return lean_box_fn(val as usize); + } + // For larger u64 values, use lean_uint64_to_nat + return unsafe { lean_uint64_to_nat(val) }; + } + // For values larger than u64, convert to limbs and use GMP + let bytes = n.to_le_bytes(); + let mut limbs: Vec = Vec::with_capacity(bytes.len().div_ceil(8)); + for chunk in bytes.chunks(8) { + let mut arr = [0u8; 8]; + arr[..chunk.len()].copy_from_slice(chunk); + limbs.push(u64::from_le_bytes(arr)); + } + unsafe { crate::lean::lean_nat_from_limbs(limbs.len(), limbs.as_ptr()) } +} + +// ============================================================================= +// Round-trip FFI Functions for Testing +// ============================================================================= + +/// Round-trip a Nat: decode from Lean, re-encode to Lean. 
+#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_nat(nat_ptr: *const c_void) -> *mut c_void { + // Decode + let nat = Nat::from_ptr(nat_ptr); + // Re-encode + build_nat(&nat) +} + +/// Round-trip a String: decode from Lean, re-encode to Lean. +#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_string(s_ptr: *const c_void) -> *mut c_void { + // Decode + let s_obj: &LeanStringObject = as_ref_unsafe(s_ptr.cast()); + let s = s_obj.as_string(); + // Re-encode + unsafe { + let cstr = crate::lean::safe_cstring(s.as_str()); + lean_mk_string(cstr.as_ptr()) + } +} + +/// Round-trip a List Nat: decode from Lean, re-encode to Lean. +#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_list_nat( + list_ptr: *const c_void, +) -> *mut c_void { + // Decode list to Vec + let nats: Vec = crate::lean::collect_list(list_ptr, Nat::from_ptr); + // Re-encode as Lean List + build_list_nat(&nats) +} + +/// Round-trip an Array Nat: decode from Lean, re-encode to Lean. +#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_array_nat( + arr_ptr: *const c_void, +) -> *mut c_void { + // Decode array + let arr_obj: &LeanArrayObject = as_ref_unsafe(arr_ptr.cast()); + let nats: Vec = + arr_obj.data().iter().map(|&p| Nat::from_ptr(p)).collect(); + // Re-encode as Lean Array + build_array_nat(&nats) +} + +/// Round-trip a ByteArray: decode from Lean, re-encode to Lean. +#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_bytearray(ba_ptr: *const c_void) -> *mut c_void { + // Decode ByteArray (scalar array of u8) + let sarray: &LeanSArrayObject = as_ref_unsafe(ba_ptr.cast()); + let bytes = sarray.data(); + // Re-encode + unsafe { + let ba = lean_alloc_sarray(1, bytes.len(), bytes.len()); + let data_ptr = lean_sarray_cptr(ba); + std::ptr::copy_nonoverlapping(bytes.as_ptr(), data_ptr, bytes.len()); + ba + } +} + +/// Round-trip a Bool: decode from Lean, re-encode. 
+/// Bool in Lean is passed as unboxed scalar: false = 0, true = 1 +#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_bool(bool_ptr: *const c_void) -> *mut c_void { + // Bool is passed as unboxed scalar - just return it as-is + bool_ptr as *mut c_void +} + +// ============================================================================= +// Helper functions for building basic Lean types +// ============================================================================= + +/// Build a Lean List Nat from a Vec. +fn build_list_nat(nats: &[Nat]) -> *mut c_void { + unsafe { + // Build list in reverse (cons builds from the end) + let mut list = lean_box_fn(0); // nil + for nat in nats.iter().rev() { + let nat_obj = build_nat(nat); + // cons : α → List α → List α (tag 1, 2 object fields) + let cons = lean_alloc_ctor(1, 2, 0); + lean_ctor_set(cons, 0, nat_obj); + lean_ctor_set(cons, 1, list); + list = cons; + } + list + } +} + +/// Build a Lean Array Nat from a Vec. +fn build_array_nat(nats: &[Nat]) -> *mut c_void { + unsafe { + let arr = lean_alloc_array(nats.len(), nats.len()); + for (i, nat) in nats.iter().enumerate() { + let nat_obj = build_nat(nat); + lean_array_set_core(arr, i, nat_obj); + } + arr + } +} + +// ============================================================================= +// FFI roundtrip functions for struct/inductive/HashMap +// ============================================================================= + +/// Round-trip a Point (structure with x, y : Nat). +/// Point is a structure, which in Lean is represented as a constructor with tag 0. 
+#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_point(point_ptr: *const c_void) -> *mut c_void { + unsafe { + // Point is a structure (single constructor, tag 0) with 2 Nat fields + let x_ptr = lean_ctor_get(point_ptr as *mut _, 0); + let y_ptr = lean_ctor_get(point_ptr as *mut _, 1); + + // Decode the Nats + let x = Nat::from_ptr(x_ptr); + let y = Nat::from_ptr(y_ptr); + + // Re-encode as Point + let point = lean_alloc_ctor(0, 2, 0); + lean_ctor_set(point, 0, build_nat(&x)); + lean_ctor_set(point, 1, build_nat(&y)); + point + } +} + +/// Round-trip a NatTree (inductive with leaf : Nat → NatTree | node : NatTree → NatTree → NatTree). +#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_nat_tree( + tree_ptr: *const c_void, +) -> *mut c_void { + roundtrip_nat_tree_recursive(tree_ptr) +} + +fn roundtrip_nat_tree_recursive(tree_ptr: *const c_void) -> *mut c_void { + unsafe { + let tag = lean_obj_tag(tree_ptr as *mut _); + match tag { + 0 => { + // leaf : Nat → NatTree + let nat_ptr = lean_ctor_get(tree_ptr as *mut _, 0); + let nat = Nat::from_ptr(nat_ptr); + let leaf = lean_alloc_ctor(0, 1, 0); + lean_ctor_set(leaf, 0, build_nat(&nat)); + leaf + }, + 1 => { + // node : NatTree → NatTree → NatTree + let left_ptr = lean_ctor_get(tree_ptr as *mut _, 0); + let right_ptr = lean_ctor_get(tree_ptr as *mut _, 1); + let left = roundtrip_nat_tree_recursive(left_ptr); + let right = roundtrip_nat_tree_recursive(right_ptr); + let node = lean_alloc_ctor(1, 2, 0); + lean_ctor_set(node, 0, left); + lean_ctor_set(node, 1, right); + node + }, + _ => panic!("Invalid NatTree tag: {}", tag), + } + } +} + +/// Round-trip an AssocList Nat Nat. 
+/// AssocList: nil (tag 0, 0 fields) | cons key value tail (tag 1, 3 fields) +/// Note: nil with 0 fields may be represented as lean_box(0) +#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_assoclist_nat_nat( + list_ptr: *const c_void, +) -> *mut c_void { + // Check if it's a scalar (nil represented as lean_box(0)) + if lean_is_scalar(list_ptr) { + // Return lean_box(0) for nil + return lean_box_fn(0); + } + let pairs = decode_assoc_list_nat_nat(list_ptr); + build_assoc_list_nat_nat(&pairs) +} + +/// Build an AssocList Nat Nat from pairs +fn build_assoc_list_nat_nat(pairs: &[(Nat, Nat)]) -> *mut c_void { + unsafe { + // Build in reverse to preserve order + // AssocList.nil with 0 fields is represented as lean_box(0) + let mut list = lean_box_fn(0); + for (k, v) in pairs.iter().rev() { + let cons = lean_alloc_ctor(1, 3, 0); // AssocList.cons + lean_ctor_set(cons, 0, build_nat(k)); + lean_ctor_set(cons, 1, build_nat(v)); + lean_ctor_set(cons, 2, list); + list = cons; + } + list + } +} + +/// Round-trip a DHashMap.Raw Nat Nat. 
+#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_dhashmap_raw_nat_nat( + raw_ptr: *const c_void, +) -> *mut c_void { + unsafe { + if lean_is_scalar(raw_ptr) { + return raw_ptr as *mut c_void; + } + + let size_ptr = lean_ctor_get(raw_ptr as *mut _, 0); + let buckets_ptr = lean_ctor_get(raw_ptr as *mut _, 1); + + let size = Nat::from_ptr(size_ptr); + + // Decode and rebuild buckets + let buckets_obj: &LeanArrayObject = as_ref_unsafe(buckets_ptr.cast()); + let num_buckets = buckets_obj.data().len(); + + let mut all_pairs: Vec<(Nat, Nat)> = Vec::new(); + for &bucket_ptr in buckets_obj.data() { + let pairs = decode_assoc_list_nat_nat(bucket_ptr); + all_pairs.extend(pairs); + } + + // Rebuild buckets + let new_buckets = lean_alloc_array(num_buckets, num_buckets); + for i in 0..num_buckets { + lean_array_set_core(new_buckets, i, lean_box_fn(0)); // AssocList.nil + } + + for (k, v) in &all_pairs { + let k_u64 = k.to_u64().unwrap_or_else(|| { + let bytes = k.to_le_bytes(); + let mut arr = [0u8; 8]; + let len = bytes.len().min(8); + arr[..len].copy_from_slice(&bytes[..len]); + u64::from_le_bytes(arr) + }); + #[allow(clippy::cast_possible_truncation)] + let bucket_idx = (k_u64 as usize) & (num_buckets - 1); + + let old_bucket = + lean_array_get_core(new_buckets, bucket_idx) as *mut c_void; + let new_bucket = lean_alloc_ctor(1, 3, 0); + lean_ctor_set(new_bucket, 0, build_nat(k)); + lean_ctor_set(new_bucket, 1, build_nat(v)); + lean_ctor_set(new_bucket, 2, old_bucket); + lean_array_set_core(new_buckets, bucket_idx, new_bucket); + } + + // Build Raw + let raw = lean_alloc_ctor(0, 2, 0); + lean_ctor_set(raw, 0, build_nat(&size)); + lean_ctor_set(raw, 1, new_buckets); + + raw + } +} + +/// Round-trip a Std.HashMap Nat Nat. +/// +/// IMPORTANT: Single-field structures are unboxed in Lean 4! +/// - HashMap has 1 field (inner : DHashMap) +/// - DHashMap has 1 field (inner : Raw) - wf : Prop is erased +/// So HashMap pointer points DIRECTLY to Raw! 
+/// +/// Memory layout (after unboxing): +/// - HashMap/DHashMap/Raw all share the same pointer +/// - Raw: ctor 0, 2 fields +/// - field 0: size : Nat +/// - field 1: buckets : Array (AssocList α β) +/// - AssocList: +/// - nil: lean_box(0) +/// - cons key value tail: ctor 1, 3 fields +#[unsafe(no_mangle)] +pub extern "C" fn rs_roundtrip_hashmap_nat_nat( + map_ptr: *const c_void, +) -> *mut c_void { + unsafe { + // Due to unboxing, map_ptr points directly to Raw + let size_ptr = lean_ctor_get(map_ptr as *mut _, 0); + let buckets_ptr = lean_ctor_get(map_ptr as *mut _, 1); + + let size = Nat::from_ptr(size_ptr); + + // Decode buckets (Array of AssocLists) + let buckets_obj: &LeanArrayObject = as_ref_unsafe(buckets_ptr.cast()); + let mut pairs: Vec<(Nat, Nat)> = Vec::new(); + + for &bucket_ptr in buckets_obj.data() { + // Each bucket is an AssocList + let bucket_pairs = decode_assoc_list_nat_nat(bucket_ptr); + pairs.extend(bucket_pairs); + } + + // Rebuild the HashMap with the same bucket count + let num_buckets = buckets_obj.data().len(); + let new_buckets = lean_alloc_array(num_buckets, num_buckets); + + // Initialize all buckets to AssocList.nil (lean_box(0)) + for i in 0..num_buckets { + lean_array_set_core(new_buckets, i, lean_box_fn(0)); // AssocList.nil + } + + // Insert each pair into the appropriate bucket using Lean's hash function + for (k, v) in &pairs { + // Hash the key - for Nat, Lean uses the value itself as hash + let k_u64 = k.to_u64().unwrap_or_else(|| { + // For large nats, use low 64 bits + let bytes = k.to_le_bytes(); + let mut arr = [0u8; 8]; + let len = bytes.len().min(8); + arr[..len].copy_from_slice(&bytes[..len]); + u64::from_le_bytes(arr) + }); + // Lean uses (hash & (buckets.size - 1)) for bucket index (power of 2) + #[allow(clippy::cast_possible_truncation)] + let bucket_idx = (k_u64 as usize) & (num_buckets - 1); + + // Get current bucket AssocList + let old_bucket = + lean_array_get_core(new_buckets, bucket_idx) as *mut c_void; + + // 
Build AssocList.cons key value tail (tag 1, 3 fields) + let new_bucket = lean_alloc_ctor(1, 3, 0); + lean_ctor_set(new_bucket, 0, build_nat(k)); + lean_ctor_set(new_bucket, 1, build_nat(v)); + lean_ctor_set(new_bucket, 2, old_bucket); + + lean_array_set_core(new_buckets, bucket_idx, new_bucket); + } + + // Build Raw (ctor 0, 2 fields: size, buckets) + // Due to unboxing, this IS the HashMap + let raw = lean_alloc_ctor(0, 2, 0); + lean_ctor_set(raw, 0, build_nat(&size)); + lean_ctor_set(raw, 1, new_buckets); + + raw + } +} + +/// Decode a Lean AssocList Nat Nat to Vec of pairs +/// AssocList: nil (tag 0) | cons key value tail (tag 1, 3 fields) +pub fn decode_assoc_list_nat_nat(list_ptr: *const c_void) -> Vec<(Nat, Nat)> { + let mut result = Vec::new(); + let mut current = list_ptr; + + loop { + unsafe { + // Check if scalar (shouldn't happen) or object + if lean_is_scalar(current) { + break; + } + + let tag = lean_obj_tag(current as *mut _); + if tag == 0 { + // AssocList.nil + break; + } + + // AssocList.cons: 3 fields (key, value, tail) + let key_ptr = lean_ctor_get(current as *mut _, 0); + let value_ptr = lean_ctor_get(current as *mut _, 1); + let tail_ptr = lean_ctor_get(current as *mut _, 2); + + let k = Nat::from_ptr(key_ptr); + let v = Nat::from_ptr(value_ptr); + + result.push((k, v)); + current = tail_ptr; + } + } + + result +} + +// ============================================================================= +// Utility FFI Functions +// ============================================================================= + +/// Read first 8 bytes of a ByteArray as little-endian UInt64. +/// Used by Address.Hashable to match Rust's bucket hash computation. +/// This is essentially just a pointer cast - very fast. 
+#[unsafe(no_mangle)] +pub extern "C" fn rs_bytearray_to_u64_le(ba_ptr: *const c_void) -> u64 { + unsafe { + let arr: &LeanSArrayObject = &*ba_ptr.cast::(); + if arr.data().len() < 8 { + return 0; + } + let data_ptr = lean_sarray_cptr(ba_ptr as *mut _); + std::ptr::read_unaligned(data_ptr as *const u64) + } +} diff --git a/src/lean/mod.rs b/src/lean/mod.rs deleted file mode 100644 index 74b7b1a3..00000000 --- a/src/lean/mod.rs +++ /dev/null @@ -1,169 +0,0 @@ -//! Rust bindings for Lean, implemented by mimicking the memory layout of Lean's -//! low-level C objects. -//! -//! This crate must be kept in sync with `lean/lean.h`. Pay close attention to -//! definitions containing C code in their docstrings. - -pub mod array; -pub mod boxed; -pub mod ctor; -pub mod external; -pub mod ffi; -pub mod nat; -pub mod object; -pub mod sarray; -pub mod string; - -use std::ffi::c_void; - -use crate::lean::{ - boxed::{BoxedU64, BoxedUSize}, - ctor::LeanCtorObject, -}; - -#[inline] -#[allow(clippy::not_unsafe_ptr_arg_deref)] -pub fn as_ref_unsafe<'a, T>(ptr: *const T) -> &'a T { - let t_ref = unsafe { ptr.as_ref() }; - t_ref.expect("Null pointer dereference") -} - -#[inline] -#[allow(clippy::not_unsafe_ptr_arg_deref)] -pub fn as_mut_unsafe<'a, T>(ptr: *mut T) -> &'a mut T { - let t_ref = unsafe { ptr.as_mut() }; - t_ref.expect("Null pointer dereference") -} - -/// ```c -/// bool lean_is_scalar(lean_object * o) { return ((size_t)(o) & 1) == 1; } -/// ``` -#[inline] -pub fn lean_is_scalar(ptr: *const T) -> bool { - ptr as usize & 1 == 1 -} - -#[macro_export] -/// ```c -/// lean_object * lean_box(size_t n) { return (lean_object*)(((size_t)(n) << 1) | 1); } -/// ``` -macro_rules! lean_box { - ($e:expr) => { - (($e << 1) | 1) as *const std::ffi::c_void - }; -} - -/// ```c -/// size_t lean_unbox(lean_object * o) { return (size_t)(o) >> 1; } -/// ``` -#[macro_export] -macro_rules! 
lean_unbox { - ($t:ident, $e:expr) => { - $t::try_from(($e as usize) >> 1).expect("Unintended truncation") - }; -} - -/// ```c -/// unsigned lean_unbox_uint32(b_lean_obj_arg o) { -/// if (sizeof(void*) == 4) { -/// /* 32-bit implementation */ -/// return lean_ctor_get_uint32(o, 0); -/// } else { -/// /* 64-bit implementation */ -/// return lean_unbox(o); -/// } -/// } -/// ``` -#[inline] -pub fn lean_unbox_u32(ptr: *const c_void) -> u32 { - if cfg!(target_pointer_width = "32") { - let boxed_usize: &BoxedUSize = as_ref_unsafe(ptr.cast()); - u32::try_from(boxed_usize.value).expect("Cannot convert from usize") - } else { - lean_unbox!(u32, ptr) - } -} - -/// ```c -/// uint64_t lean_unbox_uint64(b_lean_obj_arg o) { -/// return lean_ctor_get_uint64(o, 0); -/// } -/// ``` -#[inline] -pub fn lean_unbox_u64(ptr: *const c_void) -> u64 { - let boxed_usize: &BoxedU64 = as_ref_unsafe(ptr.cast()); - boxed_usize.value -} - -pub fn boxed_usize_ptr_to_usize(ptr: *const c_void) -> usize { - let boxed_usize_ptr = ptr.cast::(); - let boxed_usize = as_ref_unsafe(boxed_usize_ptr); - boxed_usize.value -} - -/// Emulates arrays of flexible size from C. 
-#[repr(C)] -pub struct CArray([T; 0]); - -impl CArray { - #[inline] - pub fn slice(&self, len: usize) -> &[T] { - unsafe { std::slice::from_raw_parts(self.0.as_ptr(), len) } - } - - #[inline] - pub fn copy_from_slice(&mut self, src: &[T]) { - unsafe { - std::ptr::copy_nonoverlapping( - src.as_ptr(), - self.0.as_ptr() as *mut _, - src.len(), - ); - } - } -} - -pub struct ListIterator(*const c_void); - -impl Iterator for ListIterator { - type Item = *const c_void; - fn next(&mut self) -> Option { - let ptr = self.0; - if lean_is_scalar(ptr) { - return None; - } - let ctor: &LeanCtorObject = as_ref_unsafe(ptr.cast()); - let [head_ptr, tail_ptr] = ctor.objs(); - self.0 = tail_ptr; - Some(head_ptr) - } -} - -pub fn collect_list( - mut ptr: *const c_void, - map_fn: fn(*const c_void) -> T, -) -> Vec { - let mut vec = Vec::new(); - while !lean_is_scalar(ptr) { - let ctor: &LeanCtorObject = as_ref_unsafe(ptr.cast()); - let [head_ptr, tail_ptr] = ctor.objs(); - vec.push(map_fn(head_ptr)); - ptr = tail_ptr; - } - vec -} - -pub fn collect_list_with( - mut ptr: *const c_void, - map_fn: fn(*const c_void, &mut C) -> T, - c: &mut C, -) -> Vec { - let mut vec = Vec::new(); - while !lean_is_scalar(ptr) { - let ctor: &LeanCtorObject = as_ref_unsafe(ptr.cast()); - let [head_ptr, tail_ptr] = ctor.objs(); - vec.push(map_fn(head_ptr, c)); - ptr = tail_ptr; - } - vec -} diff --git a/src/lean/nat.rs b/src/lean/nat.rs index cbe31f52..847536be 100644 --- a/src/lean/nat.rs +++ b/src/lean/nat.rs @@ -1,4 +1,10 @@ +//! Lean `Nat` (arbitrary-precision natural number) representation. +//! +//! Lean stores small naturals as tagged scalars and large ones as GMP +//! `mpz_object`s on the heap. This module handles both representations. + use std::ffi::c_void; +use std::fmt; use num_bigint::BigUint; @@ -7,12 +13,33 @@ use crate::{ lean_unbox, }; +/// Arbitrary-precision natural number, wrapping `BigUint`. 
#[derive(Hash, PartialEq, Eq, Debug, Clone, PartialOrd, Ord)] pub struct Nat(pub BigUint); +impl fmt::Display for Nat { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl From for Nat { + fn from(x: u64) -> Self { + Nat(BigUint::from(x)) + } +} + impl Nat { pub const ZERO: Self = Self(BigUint::ZERO); + /// Try to convert to u64, returning None if the value is too large. + #[inline] + pub fn to_u64(&self) -> Option { + u64::try_from(&self.0).ok() + } + + /// Decode a `Nat` from a Lean object pointer. Handles both scalar (unboxed) + /// and heap-allocated (GMP `mpz_object`) representations. pub fn from_ptr(ptr: *const c_void) -> Nat { if lean_is_scalar(ptr) { let u = lean_unbox!(usize, ptr); diff --git a/src/lean/sarray.rs b/src/lean/sarray.rs index 7213358e..b3b5789e 100644 --- a/src/lean/sarray.rs +++ b/src/lean/sarray.rs @@ -1,3 +1,5 @@ +//! Lean scalar array (`ByteArray`) object layout. + use super::{CArray, object::LeanObject}; /// ```c @@ -22,6 +24,11 @@ impl LeanSArrayObject { self.m_data.slice(self.m_size) } + #[inline] + pub fn data_mut(&mut self) -> &mut [u8] { + self.m_data.slice_mut(self.m_size) + } + pub fn set_data(&mut self, data: &[u8]) { assert!(self.m_capacity >= data.len()); self.m_data.copy_from_slice(data);