-
Notifications
You must be signed in to change notification settings - Fork 141
feat: Splay Tree Formalisation #568
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,258 @@ | ||
| /- | ||
| Copyright (c) 2025 Sorrachai Yingchareonthawornchai. All rights reserved. | ||
| Released under Apache 2.0 license as described in the file LICENSE. | ||
| Authors: Anton Kovsharov, Antoine du Fresne von Hohenesche, | ||
| Sorrachai Yingchareonthawornchai | ||
| -/ | ||
|
|
||
| module | ||
|
|
||
| public import Mathlib.Combinatorics.SimpleGraph.Basic | ||
| public import Mathlib.Combinatorics.SimpleGraph.Metric | ||
|
|
||
| @[expose] public section | ||
|
|
||
| /-! | ||
| # Binary Tree | ||
|
|
||
| In this file we introduce the `BinaryTree` data structure and its basic operations. | ||
| -/ | ||
|
|
||
| variable {α : Type} | ||
|
|
||
| inductive BinaryTree (α : Type) where | ||
| | empty | ||
| | node (left : BinaryTree α) (key : α) (right : BinaryTree α) | ||
| deriving DecidableEq | ||
|
|
||
| namespace BinaryTree | ||
|
|
||
| /-! ### Core Definitions -/ | ||
| section CoreDefs | ||
|
|
||
| def left : BinaryTree α → BinaryTree α | ||
| | .empty => .empty | ||
| | .node l _ _ => l | ||
|
|
||
| def right : BinaryTree α → BinaryTree α | ||
| | .empty => .empty | ||
| | .node _ _ r => r | ||
|
|
||
| theorem non_empty_exist (s : BinaryTree α) (h : s ≠ .empty) : | ||
| ∃ A k B, s = .node A k B := by | ||
| induction s <;> grind | ||
|
|
||
| def num_nodes : BinaryTree α → ℕ | ||
| | .empty => 0 | ||
| | .node l _ r => 1 + num_nodes l + num_nodes r | ||
|
|
||
| @[simp] lemma num_nodes_empty : num_nodes (empty : BinaryTree α) = 0 := rfl | ||
|
|
||
| @[simp] lemma num_nodes_node (l : BinaryTree α) (k : α) (r : BinaryTree α) : | ||
| (node l k r).num_nodes = 1 + l.num_nodes + r.num_nodes := rfl | ||
|
|
||
| /-- In-order traversal as a list of keys. -/ | ||
| def toKeyList : BinaryTree α → List α | ||
| | .empty => [] | ||
| | .node l k r => l.toKeyList ++ [k] ++ r.toKeyList | ||
|
|
||
| @[simp] lemma toKeyList_empty : toKeyList (empty : BinaryTree α) = [] := rfl | ||
|
|
||
| @[simp] lemma toKeyList_node (l : BinaryTree α) (k : α) (r : BinaryTree α) : | ||
| (node l k r).toKeyList = l.toKeyList ++ [k] ++ r.toKeyList := rfl | ||
|
|
||
| /-- Number of nodes on the search path for `q` in `t`. Zero on the empty | ||
| tree; on a node this counts the root plus (if `q ≠ k`) the search path | ||
| length in the appropriate subtree. -/ | ||
| def search_path_len [LinearOrder α] (t : BinaryTree α) (q : α) : ℕ := | ||
| match t with | ||
| | .empty => 0 | ||
| | .node l key r => | ||
| if q < key then | ||
| 1 + l.search_path_len q | ||
| else if key < q then | ||
| 1 + r.search_path_len q | ||
| else | ||
| 1 | ||
|
|
||
| /-- | ||
| Remark: | ||
| This implementation is not really a "contain function", | ||
| because a binary tree could have q >/< key while being in | ||
| the left/right subtree of key respectively. | ||
| If `contains t q` is true, then `q` is in `t`; but | ||
| the converse need not necessarily hold true. The | ||
| converse is true for a binary search tree. | ||
| -/ | ||
| def contains [LinearOrder α] (t : BinaryTree α) (q : α) : Prop := | ||
| match t with | ||
| | .empty => False | ||
| | .node l key r => | ||
| if q < key then | ||
| l.contains q | ||
| else if key < q then | ||
| r.contains q | ||
| else | ||
| True | ||
|
|
||
| end CoreDefs | ||
|
|
||
|
|
||
| /-! ### Rotations and Mirroring -/ | ||
| section Transformations | ||
|
|
||
| def rotateRight : BinaryTree α → BinaryTree α | ||
| | .node (.node a x b) y c => .node a x (.node b y c) | ||
| | t => t | ||
|
|
||
| def rotateLeft : BinaryTree α → BinaryTree α | ||
| | .node a x (.node b y c) => .node (.node a x b) y c | ||
| | t => t | ||
|
|
||
| /-- Mirror a binary tree: swap every left and right subtree. -/ | ||
| def mirror : BinaryTree α → BinaryTree α | ||
| | .empty => .empty | ||
| | .node l k r => .node r.mirror k l.mirror | ||
|
|
||
| @[simp] lemma mirror_empty : (empty : BinaryTree α).mirror = .empty := rfl | ||
|
|
||
| @[simp] lemma mirror_node (l : BinaryTree α) (k : α) (r : BinaryTree α) : | ||
| (node l k r).mirror = .node r.mirror k l.mirror := rfl | ||
|
|
||
| @[simp] lemma mirror_mirror (t : BinaryTree α) : t.mirror.mirror = t := by | ||
| induction t <;> simp_all | ||
|
|
||
| @[simp] lemma num_nodes_mirror (t : BinaryTree α) : t.mirror.num_nodes = t.num_nodes := by | ||
| induction t <;> simp_all [num_nodes]; omega | ||
|
|
||
| @[simp] lemma mirror_rotateRight (t : BinaryTree α) : | ||
| (rotateRight t).mirror = rotateLeft t.mirror := by | ||
| cases t; · rfl | ||
| rename_i l _ _ | ||
| cases l <;> rfl | ||
|
|
||
| @[simp] lemma mirror_rotateLeft (t : BinaryTree α) : | ||
| (rotateLeft t).mirror = rotateRight t.mirror := by | ||
| cases t; · rfl | ||
| rename_i _ _ r | ||
| cases r <;> rfl | ||
|
|
||
| @[simp] theorem num_nodes_rotateRight (t : BinaryTree α) : | ||
| (rotateRight t).num_nodes = t.num_nodes := by | ||
| rcases t with _ | ⟨(_ | ⟨ll, lk, lr⟩), k, r⟩ <;> | ||
| simp [rotateRight]; omega | ||
|
|
||
| @[simp] theorem num_nodes_rotateLeft (t : BinaryTree α) : | ||
| (rotateLeft t).num_nodes = t.num_nodes := by | ||
| have h := num_nodes_rotateRight t.mirror | ||
| simp only [← mirror_rotateLeft, num_nodes_mirror] at h; exact h | ||
|
|
||
| end Transformations | ||
|
|
||
|
|
||
| /-! ### Contains Characterizations -/ | ||
| section ContainsLemmas | ||
|
|
||
| @[simp] lemma not_contains_empty [LinearOrder α] (q : α) : | ||
| ¬ (empty : BinaryTree α).contains q := nofun | ||
|
|
||
| @[simp] lemma contains_node_lt [LinearOrder α] {l : BinaryTree α} {k q : α} | ||
| {r : BinaryTree α} (h : q < k) : | ||
| (node l k r).contains q ↔ l.contains q := by | ||
| simp [contains, h] | ||
|
|
||
| @[simp] lemma contains_node_gt [LinearOrder α] {l : BinaryTree α} {k q : α} | ||
| {r : BinaryTree α} (h : k < q) : | ||
| (node l k r).contains q ↔ r.contains q := by | ||
| simp [contains, h, not_lt_of_gt h] | ||
|
|
||
| @[simp] lemma contains_node_not_eq_not_lt [LinearOrder α] | ||
| {l : BinaryTree α} {k q : α} {r : BinaryTree α} | ||
| (h1 : ¬ q = k) (h2 : ¬ q < k) : | ||
| (node l k r).contains q ↔ r.contains q := by | ||
| have hgt : k < q := lt_of_le_of_ne (Std.not_lt.mp h2) (Ne.symm (Ne.intro h1)) | ||
| simp [contains, hgt, not_lt_of_gt hgt] | ||
|
|
||
| end ContainsLemmas | ||
|
|
||
|
|
||
| /-! ### Tree Invariants and BST Properties -/ | ||
| section Invariants | ||
|
|
||
| inductive ForallTree (p : α → Prop) : BinaryTree α → Prop | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is just the induction principle of
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I dont think that Keeping it makes pattern-matching easier on the tree constructors and goes well with |
||
| | left : ForallTree p .empty | ||
| | node l key r : | ||
| ForallTree p l → | ||
| p key → | ||
| ForallTree p r → | ||
| ForallTree p (.node l key r) | ||
|
|
||
| inductive IsBST [LinearOrder α] : BinaryTree α → Prop | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See the design of Batteries RBMap for defining these kinds of functions. Ideally this should be defined through a |
||
| | left : IsBST .empty | ||
| | node key l r : | ||
| ForallTree (fun k => k < key) l → | ||
| ForallTree (fun k => key < k) r → | ||
| IsBST l → IsBST r → | ||
| IsBST (.node l key r) | ||
|
|
||
| end Invariants | ||
|
|
||
|
|
||
| /-! ### Accessor Lemmas for ForallTree -/ | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This entire section should fall out from API lemmas for fold and map. |
||
| section ForallTreeAccessors | ||
|
|
||
| @[simp] lemma ForallTree.left_sub {p : α → Prop} {l : BinaryTree α} {k : α} {r : BinaryTree α} | ||
| (h : ForallTree p (.node l k r)) : ForallTree p l := by | ||
| cases h with | node _ _ _ hl _ _ => exact hl | ||
|
|
||
| @[simp] lemma ForallTree.root {p : α → Prop} {l : BinaryTree α} {k : α} {r : BinaryTree α} | ||
| (h : ForallTree p (.node l k r)) : p k := by | ||
| cases h with | node _ _ _ _ hk _ => exact hk | ||
|
|
||
| @[simp] lemma ForallTree.right_sub {p : α → Prop} {l : BinaryTree α} {k : α} {r : BinaryTree α} | ||
| (h : ForallTree p (.node l k r)) : ForallTree p r := by | ||
| cases h with | node _ _ _ _ _ hr => exact hr | ||
|
|
||
| end ForallTreeAccessors | ||
|
|
||
|
|
||
| /-! ### Accessor Lemmas for IsBST -/ | ||
| section IsBSTAccessors | ||
|
|
||
| @[simp] lemma IsBST.forallTree_left [LinearOrder α] {l : BinaryTree α} {k : α} {r : BinaryTree α} | ||
| (h : IsBST (.node l k r)) : ForallTree (· < k) l := by | ||
| cases h with | node _ _ _ hl _ _ _ => exact hl | ||
|
|
||
| @[simp] lemma IsBST.forallTree_right [LinearOrder α] {l : BinaryTree α} {k : α} {r : BinaryTree α} | ||
| (h : IsBST (.node l k r)) : ForallTree (k < ·) r := by | ||
| cases h with | node _ _ _ _ hr _ _ => exact hr | ||
|
|
||
| @[simp] lemma IsBST.left_bst [LinearOrder α] {l : BinaryTree α} {k : α} {r : BinaryTree α} | ||
| (h : IsBST (.node l k r)) : IsBST l := by | ||
| cases h with | node _ _ _ _ _ hl _ => exact hl | ||
|
|
||
| @[simp] lemma IsBST.right_bst [LinearOrder α] {l : BinaryTree α} {k : α} {r : BinaryTree α} | ||
| (h : IsBST (.node l k r)) : IsBST r := by | ||
| cases h with | node _ _ _ _ _ _ hr => exact hr | ||
|
|
||
| end IsBSTAccessors | ||
|
|
||
| end BinaryTree | ||
|
|
||
|
|
||
| /-! ### BST Structure -/ | ||
| section BSTStructure | ||
|
|
||
| structure BST (α : Type) [LinearOrder α] where | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not convinced by the bundling here.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you elaborate? What is bad? What should be a better choice?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Chris is right. This is an unnecessary bundling. You can operate solely on BinaryTree and insert
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Most of the proofs in the PR were based on a separate tree and IsBST property. We create an API specifically for people who can use BST. In some use cases, it is more convenient to refer to a BST as a type rather than as a single tree with its properties. Sometimes you don't want to carry these properties around all the time.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please do ask on Zulip if you'd like to hear other opinions, but unbundling these sort of propositions is a well-established best practice. I think this should be a
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure, will do.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Consider the fact that a library PR must be built with future re-use in mind, beyond just this PR. If you create a tree definition it can be reused in several places where it need not be a bst. Secondly, this means you can actually work with trees from mathlib. |
||
| tree : BinaryTree α | ||
| hBST : BinaryTree.IsBST tree | ||
|
|
||
| namespace BST | ||
|
|
||
| /-- Checks if the BST contains a given key by delegating to the underlying tree. -/ | ||
| def contains [LinearOrder α] (t : BST α) (q : α) : Prop := | ||
| t.tree.contains q | ||
|
|
||
| end BST | ||
|
|
||
| end BSTStructure | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,87 @@ | ||
| /- | ||
| Copyright (c) 2026 Sorrachai Yingchareonthawornchai. All rights reserved. | ||
| Released under Apache 2.0 license as described in the file LICENSE. | ||
| Authors: Anton Kovsharov, Antoine du Fresne von Hohenesche, | ||
| Sorrachai Yingchareonthawornchai | ||
| -/ | ||
|
|
||
| import Cslib.Foundations.Data.SplayTree.Complexity | ||
| import Cslib.Foundations.Data.SplayTree.Correctness | ||
|
|
||
| /-! | ||
| # Splay Tree API for BST | ||
|
|
||
| This module provides a high-level API for splaying directly on the `BST` type. | ||
| It encapsulates the raw `BinaryTree` operations and their associated invariant | ||
| proofs, allowing users to safely and cleanly manipulate BSTs without manually | ||
| re-proving the `IsBST` invariant after every rotation. | ||
| -/ | ||
|
|
||
| variable {α : Type} [LinearOrder α] | ||
|
|
||
| namespace SplayTree.BSTAPI | ||
|
|
||
| open SplayTree | ||
| open BinaryTree | ||
|
|
||
| /-! ### Core Operation -/ | ||
|
|
||
| /-- | ||
| Splays a key `q` in the given BST `t`. | ||
| Returns a new `BST` and automatically resolves the `IsBST` invariant internally. | ||
| -/ | ||
| def splay (t : BST α) (q : α) : BST α := | ||
| ⟨SplayTree.splay t.tree q, IsBST_splay t.tree q t.hBST⟩ | ||
|
|
||
| /-! ### Correctness -/ | ||
|
|
||
| /-- If the underlying BST contains a key, splaying brings it to the root. -/ | ||
| theorem splay_root_of_contains (t : BST α) (q : α) (hc : t.contains q) : | ||
| ∃ l r, (splay t q).tree = .node l q r := | ||
| SplayTree.splay_root_of_contains t.tree q hc | ||
|
|
||
| /-- Splaying preserves the in-order traversal (and therefore the exact elements) of the BST. -/ | ||
| @[simp] | ||
| theorem toKeyList_splay (t : BST α) (q : α) : | ||
| (splay t q).tree.toKeyList = t.tree.toKeyList := | ||
| SplayTree.toKeyList_splay t.tree q | ||
|
|
||
| /-- Splaying preserves the exact number of nodes. -/ | ||
| @[simp] | ||
| theorem num_nodes_splay (t : BST α) (q : α) : | ||
| (splay t q).tree.num_nodes = t.tree.num_nodes := | ||
| SplayTree.num_nodes_splay t.tree q | ||
|
|
||
| /-! ### Complexity -/ | ||
|
|
||
| /-- The potential Φ of a BST, inherited directly from its underlying tree. -/ | ||
| noncomputable def φ (t : BST α) : ℝ := SplayTree.φ t.tree | ||
|
|
||
| /-- The concrete operational cost of splaying a key in the BST. -/ | ||
| noncomputable def splayCost (t : BST α) (q : α) : ℝ := SplayTree.splay.cost t.tree q | ||
|
|
||
| /-- | ||
| The core O(log n) amortized bound reformulated for the safe `BST` type. | ||
| The potential change plus the actual cost is bounded by `3 * log_2(n) + 1`. | ||
| -/ | ||
| theorem splay_amortized_bound (t : BST α) (q : α) : | ||
| φ (splay t q) - φ t + splayCost t q ≤ 3 * Real.logb 2 t.tree.num_nodes + 1 := | ||
| SplayTree.splay_amortized_bound t.tree q | ||
|
|
||
|
|
||
| /-! ### Sequence Cost and Total Complexity Bound -/ | ||
|
|
||
| /-- The total cost of a sequence of `m` splays, defined directly on the initial BST. -/ | ||
| noncomputable def sequenceCost {m : ℕ} (init : BST α) (X : Fin m → α) : ℝ := | ||
| SplayTree.splay.sequence_cost init.tree X | ||
|
|
||
| /-- | ||
| The classical total sequence cost bound for `m` operations on a BST of size `n`. | ||
| It guarantees that the total sequence cost is bounded by O(m log n + n log n). | ||
| -/ | ||
| theorem nlogn_cost (n m : ℕ) (X : Fin m → α) | ||
| (init : BST α) (h_size : init.tree.num_nodes = n) : | ||
| sequenceCost init X ≤ m * (3 * Real.logb 2 n + 1) + n * Real.logb 2 n := | ||
| SplayTree.nlogn_cost n m X init.tree h_size | ||
|
|
||
| end SplayTree.BSTAPI |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It seems this is the same as Mathlib.Data.Tree.Basic?