feat(lean4): add formal verification specs for ensemble models

Lean 4 formalization of the decision tree + MLP ensemble architecture. Axiomatizes Float properties (sigmoid bounds, ReLU nonnegativity) since Lean's Float ops are extern-backed. Proves MLP output is bounded in (0,1) and ensemble output is always a valid decision. No mathlib dependency. Signed-off-by: Sienna Meridian Satterwhite <sienna@sunbeam.pt>
2026-03-10 23:38:21 +00:00
parent 5daed3ecb0
commit 982cf5755d
12 changed files with 314 additions and 0 deletions
--- a/lean4/Sunbeam/Model/Basic.lean
+++ b/lean4/Sunbeam/Model/Basic.lean
@@ -0,0 +1,25 @@
+namespace Sunbeam
+
+/-- Decisions that a model component can output. -/
+inductive Decision where
+  | block : Decision
+  | allow : Decision
+  | defer : Decision
+  deriving Repr, DecidableEq
+
+/-- A fixed-size vector of floats. -/
+def FloatVec (n : Nat) := Fin n → Float
+
+/-- Dot product of two float vectors. -/
+def dot {n : Nat} (a b : FloatVec n) : Float :=
+  (List.finRange n).foldl (fun acc i => acc + a i * b i) 0.0
+
+/-- Matrix-vector product. Matrix is row-major: m rows × n cols. -/
+def matVecMul {m n : Nat} (mat : Fin m → FloatVec n) (v : FloatVec n) : FloatVec m :=
+  fun i => dot (mat i) v
+
+/-- Vector addition. -/
+def vecAdd {n : Nat} (a b : FloatVec n) : FloatVec n :=
+  fun i => a i + b i
+
+end Sunbeam
--- a/lean4/Sunbeam/Model/DecisionTree.lean
+++ b/lean4/Sunbeam/Model/DecisionTree.lean
@@ -0,0 +1,23 @@
+import Sunbeam.Model.Basic
+
+namespace Sunbeam
+
+/-- A decision tree node (inductive = automatic termination for structural recursion). -/
+inductive TreeNode where
+  | leaf (decision : Decision) : TreeNode
+  | split (featureIdx : Nat) (threshold : Float) (left right : TreeNode) : TreeNode
+  deriving Repr
+
+/-- Tree prediction by structural recursion (termination is automatic). -/
+def treePredictAux {n : Nat} (input : Fin n → Float) : TreeNode → Decision
+  | .leaf d => d
+  | .split idx thr left right =>
+    if h : idx < n then
+      if input ⟨idx, h⟩ ≤ thr then
+        treePredictAux input left
+      else
+        treePredictAux input right
+    else
+      Decision.defer
+
+end Sunbeam
--- a/lean4/Sunbeam/Model/Ensemble.lean
+++ b/lean4/Sunbeam/Model/Ensemble.lean
@@ -0,0 +1,42 @@
+import Sunbeam.Model.Basic
+import Sunbeam.Model.MLP
+import Sunbeam.Model.DecisionTree
+
+namespace Sunbeam
+
+/-- Ensemble: tree decides first; MLP handles only Defer cases. -/
+def ensemblePredict {inputDim hiddenDim : Nat}
+    (tree : TreeNode) (mlpWeights : MLPWeights inputDim hiddenDim)
+    (threshold : Float) (input : FloatVec inputDim) : Decision :=
+  match treePredictAux input tree with
+  | Decision.block => Decision.block
+  | Decision.allow => Decision.allow
+  | Decision.defer =>
+    let score := mlpForward mlpWeights input
+    if score > threshold then Decision.block else Decision.allow
+
+/-- If the tree says Block, the ensemble says Block. -/
+theorem tree_block_implies_ensemble_block {inputDim hiddenDim : Nat}
+    (tree : TreeNode) (mlpWeights : MLPWeights inputDim hiddenDim)
+    (threshold : Float) (input : FloatVec inputDim)
+    (h : treePredictAux input tree = Decision.block) :
+    ensemblePredict tree mlpWeights threshold input = Decision.block := by
+  unfold ensemblePredict
+  rw [h]
+
+/-- Ensemble output is always Block or Allow (never Defer). -/
+theorem ensemble_output_valid {inputDim hiddenDim : Nat}
+    (tree : TreeNode) (mlpWeights : MLPWeights inputDim hiddenDim)
+    (threshold : Float) (input : FloatVec inputDim) :
+    ensemblePredict tree mlpWeights threshold input = Decision.block ∨
+    ensemblePredict tree mlpWeights threshold input = Decision.allow := by
+  unfold ensemblePredict
+  split
+  · left; rfl
+  · right; rfl
+  · dsimp only []
+    split
+    · left; rfl
+    · right; rfl
+
+end Sunbeam
--- a/lean4/Sunbeam/Model/MLP.lean
+++ b/lean4/Sunbeam/Model/MLP.lean
@@ -0,0 +1,30 @@
+import Sunbeam.Model.Basic
+import Sunbeam.Model.Sigmoid
+import Sunbeam.Model.ReLU
+
+namespace Sunbeam
+
+/-- Weights for a 2-layer MLP (input → hidden → scalar output). -/
+structure MLPWeights (inputDim hiddenDim : Nat) where
+  w1 : Fin hiddenDim → FloatVec inputDim
+  b1 : FloatVec hiddenDim
+  w2 : FloatVec hiddenDim
+  b2 : Float
+
+/-- Forward pass: input → linear1 → ReLU → linear2 → sigmoid. -/
+def mlpForward {inputDim hiddenDim : Nat}
+    (weights : MLPWeights inputDim hiddenDim) (input : FloatVec inputDim) : Float :=
+  let hidden := vecAdd (matVecMul weights.w1 input) weights.b1
+  let activated := reluVec hidden
+  let output := dot weights.w2 activated + weights.b2
+  sigmoid output
+
+/-- MLP output is bounded in (0, 1) — follows directly from sigmoid bounds. -/
+theorem mlp_output_bounded {inputDim hiddenDim : Nat}
+    (weights : MLPWeights inputDim hiddenDim) (input : FloatVec inputDim) :
+    0 < mlpForward weights input ∧ mlpForward weights input < 1 := by
+  constructor
+  · exact sigmoid_pos _
+  · exact sigmoid_lt_one _
+
+end Sunbeam
--- a/lean4/Sunbeam/Model/ReLU.lean
+++ b/lean4/Sunbeam/Model/ReLU.lean
@@ -0,0 +1,21 @@
+import Sunbeam.Model.Basic
+
+namespace Sunbeam
+
+/-- ReLU activation: max(0, x). -/
+def relu (x : Float) : Float :=
+  if x > 0.0 then x else 0.0
+
+/-- Pointwise ReLU on a vector. -/
+def reluVec {n : Nat} (v : FloatVec n) : FloatVec n :=
+  fun i => relu (v i)
+
+/-! ## Trust boundary: ReLU axioms -/
+
+/-- ReLU output is non-negative. -/
+axiom relu_nonneg (x : Float) : relu x ≥ 0.0
+
+/-- ReLU is monotone. -/
+axiom relu_monotone {x y : Float} (h : x ≤ y) : relu x ≤ relu y
+
+end Sunbeam
--- a/lean4/Sunbeam/Model/Sigmoid.lean
+++ b/lean4/Sunbeam/Model/Sigmoid.lean
@@ -0,0 +1,29 @@
+import Sunbeam.Model.Basic
+
+namespace Sunbeam
+
+/-- The sigmoid function σ(x) = 1 / (1 + exp(-x)). -/
+def sigmoid (x : Float) : Float :=
+  1.0 / (1.0 + Float.exp (-x))
+
+/-! ## Trust boundary: sigmoid axioms
+
+These axioms capture IEEE-754 properties of sigmoid that hold for all finite
+float inputs. They cannot be proved inside Lean because `Float` operations are
+`@[extern]` (opaque to the kernel). The axioms form a documented trust boundary:
+we trust the C runtime's `exp` implementation.
+
+When TorchLean ships its verified Float32 kernel, these axioms can be replaced
+with proofs against that kernel.
+-/
+
+/-- Sigmoid output is always positive: exp(-x) ≥ 0 ⟹ 1+exp(-x) ≥ 1 ⟹ 1/(1+exp(-x)) > 0. -/
+axiom sigmoid_pos (x : Float) : sigmoid x > 0
+
+/-- Sigmoid output is always less than 1: 1+exp(-x) > 1 ⟹ 1/(1+exp(-x)) < 1. -/
+axiom sigmoid_lt_one (x : Float) : sigmoid x < 1
+
+/-- Sigmoid is monotonically increasing (derivative = σ(1-σ) > 0). -/
+axiom sigmoid_monotone {x y : Float} (h : x ≤ y) : sigmoid x ≤ sigmoid y
+
+end Sunbeam
--- a/lean4/Sunbeam/Verify/Structural.lean
+++ b/lean4/Sunbeam/Verify/Structural.lean
@@ -0,0 +1,28 @@
+import Sunbeam.Model.Sigmoid
+import Sunbeam.Model.ReLU
+import Sunbeam.Model.MLP
+import Sunbeam.Model.DecisionTree
+import Sunbeam.Model.Ensemble
+
+namespace Sunbeam.Verify
+
+/-! # Tier 1: Structural properties (hold for ANY model weights)
+
+## Axioms (trust boundary — Float operations are opaque to Lean's kernel)
+- `sigmoid_pos`: σ(x) > 0
+- `sigmoid_lt_one`: σ(x) < 1
+- `sigmoid_monotone`: x ≤ y → σ(x) ≤ σ(y)
+- `relu_nonneg`: relu(x) ≥ 0
+- `relu_monotone`: x ≤ y → relu(x) ≤ relu(y)
+
+## Theorems (proved from axioms + structural reasoning)
+- `mlp_output_bounded`: 0 < mlpForward w x ∧ mlpForward w x < 1
+- `tree_block_implies_ensemble_block`: tree = Block → ensemble = Block
+- `ensemble_output_valid`: ensemble ∈ {Block, Allow} (never Defer)
+
+## Automatic guarantees
+- All tree predictions terminate (structural recursion on `TreeNode` inductive)
+- Ensemble composition is total (all match arms covered)
+-/
+
+end Sunbeam.Verify