Skip to content

Commit 52e6a34

Browse files
authored
perf(lambda-rs): Compute the determinant via Gaussian elimination for smaller matrices (#194)
## Summary Optimize determinant computation in `lambda-rs` matrix math by replacing recursive Laplace expansion with Gaussian elimination plus partial pivoting. This reduces determinant evaluation from factorial-time growth to cubic-time growth for square matrices and adds stack-allocated fast paths for common 3x3 and 4x4 cases. ## Related Issues None linked. ## Changes - Replace recursive determinant expansion in `crates/lambda-rs/src/math/matrix.rs` with Gaussian elimination and partial pivoting. - Add fixed-size stack fast paths for 3x3 and 4x4 matrices to avoid heap allocation in common transform-sized matrices. - Change the larger-matrix fallback scratch space from `Vec<Vec<f32>>` to a contiguous row-major `Vec<f32>` for better cache locality and fewer allocations. ## Type of Change - [ ] Bug fix (non-breaking change that fixes an issue) - [ ] Feature (non-breaking change that adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to change) - [x] Documentation (updates to docs, specs, tutorials, or comments) - [ ] Refactor (code change that neither fixes a bug nor adds a feature) - [x] Performance (change that improves performance) - [ ] Test (adding or updating tests) - [ ] Build/CI (changes to build process or CI configuration) ## Affected Crates - [x] `lambda-rs` - [ ] `lambda-rs-platform` - [ ] `lambda-rs-args` - [ ] `lambda-rs-logging` - [ ] Other: ## Checklist - [ ] Code follows the repository style guidelines (`cargo +nightly fmt --all`) - [ ] Code passes clippy (`cargo clippy --workspace --all-targets -- -D warnings`) - [ ] Tests pass (`cargo test --workspace`) - [x] New code includes appropriate documentation - [ ] Public API changes are documented - [ ] Breaking changes are noted in this PR description ## Testing **Commands run:** ```bash cargo test -p lambda-rs math::matrix ``` **Manual verification steps (if applicable):** 1. Confirm `determinant()` uses stack fast paths for 3x3 and 4x4 matrices. 2. 
Confirm larger matrices use a contiguous flat scratch buffer instead of nested vectors. ## Screenshots/Recordings Not applicable. ## Platform Testing - [x] macOS - [ ] Windows - [ ] Linux ## Additional Notes Asymptotic runtime for determinant computation improves from `O(n!)` under recursive Laplace expansion to `O(n^3)` with Gaussian elimination. Space remains `O(n^2)`, with lower constant overhead for both fixed-size and larger-matrix paths.
2 parents c38f004 + 9bdf71e commit 52e6a34

File tree

1 file changed

+194
-27
lines changed

1 file changed

+194
-27
lines changed

crates/lambda-rs/src/math/matrix.rs

Lines changed: 194 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,63 @@ pub fn identity_matrix<
259259
return result;
260260
}
261261

262+
/// Compute determinant via Gaussian elimination on fixed-size stack storage.
///
/// This avoids heap allocations in hot fixed-size paths (for example 4x4
/// transform matrices).
///
/// # Arguments
///
/// - `data`: Square matrix values stored in stack-allocated fixed-size arrays.
///
/// # Returns
///
/// - Determinant of `data`.
/// - `0.0` when elimination detects an exact zero pivot after pivoting
///   (singular matrix).
fn determinant_gaussian_stack<const N: usize>(mut data: [[f32; N]; N]) -> f32 {
  let mut sign = 1.0_f32;
  for pivot in 0..N {
    // Partial pivoting: select the row with the largest magnitude in this
    // column to keep the elimination numerically stable.
    let mut pivot_row = pivot;
    let mut pivot_abs = data[pivot][pivot].abs();
    for (candidate, row) in data.iter().enumerate().skip(pivot + 1) {
      let candidate_abs = row[pivot].abs();
      if candidate_abs > pivot_abs {
        pivot_abs = candidate_abs;
        pivot_row = candidate;
      }
    }

    // No usable pivot anywhere in the column: the matrix is singular.
    if pivot_abs == 0.0 {
      return 0.0;
    }

    if pivot_row != pivot {
      data.swap(pivot, pivot_row);
      // Each row exchange flips the sign of the determinant.
      sign = -sign;
    }

    // Copy the pivot row so it can be read while other rows are mutated.
    let pivot_value = data[pivot][pivot];
    let pivot_tail = data[pivot];
    for row in data.iter_mut().skip(pivot + 1) {
      let factor = row[pivot] / pivot_value;
      if factor == 0.0 {
        continue;
      }
      for (dst, src) in row[pivot..].iter_mut().zip(pivot_tail[pivot..].iter())
      {
        *dst -= factor * *src;
      }
    }
  }

  // The matrix is now upper triangular; its determinant is the diagonal
  // product, adjusted for the row swaps performed above.
  let mut det = sign;
  for (i, row) in data.iter().enumerate() {
    det *= row[i];
  }
  return det;
}
318+
262319
// -------------------------- ARRAY IMPLEMENTATION -----------------------------
263320

264321
/// Matrix implementations for arrays of f32 arrays. Including the trait Matrix into
@@ -325,7 +382,15 @@ where
325382
todo!()
326383
}
327384

328-
/// Computes the determinant of any square matrix using Laplace expansion.
385+
/// Computes the determinant of any square matrix using Gaussian elimination
386+
/// with partial pivoting.
387+
///
388+
/// # Behavior
389+
///
390+
/// - Uses stack-allocated elimination for 3x3 and 4x4 matrices to avoid heap
391+
/// allocation on common transform sizes.
392+
/// - Uses a generic dense fallback for larger matrices.
393+
/// - Overall asymptotic runtime is `O(n^3)` for square `n x n` matrices.
329394
fn determinant(&self) -> Result<f32, MathError> {
330395
let rows = self.as_ref().len();
331396
if rows == 0 {
@@ -341,34 +406,88 @@ where
341406
return Err(MathError::NonSquareMatrix { rows, cols });
342407
}
343408

344-
return match rows {
345-
1 => Ok(self.as_ref()[0].as_ref()[0]),
346-
2 => {
347-
let a = self.at(0, 0);
348-
let b = self.at(0, 1);
349-
let c = self.at(1, 0);
350-
let d = self.at(1, 1);
351-
return Ok(a * d - b * c);
409+
if rows == 1 {
410+
return Ok(self.as_ref()[0].as_ref()[0]);
411+
}
412+
413+
if rows == 2 {
414+
let a = self.at(0, 0);
415+
let b = self.at(0, 1);
416+
let c = self.at(1, 0);
417+
let d = self.at(1, 1);
418+
return Ok(a * d - b * c);
419+
}
420+
421+
if rows == 3 {
422+
let mut data = [[0.0_f32; 3]; 3];
423+
for (i, row) in data.iter_mut().enumerate() {
424+
for (j, value) in row.iter_mut().enumerate() {
425+
*value = self.at(i, j);
426+
}
427+
}
428+
return Ok(determinant_gaussian_stack::<3>(data));
429+
}
430+
431+
if rows == 4 {
432+
let mut data = [[0.0_f32; 4]; 4];
433+
for (i, row) in data.iter_mut().enumerate() {
434+
for (j, value) in row.iter_mut().enumerate() {
435+
*value = self.at(i, j);
436+
}
437+
}
438+
return Ok(determinant_gaussian_stack::<4>(data));
439+
}
440+
441+
// Use a contiguous row-major buffer for larger matrices to keep the
442+
// fallback cache-friendlier than `Vec<Vec<f32>>`.
443+
let mut working = vec![0.0_f32; rows * rows];
444+
for i in 0..rows {
445+
for j in 0..rows {
446+
working[i * rows + j] = self.at(i, j);
352447
}
353-
_ => {
354-
let mut result = 0.0;
355-
for i in 0..rows {
356-
let mut submatrix: Vec<Vec<f32>> = Vec::with_capacity(rows - 1);
357-
for j in 1..rows {
358-
let mut row = Vec::new();
359-
for k in 0..rows {
360-
if k != i {
361-
row.push(self.at(j, k));
362-
}
363-
}
364-
submatrix.push(row);
365-
}
366-
let sub_determinant = submatrix.determinant()?;
367-
result += self.at(0, i) * sub_determinant * (-1.0_f32).powi(i as i32);
448+
}
449+
450+
let mut sign = 1.0_f32;
451+
for pivot in 0..rows {
452+
// Partial pivoting improves numerical stability and avoids division by 0.
453+
let mut pivot_row = pivot;
454+
let mut pivot_abs = working[pivot * rows + pivot].abs();
455+
for candidate in (pivot + 1)..rows {
456+
let candidate_abs = working[candidate * rows + pivot].abs();
457+
if candidate_abs > pivot_abs {
458+
pivot_abs = candidate_abs;
459+
pivot_row = candidate;
368460
}
369-
return Ok(result);
370461
}
371-
};
462+
463+
if pivot_abs == 0.0 {
464+
return Ok(0.0);
465+
}
466+
467+
if pivot_row != pivot {
468+
for column in 0..rows {
469+
working.swap(pivot * rows + column, pivot_row * rows + column);
470+
}
471+
sign = -sign;
472+
}
473+
474+
let pivot_value = working[pivot * rows + pivot];
475+
for row in (pivot + 1)..rows {
476+
let factor = working[row * rows + pivot] / pivot_value;
477+
if factor == 0.0 {
478+
continue;
479+
}
480+
for column in pivot..rows {
481+
let row_idx = row * rows + column;
482+
let pivot_idx = pivot * rows + column;
483+
working[row_idx] -= factor * working[pivot_idx];
484+
}
485+
}
486+
}
487+
488+
let diagonal_product =
489+
(0..rows).map(|i| working[i * rows + i]).product::<f32>();
490+
return Ok(sign * diagonal_product);
372491
}
373492

374493
/// Return the size as a (rows, columns).
@@ -394,7 +513,6 @@ where
394513

395514
#[cfg(test)]
396515
mod tests {
397-
398516
use super::{
399517
filled_matrix,
400518
identity_matrix,
@@ -454,6 +572,55 @@ mod tests {
454572
assert_eq!(m2.determinant(), Ok(-306.0));
455573
}
456574

575+
#[test]
576+
fn determinant_4x4_stack_path_handles_row_swap_sign() {
577+
let m = [
578+
[0.0, 2.0, 0.0, 0.0],
579+
[1.0, 0.0, 0.0, 0.0],
580+
[0.0, 0.0, 3.0, 0.0],
581+
[0.0, 0.0, 0.0, 4.0],
582+
];
583+
assert_eq!(m.determinant(), Ok(-24.0));
584+
}
585+
586+
#[test]
587+
fn determinant_5x5_dynamic_path_handles_row_swap_sign() {
588+
let m = [
589+
[0.0, 2.0, 0.0, 0.0, 0.0],
590+
[1.0, 0.0, 0.0, 0.0, 0.0],
591+
[0.0, 0.0, 3.0, 0.0, 0.0],
592+
[0.0, 0.0, 0.0, 4.0, 0.0],
593+
[0.0, 0.0, 0.0, 0.0, 5.0],
594+
];
595+
assert_eq!(m.determinant(), Ok(-120.0));
596+
}
597+
598+
#[test]
599+
fn determinant_preserves_small_non_zero_pivot_in_stack_path() {
600+
let m = [[1.0e-8, 0.0, 0.0], [0.0, 2.0, 0.0], [0.0, 0.0, 3.0]];
601+
crate::assert_approximately_equal!(
602+
m.determinant().expect("determinant should succeed"),
603+
6.0e-8,
604+
1.0e-12
605+
);
606+
}
607+
608+
#[test]
609+
fn determinant_preserves_small_non_zero_pivot_in_dynamic_path() {
610+
let m = [
611+
[1.0e-8, 0.0, 0.0, 0.0, 0.0],
612+
[0.0, 2.0, 0.0, 0.0, 0.0],
613+
[0.0, 0.0, 3.0, 0.0, 0.0],
614+
[0.0, 0.0, 0.0, 4.0, 0.0],
615+
[0.0, 0.0, 0.0, 0.0, 5.0],
616+
];
617+
crate::assert_approximately_equal!(
618+
m.determinant().expect("determinant should succeed"),
619+
1.2e-6,
620+
1.0e-10
621+
);
622+
}
623+
457624
#[test]
458625
fn non_square_matrix_determinant() {
459626
let m = [[3.0, 8.0], [4.0, 6.0], [0.0, 1.0]];

0 commit comments

Comments
 (0)