Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions pytensor/tensor/slinalg.py
Original file line number Diff line number Diff line change
Expand Up @@ -1014,6 +1014,22 @@ def perform(self, node, inputs, outputs):
except np.linalg.LinAlgError:
outputs[0][0] = np.full(a.shape, np.nan, dtype=a.dtype)

def L_op(self, inputs, outputs, output_gradients):
    """Return gradients of the solve inputs, corrected for structured A.

    Parameters
    ----------
    inputs, outputs, output_gradients
        Standard ``L_op`` arguments: the node inputs ``[A, b]``, the node
        outputs, and the gradients flowing back into each output.

    Returns
    -------
    list
        Gradients with respect to ``[A, b]``. When ``assume_a`` is one of
        ``"sym"``, ``"her"`` or ``"pos"``, the gradient of ``A`` is
        restructured so that only the triangle actually read by the solver
        carries non-zero entries.
    """
    res = super().L_op(inputs, outputs, output_gradients)

    if self.assume_a in ("sym", "her", "pos"):
        A_bar = res[0]
        # When assume_a is sym/her/pos, the solver only reads one triangle
        # of A and symmetrizes internally. Off-diagonal elements in the read
        # triangle contribute to both (i, j) and (j, i) of the effective
        # matrix, so we must accumulate the symmetric contribution and zero
        # the unread triangle.
        #
        # In the Hermitian case the solver reflects the read triangle with a
        # conjugate transpose, so the accumulated contribution must be
        # conjugated as well; plain .mT alone would give a wrong gradient
        # for complex Hermitian A. For "sym" (complex symmetric) no
        # conjugation is involved, so .mT is the correct reflection there.
        A_bar_refl = A_bar.conj().mT if self.assume_a == "her" else A_bar.mT
        if self.lower:
            res[0] = ptb.tril(A_bar) + ptb.tril(A_bar_refl, -1)
        else:
            res[0] = ptb.triu(A_bar) + ptb.triu(A_bar_refl, 1)

    return res

def inplace_on_inputs(self, allowed_inplace_inputs: list[int]) -> "Op":
if not allowed_inplace_inputs:
return self
Expand Down
63 changes: 63 additions & 0 deletions tests/tensor/test_slinalg.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,69 @@ def test_solve_gradient(
lambda A, b: solve_op(A_func(A), b), [A_val, b_val], 3, rng, eps=eps
)

@pytest.mark.parametrize("b_shape", [(5, 1), (5,)], ids=["b_col_vec", "b_vec"])
@pytest.mark.parametrize(
    "assume_a, lower",
    [
        ("sym", False),
        ("sym", True),
        ("pos", False),
        ("pos", True),
    ],
    ids=["sym_upper", "sym_lower", "pos_upper", "pos_lower"],
)
@pytest.mark.skipif(
    config.floatX == "float32",
    reason="Gradients not numerically stable in float32",
)
def test_solve_symmetric_gradient_direct(
    self, b_shape: tuple[int, ...], assume_a: str, lower: bool
):
    """Test that the gradient of Solve is correct when a pre-structured
    matrix is passed directly, without composing with a symmetrization
    wrapper. This catches bugs where L_op doesn't account for the solver
    only reading one triangle of A."""
    rng = np.random.default_rng(utt.fetch_seed())

    # Build a well-conditioned structured matrix: SPD for "pos"
    # (diagonal shift keeps it far from singular), symmetric for "sym".
    A_raw = rng.normal(size=(5, 5)).astype(config.floatX)
    if assume_a == "pos":
        A_val = (A_raw @ A_raw.T + 5 * np.eye(5)).astype(config.floatX)
    else:
        A_val = ((A_raw + A_raw.T) / 2).astype(config.floatX)
    b_val = rng.normal(size=b_shape).astype(config.floatX)

    A = pt.tensor("A", shape=(5, 5))
    b = pt.tensor("b", shape=b_shape)
    x = solve(A, b, assume_a=assume_a, lower=lower, b_ndim=len(b_shape))
    loss = x.sum()
    g_A = grad(loss, A)
    f = function([A, b], g_A)

    analytic = f(A_val, b_val)

    # The solver never reads the opposite triangle, so its gradient there
    # must be exactly zero (L_op zeroes the unread triangle explicitly).
    unread = np.triu(analytic, 1) if lower else np.tril(analytic, -1)
    np.testing.assert_array_equal(unread, np.zeros_like(unread))

    # Numerical gradient: central differences, perturbing only entries in
    # the triangle the solver actually reads.
    eps = 1e-7
    numerical = np.zeros_like(A_val)
    for i in range(5):
        for j in range(5):
            # Skip entries in the unread triangle; their true gradient is 0.
            if lower and j > i:
                continue
            if not lower and j < i:
                continue
            A_plus = A_val.copy()
            A_plus[i, j] += eps
            A_minus = A_val.copy()
            A_minus[i, j] -= eps
            x_plus = scipy_linalg.solve(
                A_plus, b_val, assume_a=assume_a, lower=lower
            )
            x_minus = scipy_linalg.solve(
                A_minus, b_val, assume_a=assume_a, lower=lower
            )
            numerical[i, j] = (x_plus.sum() - x_minus.sum()) / (2 * eps)

    np.testing.assert_allclose(analytic, numerical, atol=1e-5, rtol=1e-5)

def test_solve_tringular_indirection(self):
a = pt.matrix("a")
b = pt.vector("b")
Expand Down
Loading