pymc-devs · ricardoV94 · Mar 27, 2025 · Mar 19, 2025 · Mar 19, 2025 · Mar 21, 2025
diff --git a/pytensor/tensor/nlinalg.py b/pytensor/tensor/nlinalg.py
@@ -512,6 +512,80 @@ def perform(self, node, inputs, outputs):
         else:
             outputs[0][0] = res
 
+    def L_op(self, inputs, outputs, output_grads):
+        """
+        Reverse-mode gradient of the QR decomposition with respect to its single input ``A``.
+
+        Adapted from [1]_, which is used in the forward-mode implementation in jax here:
+        https://github.com/jax-ml/jax/blob/54691b125ab4b6f88c751dae460e4d51f5cf834a/jax/_src/lax/linalg.py#L1803
+        and from [2]_, which describes a solution in the square matrix case.
+
+        Returns a one-element list with the gradient for ``A``; disconnected output gradients count as zeros.
+
+        Raises ``NotImplementedError`` for ``mode="raw"``, for ``mode="complete"`` with ``m != n``,
+        when the static matrix shape of ``A`` is unknown, and for ``m < n``.
+
+        References
+        ----------
+        .. [1] Townsend, James. "Differentiating the qr decomposition." online draft https://j-towns.github.io/papers/qr-derivative.pdf (2018)
+        .. [2] Sebastian F. Walter, Lutz Lehmann & René Lamour. "On evaluating higher-order derivatives
+           of the QR decomposition of tall matrices with full column rank in forward and reverse mode algorithmic differentiation",
+           Optimization Methods and Software, 27:2, 391-403, DOI: 10.1080/10556788.2011.610454
+        """
+        from pytensor.tensor.slinalg import solve_triangular
+
+        (A,) = (cast(ptb.TensorVariable, x) for x in inputs)
+        # Static dimensions of the trailing matrix axes; an entry is None when not known statically.
+        *_, m, n = A.type.shape
+
+        def _H(x: ptb.TensorVariable):
+            # Conjugate transpose.
+            return x.conj().mT
+
+        def _copyutl(x: ptb.TensorVariable):
+            # Upper triangle of x plus the conjugate transpose of its strictly upper triangle.
+            return ptb.triu(x, k=0) + _H(ptb.triu(x, k=1))
+
+        if self.mode == "raw" or (self.mode == "complete" and m != n):
+            raise NotImplementedError("Gradient of qr not implemented")
+        if m is None or n is None:
+            # Comparing None with `<` raises TypeError; report the unsupported case explicitly.
+            raise NotImplementedError("Gradient of qr not implemented for unknown static shape")
+        if m < n:
+            raise NotImplementedError(
+                "Gradient of qr not implemented for m x n matrices with m < n"
+            )
+
+        if self.mode == "r":
+            # We need all the components of the QR to compute the gradient of A even if we only
+            # use the upper triangular component in the cost function.
+            Q, R = qr(A, mode="reduced")
+            dR = cast(ptb.TensorVariable, output_grads[0])
+            R_dRt = R @ _H(dR)
+            M = ptb.tril(R_dRt - _H(R_dRt), k=-1)
+            M_Rinvt = _H(solve_triangular(R, _H(M)))
+            return [Q @ (M_Rinvt + dR)]
+
+        Q, R = (cast(ptb.TensorVariable, x) for x in outputs)
+        is_disconnected = [isinstance(x.type, DisconnectedType) for x in output_grads]
+        if all(is_disconnected):
+            # This should never be reached by Pytensor
+            return [DisconnectedType()()]  # pragma: no cover
+
+        # A disconnected output gradient is replaced by zeros shaped like that output.
+        dQ, dR = (
+            cast(ptb.TensorVariable, output.zeros_like() if disconnected else output_grad)
+            for disconnected, output_grad, output in zip(
+                is_disconnected, output_grads, [Q, R], strict=True
+            )
+        )
+
+        Qt_dQ = _H(Q) @ dQ
+        R_dRt = R @ _H(dR)
+        M = Q @ (ptb.tril(R_dRt - _H(R_dRt), k=-1) - _copyutl(Qt_dQ)) + dQ
+        M_Rinvt = _H(solve_triangular(R, _H(M)))
+        return [M_Rinvt + Q @ dR]
+
 
 def qr(a, mode="reduced"):
     """

diff --git a/tests/tensor/test_nlinalg.py b/tests/tensor/test_nlinalg.py
@@ -152,6 +152,58 @@ def test_qr_modes():
         assert "name 'complete' is not defined" in str(e)
 
 
+@pytest.mark.parametrize(
+    "shape, gradient_test_case, mode",
+    (
+        [(s, c, "reduced") for s in [(3, 3), (6, 3), (3, 6)] for c in [0, 1, 2]]
+        + [(s, c, "complete") for s in [(3, 3), (6, 3), (3, 6)] for c in [0, 1, 2]]
+        + [(s, 0, "r") for s in [(3, 3), (6, 3), (3, 6)]]
+        + [((3, 3), 0, "raw")]
+    ),
+    ids=(
+        [
+            f"shape={s}, gradient_test_case={c}, mode=reduced"
+            for s in [(3, 3), (6, 3), (3, 6)]
+            for c in ["Q", "R", "both"]
+        ]
+        + [
+            f"shape={s}, gradient_test_case={c}, mode=complete"
+            for s in [(3, 3), (6, 3), (3, 6)]
+            for c in ["Q", "R", "both"]
+        ]
+        + [f"shape={s}, gradient_test_case=R, mode=r" for s in [(3, 3), (6, 3), (3, 6)]]
+        + ["shape=(3, 3), gradient_test_case=Q, mode=raw"]
+    ),
+)
+def test_qr_grad(shape, gradient_test_case, mode):
+    """Check the qr gradient numerically, or that unsupported cases raise NotImplementedError."""
+    rng = np.random.default_rng(utt.fetch_seed())
+
+    def _test_fn(x, case=2, mode="reduced"):
+        # Scalar cost built from index 0 of qr's result (case 0), index 1 (case 1) or both (case 2).
+        # NOTE(review): for mode="r" qr presumably returns R alone, so case 0 sums only R[0] - confirm.
+        if case == 0:
+            return qr(x, mode=mode)[0].sum()
+        elif case == 1:
+            return qr(x, mode=mode)[1].sum()
+        elif case == 2:
+            Q, R = qr(x, mode=mode)
+            return Q.sum() + R.sum()
+
+    m, n = shape
+    a = rng.standard_normal(shape).astype(config.floatX)
+    cost_fn = partial(_test_fn, case=gradient_test_case, mode=mode)
+
+    # Pass the seeded generator rather than the global np.random module so that any
+    # randomness used by verify_grad is reproducible from utt.fetch_seed().
+    if m < n or (mode == "complete" and m != n) or mode == "raw":
+        # Unsupported by the QR gradient: expect NotImplementedError.
+        with pytest.raises(NotImplementedError):
+            utt.verify_grad(cost_fn, [a], rng=rng)
+    else:
+        utt.verify_grad(cost_fn, [a], rng=rng)
+
+
 class TestSvd(utt.InferShapeTester):
     op_class = SVD