@@ -33,14 +33,17 @@ def test_jac_is_stack_of_grads():
3333 y2 = a2 * x
3434 input = Gradients ({y1 : torch .ones_like (y1 ), y2 : torch .ones_like (y2 )})
3535
36- jac = Jac (outputs = [y1 , y2 ], inputs = [a1 , a2 ], chunk_size = None , retain_graph = True ) << Diagonalize (
37- [y1 , y2 ]
38- )
39- grad1 = Grad (outputs = [y1 ], inputs = [a1 , a2 ]) << Select ([y1 ], [y1 , y2 ])
40- grad2 = Grad (outputs = [y2 ], inputs = [a1 , a2 ]) << Select ([y2 ], [y1 , y2 ])
41- stack_of_grads = Stack ([grad1 , grad2 ])
36+ jac = Jac (outputs = [y1 , y2 ], inputs = [a1 , a2 ], chunk_size = None , retain_graph = True )
37+ diag = Diagonalize ([y1 , y2 ])
38+ jac_diag = jac << diag
39+
40+ grad1 = Grad (outputs = [y1 ], inputs = [a1 , a2 ])
41+ grad2 = Grad (outputs = [y2 ], inputs = [a1 , a2 ])
42+ select1 = Select ([y1 ], [y1 , y2 ])
43+ select2 = Select ([y2 ], [y1 , y2 ])
44+ stack_of_grads = Stack ([grad1 << select1 , grad2 << select2 ])
4245
43- jacobians = jac (input )
46+ jacobians = jac_diag (input )
4447 expected_jacobians = stack_of_grads (input )
4548
4649 assert_tensor_dicts_are_close (jacobians , expected_jacobians )
@@ -66,7 +69,7 @@ def test_single_differentiation():
6669 assert_tensor_dicts_are_close (output , expected_output )
6770
6871
69- def test_multiple_differentiation_with_grad ():
72+ def test_multiple_differentiations ():
7073 """
7174 Tests that we can perform multiple scalar differentiations with the conjunction of multiple Grad
7275 transforms, composed with an Init transform.
@@ -78,10 +81,12 @@ def test_multiple_differentiation_with_grad():
7881 y2 = a2 * 3.0
7982 input = EmptyTensorDict ()
8083
81- grad1 = Grad ([y1 ], [a1 ]) << Select ([y1 ], [y1 , y2 ])
82- grad2 = Grad ([y2 ], [a2 ]) << Select ([y2 ], [y1 , y2 ])
84+ grad1 = Grad ([y1 ], [a1 ])
85+ grad2 = Grad ([y2 ], [a2 ])
86+ select1 = Select ([y1 ], [y1 , y2 ])
87+ select2 = Select ([y2 ], [y1 , y2 ])
8388 init = Init ([y1 , y2 ])
84- transform = (grad1 | grad2 ) << init
89+ transform = (( grad1 << select1 ) | ( grad2 << select2 ) ) << init
8590
8691 output = transform (input )
8792 expected_output = {
@@ -182,7 +187,8 @@ def test_conjunction_accumulate_select():
182187 """
183188 Tests that it is possible to conjunct an Accumulate and a Select in this order.
184189 It is not trivial since the type of the TensorDict returned by the first transform (Accumulate)
185- is EmptyDict, which is not the type that the conjunction should return (Gradients).
190+ is EmptyDict, which is not the type that the conjunction should return (Gradients), but a
191+ subclass of it.
186192 """
187193
188194 key = torch .tensor ([1.0 , 2.0 , 3.0 ], requires_grad = True , device = DEVICE )
@@ -199,7 +205,7 @@ def test_conjunction_accumulate_select():
199205 assert_tensor_dicts_are_close (output , expected_output )
200206
201207
202- def test_equivalence_jac_grad ():
208+ def test_equivalence_jac_grads ():
203209 """
204210 Tests that differentiation in parallel using `_jac` is equivalent to sequential differentiation
205211 using several calls to `_grad` and stacking the resulting gradients.
@@ -219,18 +225,12 @@ def test_equivalence_jac_grad():
219225 outputs = [y1 , y2 ]
220226 grad_outputs = [torch .ones_like (output ) for output in outputs ]
221227
222- grad_dict_1 = Grad (
223- outputs = [outputs [0 ]],
224- inputs = inputs ,
225- retain_graph = True ,
226- )(Gradients ({outputs [0 ]: grad_outputs [0 ]}))
228+ grad1 = Grad (outputs = [outputs [0 ]], inputs = inputs , retain_graph = True )
229+ grad_dict_1 = grad1 (Gradients ({outputs [0 ]: grad_outputs [0 ]}))
227230 grad_1_A , grad_1_b , grad_1_c = grad_dict_1 [A ], grad_dict_1 [b ], grad_dict_1 [c ]
228231
229- grad_dict_2 = Grad (
230- outputs = [outputs [1 ]],
231- inputs = inputs ,
232- retain_graph = True ,
233- )(Gradients ({outputs [1 ]: grad_outputs [1 ]}))
232+ grad2 = Grad (outputs = [outputs [1 ]], inputs = inputs , retain_graph = True )
233+ grad_dict_2 = grad2 (Gradients ({outputs [1 ]: grad_outputs [1 ]}))
234234 grad_2_A , grad_2_b , grad_2_c = grad_dict_2 [A ], grad_dict_2 [b ], grad_dict_2 [c ]
235235
236236 n_outputs = len (outputs )
@@ -240,11 +240,10 @@ def test_equivalence_jac_grad():
240240 for i , grad_output in enumerate (grad_outputs ):
241241 batched_grad_outputs [i ][i ] = grad_output
242242
243- jac_dict = Jac (
244- outputs = outputs ,
245- inputs = inputs ,
246- chunk_size = None ,
247- )(Jacobians ({outputs [0 ]: batched_grad_outputs [0 ], outputs [1 ]: batched_grad_outputs [1 ]}))
243+ jac = Jac (outputs = outputs , inputs = inputs , chunk_size = None )
244+ jac_dict = jac (
245+ Jacobians ({outputs [0 ]: batched_grad_outputs [0 ], outputs [1 ]: batched_grad_outputs [1 ]})
246+ )
248247 jac_A , jac_b , jac_c = jac_dict [A ], jac_dict [b ], jac_dict [c ]
249248
250249 assert_close (jac_A , torch .stack ([grad_1_A , grad_2_A ]))