
Commit a352cc9

Authored by ValerianRey, AustenMan, and PierreQuinton
Fix _get_descendant_accumulate_grads (#217)
* Prevent nodes from being traversed twice in _get_descendant_accumulate_grads
* Use queue for nodes_to_traverse
* Rename variables in _get_descendant_accumulate_grads
* Add changelog entry

Co-authored-by: austen260 <137848202+austen260@users.noreply.github.com>
Co-authored-by: Pierre Quinton <pierre.quinton@epfl.ch>
1 parent 34deb43 commit a352cc9

2 files changed: 19 additions & 14 deletions


CHANGELOG.md

Lines changed: 6 additions & 0 deletions
@@ -8,6 +8,12 @@ changes that do not affect the user.
 
 ## [Unreleased]
 
+### Changed
+
+- Improved the performance of the graph traversal function called by `backward` and `mtl_backward`
+  to find the tensors with respect to which differentiation should be done. It now visits every node
+  at most once.
+
 ## [0.3.0] - 2024-12-10
 
 ### Added

src/torchjd/autojac/_utils.py

Lines changed: 13 additions & 14 deletions
@@ -1,3 +1,4 @@
+from collections import deque
 from typing import Iterable, Sequence
 
 from torch import Tensor
@@ -67,24 +68,22 @@ def _get_descendant_accumulate_grads(roots: set[Node], excluded_nodes: set[Node]
     :param excluded_nodes: Nodes excluded from the graph traversal.
     """
 
+    excluded_nodes = set(excluded_nodes)  # Re-instantiate set to avoid modifying input
     result = set()
-    nodes_to_traverse = [node for node in roots if node not in excluded_nodes]
+    nodes_to_traverse = deque(roots - excluded_nodes)
 
-    # This implementation more or less follows what is advised
-    # [here](https://discuss.pytorch.org/t/how-to-access-the-computational-graph/112887), but it is
-    # not necessarily robust to future changes, and it's not guaranteed to work.
-    # See [this](https://discuss.pytorch.org/t/autograd-graph-traversal/213658) for another question
-    # about how to implement this.
+    # This implementation more or less follows what is advised in
+    # https://discuss.pytorch.org/t/autograd-graph-traversal/213658 and what was suggested in
+    # https://github.com/TorchJD/torchjd/issues/216.
     while nodes_to_traverse:
-        current_node = nodes_to_traverse.pop()
+        node = nodes_to_traverse.popleft()  # Breadth-first
 
-        if current_node.__class__.__name__ == "AccumulateGrad":
-            result.add(current_node)
+        if node.__class__.__name__ == "AccumulateGrad":
+            result.add(node)
 
-        nodes_to_traverse += [
-            child[0]
-            for child in current_node.next_functions
-            if child[0] is not None and child[0] not in excluded_nodes
-        ]
+        for child, _ in node.next_functions:
+            if child is not None and child not in excluded_nodes:
+                nodes_to_traverse.append(child)  # Append to the right
+                excluded_nodes.add(child)
 
     return result
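The core of the fix is marking each child as excluded at the moment it is *enqueued*, rather than when it is dequeued, so no node can enter the queue twice. The sketch below illustrates why this matters, using a hypothetical `Node` mock (not torchjd's or PyTorch's API, chosen here so the example runs without torch) on a chain of diamond-shaped graphs, where a traversal without a visited set revisits nodes exponentially often:

```python
from collections import deque


class Node:
    """Hypothetical stand-in for an autograd graph node, for illustration only."""

    def __init__(self):
        # Mirrors the (child, input_index) pairs of torch's `next_functions`.
        self.next_functions: tuple = ()


def count_visits_old(root: Node) -> int:
    """Old-style traversal: no visited set, so nodes can be re-enqueued."""
    stack, visits = [root], 0
    while stack:
        node = stack.pop()
        visits += 1
        stack += [child for child, _ in node.next_functions if child is not None]
    return visits


def count_visits_new(root: Node) -> int:
    """Fixed traversal: mark nodes on enqueue, so each is visited at most once."""
    seen = {root}
    queue, visits = deque([root]), 0
    while queue:
        node = queue.popleft()  # Breadth-first
        visits += 1
        for child, _ in node.next_functions:
            if child is not None and child not in seen:
                seen.add(child)  # Mark on enqueue: never queued twice
                queue.append(child)
    return visits


# Build a chain of 10 "diamonds": at each level, two parallel paths re-merge
# into the same node, doubling the number of root-to-node paths.
root = Node()
for _ in range(10):
    merge = root
    left, right = Node(), Node()
    left.next_functions = ((merge, 0),)
    right.next_functions = ((merge, 0),)
    root = Node()
    root.next_functions = ((left, 0), (right, 0))

print(count_visits_old(root))  # 4093 visits: exponential in the depth
print(count_visits_new(root))  # 31 visits: each of the 31 nodes seen once
```

The graph has only 31 nodes, but the old traversal visits one node per root-to-node path, which doubles at every diamond; the fixed traversal is linear in the number of nodes regardless of how paths re-merge.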
