From ff2149734388ea675848fe336858e85cd3c2946a Mon Sep 17 00:00:00 2001 From: Josef Skladanka Date: Mon, 4 May 2026 11:16:15 +0200 Subject: [PATCH] fix(dependency_graph): remove orphaned children in remove_dependency When a node is removed, child nodes that have no remaining parents are now removed as well, preventing orphaned nodes from lingering in the graph. Closes: #1041 Signed-off-by: Josef Skladanka Co-Authored-By: Claude --- src/fromager/dependency_graph.py | 76 ++++++---- tests/test_dependency_graph.py | 237 +++++++++++++++++++++++++++++++ 2 files changed, 285 insertions(+), 28 deletions(-) diff --git a/src/fromager/dependency_graph.py b/src/fromager/dependency_graph.py index b4612867..c3f78cea 100644 --- a/src/fromager/dependency_graph.py +++ b/src/fromager/dependency_graph.py @@ -1,5 +1,6 @@ from __future__ import annotations +import collections import dataclasses import graphlib import json @@ -346,12 +347,10 @@ def remove_dependency( req_name: NormalizedName, req_version: Version, ) -> None: - """Remove a dependency node from the graph. + """Remove a dependency node and any orphaned descendants from the graph. - Removes the node and all edges pointing to it. This includes: - - Back-references in child nodes' parents lists - - Forward edges in parent nodes' children lists - Child nodes of the removed node are kept if referenced elsewhere. + Removes the node and all edges pointing to it. Child nodes that have + no remaining parents after removal are iteratively removed as well. Args: req_name: Canonical name of the package @@ -362,29 +361,50 @@ def remove_dependency( logger.debug(f"Cannot remove {key} - not in graph") return - logger.debug(f"Removing failed dependency {key} from graph") - - deleted_node = self.nodes[key] - - # Clean up back-references (parents) in nodes that were children of the removed node - for child_edge in deleted_node.children: - child_node = child_edge.destination_node - filtered_parents = [ - edge for edge in child_node.parents if edge.destination_node.key != key - ] - child_node.parents.clear() - child_node.parents.extend(filtered_parents) - - # Remove the node itself - del self.nodes[key] - - # Remove forward edges from any node whose children pointed to the removed node - for node in self.nodes.values(): - filtered_children = [ - edge for edge in node.children if edge.destination_node.key != key - ] - node.children.clear() - node.children.extend(filtered_children) + queue: collections.deque[str] = collections.deque([key]) + + # BFS over orphaned descendants + while queue: + key = queue.popleft() + # This is a defensive check, should not happen in normal operation + if key not in self.nodes: + logger.debug(f"Cannot remove {key} - not in graph") + continue + + logger.debug(f"Removing failed dependency {key} from graph") + + deleted_node = self.nodes[key] + + # Remove references to this node from its direct children + children = [] + for child_edge in deleted_node.children: + child_node = child_edge.destination_node + filtered_parents = [ + edge + for edge in child_node.parents + if edge.destination_node.key != key + ] + child_node.parents.clear() + child_node.parents.extend(filtered_parents) + children.append(child_node) + + # Remove references to this node from its direct parents + for parent_edge in deleted_node.parents: + parent_node = parent_edge.destination_node + filtered_children = [ + edge + for edge in parent_node.children + if edge.destination_node.key != key + ] + parent_node.children.clear() + parent_node.children.extend(filtered_children) + + del self.nodes[key] + + # Enqueue children that have become orphans + for child in children: + if child.key != ROOT and child.key in self.nodes and not child.parents: + queue.append(child.key) def get_dependency_edges( self, match_dep_types: list[RequirementType] | None = None diff --git a/tests/test_dependency_graph.py b/tests/test_dependency_graph.py index 2c8b7b7f..c4086f4a 100644 --- a/tests/test_dependency_graph.py +++ b/tests/test_dependency_graph.py @@ -468,3 +468,240 @@ def test_tracking_topology_sorter_empty_graph() -> None: with pytest.raises(ValueError) as excinfo: topo.get_available() assert "topology is not active" in str(excinfo.value) + + +def _build_graph(*edges: tuple[str, str, str]) -> DependencyGraph: + """Build a DependencyGraph from (parent, child, req_type) triples. + + Use "ROOT" as parent name for top-level dependencies. + """ + graph = DependencyGraph() + for parent_name, child_name, req_type_str in edges: + parent_n = None if parent_name == "ROOT" else canonicalize_name(parent_name) + parent_v = None if parent_name == "ROOT" else Version("1.0") + graph.add_dependency( + parent_name=parent_n, + parent_version=parent_v, + req_type=RequirementType(req_type_str), + req=Requirement(f"{child_name}==1.0"), + req_version=Version("1.0"), + ) + return graph + + +def test_remove_dependency_basic() -> None: + """Removing a leaf node cleans it from nodes and parent's children.""" + graph = _build_graph( + ("ROOT", "a", "toplevel"), + ("a", "b", "install"), + ) + assert "b==1.0" in graph.nodes + assert len(graph.nodes["a==1.0"].children) == 1 + + graph.remove_dependency(canonicalize_name("b"), Version("1.0")) + + assert "b==1.0" not in graph.nodes + assert len(graph.nodes["a==1.0"].children) == 0 + + +def test_remove_dependency_cascades_orphans() -> None: + """Removing a node recursively removes its orphaned descendants.""" + # ROOT -> a -> b -> c (linear chain) + graph = _build_graph( + ("ROOT", "a", "toplevel"), + ("a", "b", "install"), + ("b", "c", "install"), + ) + assert "a==1.0" in graph.nodes + assert "b==1.0" in graph.nodes + assert "c==1.0" in graph.nodes + + graph.remove_dependency(canonicalize_name("a"), Version("1.0")) + + assert "a==1.0" not in graph.nodes + assert "b==1.0" not in graph.nodes + assert "c==1.0" not in graph.nodes + assert len(graph) == 0 + + +def test_remove_dependency_keeps_shared_children() -> None: + """Children with other parents are kept; only the stale parent edge is removed.""" + # ROOT -> a -> shared + # ROOT -> b -> shared + graph = _build_graph( + ("ROOT", "a", "toplevel"), + ("ROOT", "b", "toplevel"), + ("a", "shared", "install"), + ("b", "shared", "install"), + ) + shared_node = graph.nodes["shared==1.0"] + assert len(shared_node.parents) == 2 + + graph.remove_dependency(canonicalize_name("a"), Version("1.0")) + + assert "a==1.0" not in graph.nodes + assert "shared==1.0" in graph.nodes + assert len(shared_node.parents) == 1 + assert shared_node.parents[0].destination_node.key == "b==1.0" + + +def test_remove_dependency_diamond_sequential() -> None: + """Diamond: shared child survives first removal, cleaned up by second.""" + # ROOT -> a -> c + # ROOT -> b -> c + graph = _build_graph( + ("ROOT", "a", "toplevel"), + ("ROOT", "b", "toplevel"), + ("a", "c", "install"), + ("b", "c", "install"), + ) + + graph.remove_dependency(canonicalize_name("a"), Version("1.0")) + + assert "a==1.0" not in graph.nodes + assert "c==1.0" in graph.nodes + assert len(graph.nodes["c==1.0"].parents) == 1 + + graph.remove_dependency(canonicalize_name("b"), Version("1.0")) + + assert "b==1.0" not in graph.nodes + assert "c==1.0" not in graph.nodes + assert len(graph) == 0 + + +def test_remove_dependency_already_removed_child() -> None: + """Removing a node whose child was already removed is safe.""" + # ROOT -> a -> b -> c + graph = _build_graph( + ("ROOT", "a", "toplevel"), + ("a", "b", "install"), + ("b", "c", "install"), + ) + + graph.remove_dependency(canonicalize_name("c"), Version("1.0")) + graph.remove_dependency(canonicalize_name("b"), Version("1.0")) + + assert "b==1.0" not in graph.nodes + assert "c==1.0" not in graph.nodes + assert "a==1.0" in graph.nodes + assert len(graph.nodes["a==1.0"].children) == 0 + + +def test_remove_dependency_mid_graph_cascades() -> None: + """Removing a mid-graph node cascades to its exclusive subtree. + + Verifies both that orphaned nodes are gone and that the surviving + tree structure (edges in both directions) is fully intact. + """ + # ROOT -> a -> b -> d + # b -> e + # a -> c + graph = _build_graph( + ("ROOT", "a", "toplevel"), + ("a", "b", "install"), + ("a", "c", "install"), + ("b", "d", "install"), + ("b", "e", "install"), + ) + + graph.remove_dependency(canonicalize_name("b"), Version("1.0")) + + # Removed subtree is gone + assert "b==1.0" not in graph.nodes + assert "d==1.0" not in graph.nodes + assert "e==1.0" not in graph.nodes + + # Exactly ROOT, a, c remain + assert set(graph.nodes.keys()) == {"", "a==1.0", "c==1.0"} + + # ROOT -> a edge intact + root = graph.get_root_node() + assert len(root.children) == 1 + assert root.children[0].destination_node.key == "a==1.0" + + # a -> c is the only surviving child edge, and a's parent is ROOT + node_a = graph.nodes["a==1.0"] + assert len(node_a.children) == 1 + assert node_a.children[0].destination_node.key == "c==1.0" + assert len(node_a.parents) == 1 + assert node_a.parents[0].destination_node.key == "" + + # c has a as parent, no children + node_c = graph.nodes["c==1.0"] + assert len(node_c.children) == 0 + assert len(node_c.parents) == 1 + assert node_c.parents[0].destination_node.key == "a==1.0" + + +def test_remove_dependency_shared_child_kept_by_other_subtree() -> None: + """Shared child and its descendants survive when kept alive by another subtree. + + ROOT -> a -> b -> d + a -> c -> d + ROOT -> e -> c + + Removing a should remove a, b; c and d survive because e still parents c. + """ + graph = _build_graph( + ("ROOT", "a", "toplevel"), + ("ROOT", "e", "toplevel"), + ("a", "b", "install"), + ("a", "c", "install"), + ("b", "d", "install"), + ("c", "d", "install"), + ("e", "c", "install"), + ) + + graph.remove_dependency(canonicalize_name("a"), Version("1.0")) + + assert "a==1.0" not in graph.nodes + assert "b==1.0" not in graph.nodes + assert "c==1.0" in graph.nodes + assert "d==1.0" in graph.nodes + assert "e==1.0" in graph.nodes + + node_c = graph.nodes["c==1.0"] + assert len(node_c.parents) == 1 + assert node_c.parents[0].destination_node.key == "e==1.0" + assert len(node_c.children) == 1 + assert node_c.children[0].destination_node.key == "d==1.0" + + node_d = graph.nodes["d==1.0"] + assert len(node_d.parents) == 1 + assert node_d.parents[0].destination_node.key == "c==1.0" + + +def test_remove_dependency_cascades_through_diamond() -> None: + """Single removal cascades through a diamond, removing all orphans. + + ROOT -> a -> b -> d + a -> c -> d + + Removing a orphans b and c; as both are removed, d loses all parents + and is also removed. + """ + graph = _build_graph( + ("ROOT", "a", "toplevel"), + ("a", "b", "install"), + ("a", "c", "install"), + ("b", "d", "install"), + ("c", "d", "install"), + ) + + graph.remove_dependency(canonicalize_name("a"), Version("1.0")) + + assert "a==1.0" not in graph.nodes + assert "b==1.0" not in graph.nodes + assert "c==1.0" not in graph.nodes + assert "d==1.0" not in graph.nodes + assert len(graph) == 0 + + +def test_remove_dependency_nonexistent() -> None: + """Removing a node not in the graph is a no-op.""" + graph = _build_graph(("ROOT", "a", "toplevel")) + node_count = len(graph.nodes) + + graph.remove_dependency(canonicalize_name("nonexistent"), Version("1.0")) + + assert len(graph.nodes) == node_count