From 5d2bdb4c542105137d8968289d18c33ee82b1a70 Mon Sep 17 00:00:00 2001 From: Tim Peters Date: Fri, 16 Jan 2026 19:52:13 -0600 Subject: [PATCH 1/8] Explain topsort's cycle-finding algorithm, and why it's written that way. --- Lib/graphlib.py | 59 ++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 56 insertions(+), 3 deletions(-) diff --git a/Lib/graphlib.py b/Lib/graphlib.py index 7961c9c5cac2d6..ba5d4dec211f83 100644 --- a/Lib/graphlib.py +++ b/Lib/graphlib.py @@ -199,6 +199,7 @@ def done(self, *nodes): self._ready_nodes.append(successor) self._nfinished += 1 + # See note "On Finding Cycles" at the bottom. def _find_cycle(self): n2i = self._node2info stack = [] @@ -212,8 +213,6 @@ def _find_cycle(self): while True: if node in seen: - # If we have seen already the node and is in the - # current stack we have found a cycle. if node in node2stacki: return stack[node2stacki[node] :] + [node] # else go on to get next successor @@ -228,11 +227,15 @@ def _find_cycle(self): while stack: try: node = itstack[-1]() - break + break # resume at top ot "while Tree:" except StopIteration: + # no more successors; pop the stack + # and continue looking up del node2stacki[stack.pop()] itstack.pop() else: + # stack is empty; look for a fresh node to + # start over from (a node not yet in seen) break return None @@ -252,3 +255,53 @@ def static_order(self): self.done(*node_group) __class_getitem__ = classmethod(GenericAlias) + +# On Finding Cycles +# ----------------- +# There is a (at least one) total order if and only if the graph is +# acyclic. +# +# When it is cyclic, "there's a cycle - somewhere!" isn't very helpful. +# In theory, it would be most helpful to partition the graph into +# strongly connected components (SCCs) and display those with more than +# one node. Then all cycles could easily be identified "by eyeball". +# +# That's a lot of work, though, and we can get most of the benefit much +# more easily just by showing a single specific cycle. 
+# +# Finding a cycle is most natural via a breadth first search, which can +# easily be arranged to find a shortest-possible cycle. But memory +# burden can be high, because every path-in-progress has to keep its own +# idea of what "the path" is so far. +# +# Depth first search (DFS) is much easier on RAM, only requiring keeping +# track of _the_ path from the starting node to the current node at the +# current recursion level. But there may be any number of nodes, and so +# there's no bound on recursion depth short of the total number of +# nodes. +# +# So we use an iterative version of DFS, keeping an explicit list +# (`stack`) of the path so far. A parallel stack (`itstack`) holds the +# `__next__` method of an iterator over the current level's node's +# successors, so when backtracking to a shallower level we can just call +# that to get the node's next successor. This is state that a recursive +# version would implicitly store in a `for` loop's internals. +# +# `seen` is a set recording which nodes have already been, at some +# time, pushed on the stack. If a node has been pushed on the stack, DFS +# will find any cycle it's part of, so there's no need to ever look at +# it again. +# +# Finally, `node2stacki` maps a node to its index on the current stack, +# for and only for nodes currently _on_ the stack. If a successor to be +# pushed on the stack is in that dict, the node is already on the path, +# at that index. The cycle is then `stack[that_index :] + [node]`. +# +# As is often the case when removing recursion, the control flow looks a +# bit off. The "while True:" loop here rarely actually loops - it's only +# looking to go "up the stack" until finding a level that has another +# successor to consider, emulating a chain of returns in a recursive +# version. +# +# Worst case time is linear in the number of nodes plus the number of +# edges. Worst case memory burden is linear in the number of nodes. 
From 2e2c3cc2769e23cc35f7f68ac666e3dbda9d2ca4 Mon Sep 17 00:00:00 2001 From: Tim Peters Date: Fri, 16 Jan 2026 20:05:15 -0600 Subject: [PATCH 2/8] typo --- Lib/graphlib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/graphlib.py b/Lib/graphlib.py index ba5d4dec211f83..489f3553f74d69 100644 --- a/Lib/graphlib.py +++ b/Lib/graphlib.py @@ -227,7 +227,7 @@ def _find_cycle(self): while stack: try: node = itstack[-1]() - break # resume at top ot "while Tree:" + break # resume at top ot "while True:" except StopIteration: # no more successors; pop the stack # and continue looking up From 437593a834d8911c2dc239dde09f888d11a80af5 Mon Sep 17 00:00:00 2001 From: Tim Peters Date: Fri, 16 Jan 2026 20:16:46 -0600 Subject: [PATCH 3/8] Typo. --- Lib/graphlib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/graphlib.py b/Lib/graphlib.py index 489f3553f74d69..cae800e1d09340 100644 --- a/Lib/graphlib.py +++ b/Lib/graphlib.py @@ -227,7 +227,7 @@ def _find_cycle(self): while stack: try: node = itstack[-1]() - break # resume at top ot "while True:" + break # resume at top of "while True:" except StopIteration: # no more successors; pop the stack # and continue looking up From ed822fc2db294778d52a7711b6bc7ae5e9bbf91b Mon Sep 17 00:00:00 2001 From: Tim Peters Date: Tue, 20 Jan 2026 16:45:56 -0600 Subject: [PATCH 4/8] Address some review comments. --- Lib/graphlib.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/Lib/graphlib.py b/Lib/graphlib.py index cae800e1d09340..f44cb450791cfd 100644 --- a/Lib/graphlib.py +++ b/Lib/graphlib.py @@ -269,16 +269,16 @@ def static_order(self): # That's a lot of work, though, and we can get most of the benefit much # more easily just by showing a single specific cycle. # -# Finding a cycle is most natural via a breadth first search, which can -# easily be arranged to find a shortest-possible cycle. 
But memory -# burden can be high, because every path-in-progress has to keep its own -# idea of what "the path" is so far. +# Approaches to that are based on breadth first or depth first search +# (BFS or DFS). BFS is most natural, and can easily be arranged to +# find a shortest-possible cycle. But memory burden can be high, because +# every path-in-progress has to keep its own idea of what "the path" is +# so far. # -# Depth first search (DFS) is much easier on RAM, only requiring keeping -# track of _the_ path from the starting node to the current node at the -# current recursion level. But there may be any number of nodes, and so -# there's no bound on recursion depth short of the total number of -# nodes. +# DFS is much easier on RAM, only requiring keeping track of _the_ path +# from the starting node to the current node at the current recursion +# level. But there may be any number of nodes, and so there's no bound +# on recursion depth short of the total number of nodes. # # So we use an iterative version of DFS, keeping an explicit list # (`stack`) of the path so far. A parallel stack (`itstack`) holds the @@ -303,5 +303,6 @@ def static_order(self): # successor to consider, emulating a chain of returns in a recursive # version. # -# Worst case time is linear in the number of nodes plus the number of -# edges. Worst case memory burden is linear in the number of nodes. +# Worst cases: O(V+E) for time, and O(V) for memory, where V is the +# number of nodes and V the number edges (which may be quadratic in V!). +# It requires care to ensure these bounds are met. 
From 413caa2ee5300f6495247494e3186a7321dac0f7 Mon Sep 17 00:00:00 2001 From: Tim Peters Date: Tue, 20 Jan 2026 16:53:18 -0600 Subject: [PATCH 5/8] typo --- Lib/graphlib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/graphlib.py b/Lib/graphlib.py index f44cb450791cfd..2fe84958258d21 100644 --- a/Lib/graphlib.py +++ b/Lib/graphlib.py @@ -304,5 +304,5 @@ def static_order(self): # version. # # Worst cases: O(V+E) for time, and O(V) for memory, where V is the -# number of nodes and V the number edges (which may be quadratic in V!). +# number of nodes and E the number edges (which may be quadratic in V!). # It requires care to ensure these bounds are met. From 53ab330164cd95d19473de38c00b795b6b5b7034 Mon Sep 17 00:00:00 2001 From: Tim Peters Date: Tue, 20 Jan 2026 18:44:29 -0600 Subject: [PATCH 6/8] typo --- Lib/graphlib.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/graphlib.py b/Lib/graphlib.py index 2fe84958258d21..9b180dbfccf8f7 100644 --- a/Lib/graphlib.py +++ b/Lib/graphlib.py @@ -1,4 +1,4 @@ -from types import GenericAlias +4from types import GenericAlias __all__ = ["TopologicalSorter", "CycleError"] @@ -304,5 +304,5 @@ def static_order(self): # version. # # Worst cases: O(V+E) for time, and O(V) for memory, where V is the -# number of nodes and E the number edges (which may be quadratic in V!). -# It requires care to ensure these bounds are met. +# number of nodes and E the number of edges (which may be quadratic in +# V!). It requires care to ensure these bounds are met. 
From 31d13023ffdcccdeb7e8f24feaee8053dc6b61e5 Mon Sep 17 00:00:00 2001 From: Tim Peters Date: Tue, 20 Jan 2026 18:46:19 -0600 Subject: [PATCH 7/8] Update Lib/graphlib.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Lib/graphlib.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/graphlib.py b/Lib/graphlib.py index 9b180dbfccf8f7..d6ad6f28a19882 100644 --- a/Lib/graphlib.py +++ b/Lib/graphlib.py @@ -256,6 +256,7 @@ def static_order(self): __class_getitem__ = classmethod(GenericAlias) + # On Finding Cycles # ----------------- # There is a (at least one) total order if and only if the graph is From 752a3cd4a97983e58d9cdb1430407841dd5b947f Mon Sep 17 00:00:00 2001 From: Tim Peters Date: Tue, 20 Jan 2026 18:58:30 -0600 Subject: [PATCH 8/8] Another typo! --- Lib/graphlib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/graphlib.py b/Lib/graphlib.py index 9b180dbfccf8f7..e6f93e269d44db 100644 --- a/Lib/graphlib.py +++ b/Lib/graphlib.py @@ -1,4 +1,4 @@ -4from types import GenericAlias +from types import GenericAlias __all__ = ["TopologicalSorter", "CycleError"]