393 lines
13 KiB
Python
393 lines
13 KiB
Python
# topsort - dependency (topological) sorting and cycle finding functions
|
|
# Copyright (C) 2007 RADLogic
|
|
#
|
|
# This library is free software; you can redistribute it and/or
|
|
# modify it under the terms of the GNU Lesser General Public
|
|
# License as published by the Free Software Foundation;
|
|
# version 2.1 of the License.
|
|
#
|
|
# This library is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
# Lesser General Public License for more details.
|
|
#
|
|
# See http://www.fsf.org/licensing/licenses/lgpl.txt for full license text.
|
|
"""Provide toplogical sorting (i.e. dependency sorting) functions.
|
|
|
|
The topsort function is based on code posted on Usenet by Tim Peters.
|
|
|
|
Modifications:
|
|
- added doctests
|
|
- changed some bits to use current Python idioms
|
|
(listcomp instead of filter, +=/-=, inherit from Exception)
|
|
- added a topsort_levels version that ports items in each dependency level
|
|
into a sub-list
|
|
- added find_cycles to aid in cycle debugging
|
|
|
|
Run this module directly to run the doctests (unittests).
|
|
Make sure they all pass before checking in any modifications.
|
|
|
|
Requires Python >= 2.2
|
|
(For Python 2.2 also requires separate sets.py module)
|
|
|
|
This requires the rad_util.py module.
|
|
|
|
"""
|
|
|
|
# Provide support for Python 2.2*
|
|
from __future__ import generators
|
|
|
|
__version__ = '$Revision: 0.9 $'
|
|
__date__ = '$Date: 2007/03/27 04:15:26 $'
|
|
__credits__ = '''Tim Peters -- original topsort code
|
|
Tim Wegener -- doctesting, updating to current idioms, topsort_levels,
|
|
find_cycles
|
|
'''
|
|
|
|
# Make Python 2.3 sets look like Python 2.4 sets.
|
|
try:
|
|
set
|
|
except NameError:
|
|
from sets import Set as set
|
|
|
|
from rad_util import is_rotated
|
|
|
|
|
|
class CycleError(Exception):
|
|
"""Cycle Error"""
|
|
pass
|
|
|
|
|
|
def topsort(pairlist):
|
|
"""Topologically sort a list of (parent, child) pairs.
|
|
|
|
Return a list of the elements in dependency order (parent to child order).
|
|
|
|
>>> print topsort( [(1,2), (3,4), (5,6), (1,3), (1,5), (1,6), (2,5)] )
|
|
[1, 2, 3, 5, 4, 6]
|
|
|
|
>>> print topsort( [(1,2), (1,3), (2,4), (3,4), (5,6), (4,5)] )
|
|
[1, 2, 3, 4, 5, 6]
|
|
|
|
>>> print topsort( [(1,2), (2,3), (3,2)] )
|
|
Traceback (most recent call last):
|
|
CycleError: ([1], {2: 1, 3: 1}, {2: [3], 3: [2]})
|
|
|
|
"""
|
|
num_parents = {} # element -> # of predecessors
|
|
children = {} # element -> list of successors
|
|
for parent, child in pairlist:
|
|
# Make sure every element is a key in num_parents.
|
|
if not num_parents.has_key( parent ):
|
|
num_parents[parent] = 0
|
|
if not num_parents.has_key( child ):
|
|
num_parents[child] = 0
|
|
|
|
# Since child has a parent, increment child's num_parents count.
|
|
num_parents[child] += 1
|
|
|
|
# ... and parent gains a child.
|
|
children.setdefault(parent, []).append(child)
|
|
|
|
# Suck up everything without a parent.
|
|
answer = [x for x in num_parents.keys() if num_parents[x] == 0]
|
|
|
|
# For everything in answer, knock down the parent count on its children.
|
|
# Note that answer grows *in* the loop.
|
|
for parent in answer:
|
|
del num_parents[parent]
|
|
if children.has_key( parent ):
|
|
for child in children[parent]:
|
|
num_parents[child] -= 1
|
|
if num_parents[child] == 0:
|
|
answer.append( child )
|
|
# Following "del" isn't needed; just makes
|
|
# CycleError details easier to grasp.
|
|
del children[parent]
|
|
|
|
if num_parents:
|
|
# Everything in num_parents has at least one child ->
|
|
# there's a cycle.
|
|
raise CycleError(answer, num_parents, children)
|
|
return answer
|
|
|
|
def topsort_levels(pairlist):
|
|
"""Topologically sort a list of (parent, child) pairs into depth levels.
|
|
|
|
This returns a generator.
|
|
Turn this into a an iterator using the iter built-in function.
|
|
(if you iterate over the iterator, each element gets generated when
|
|
it is asked for, rather than generating the whole list up-front.)
|
|
|
|
Each generated element is a list of items at that dependency level.
|
|
|
|
>>> dependency_pairs = [(1,2), (3,4), (5,6), (1,3), (1,5), (1,6), (2,5)]
|
|
>>> for level in iter(topsort_levels( dependency_pairs )):
|
|
... print level
|
|
[1]
|
|
[2, 3]
|
|
[4, 5]
|
|
[6]
|
|
|
|
>>> dependency_pairs = [(1,2), (1,3), (2,4), (3,4), (5,6), (4,5)]
|
|
>>> for level in iter(topsort_levels( dependency_pairs )):
|
|
... print level
|
|
[1]
|
|
[2, 3]
|
|
[4]
|
|
[5]
|
|
[6]
|
|
|
|
>>> dependency_pairs = [(1,2), (2,3), (3,4), (4, 3)]
|
|
>>> try:
|
|
... for level in iter(topsort_levels( dependency_pairs )):
|
|
... print level
|
|
... except CycleError, exc:
|
|
... print 'CycleError:', exc
|
|
[1]
|
|
[2]
|
|
CycleError: ({3: 1, 4: 1}, {3: [4], 4: [3]})
|
|
|
|
|
|
The cycle error should look like.
|
|
CycleError: ({3: 1, 4: 1}, {3: [4], 4: [3]})
|
|
# todo: Make the doctest more robust (i.e. handle arbitrary dict order).
|
|
|
|
"""
|
|
num_parents = {} # element -> # of predecessors
|
|
children = {} # element -> list of successors
|
|
for parent, child in pairlist:
|
|
# Make sure every element is a key in num_parents.
|
|
if not num_parents.has_key( parent ):
|
|
num_parents[parent] = 0
|
|
if not num_parents.has_key( child ):
|
|
num_parents[child] = 0
|
|
|
|
# Since child has a parent, increment child's num_parents count.
|
|
num_parents[child] += 1
|
|
|
|
# ... and parent gains a child.
|
|
children.setdefault(parent, []).append(child)
|
|
|
|
return topsort_levels_core(num_parents, children)
|
|
|
|
def topsort_levels_core(num_parents, children):
|
|
"""Topologically sort a bunch of interdependent items based on dependency.
|
|
|
|
This returns a generator.
|
|
Turn this into a an iterator using the iter built-in function.
|
|
(if you iterate over the iterator, each element gets generated when
|
|
it is asked for, rather than generating the whole list up-front.)
|
|
|
|
Each generated element is a list of items at that dependency level.
|
|
|
|
>>> list(topsort_levels_core(
|
|
... {1: 0, 2: 1, 3: 1, 4: 1, 5: 2, 6: 2},
|
|
... {1: [2, 3, 5, 6], 2: [5], 3: [4], 4: [], 5: [6]}))
|
|
[[1], [2, 3], [4, 5], [6]]
|
|
|
|
>>> list(topsort_levels_core(
|
|
... {1: 0, 2: 2, 3: 1},
|
|
... {1: [2], 2: [3], 3: [2]}))
|
|
Traceback (most recent call last):
|
|
CycleError: ({2: 1, 3: 1}, {2: [3], 3: [2]})
|
|
|
|
This function has a more complicated interface than topsort_levels,
|
|
but is useful if the data is easier to generate in this form.
|
|
|
|
Arguments:
|
|
num_parents -- key: item, value: number of parents (predecessors)
|
|
children -- key: item, value: list of children (successors)
|
|
|
|
"""
|
|
while 1:
|
|
# Suck up everything without a predecessor.
|
|
level_parents = [x for x in num_parents.keys() if num_parents[x] == 0]
|
|
|
|
if not level_parents:
|
|
break
|
|
|
|
# Offer the next generated item,
|
|
# which is a list of the items at this dependency level.
|
|
yield level_parents
|
|
|
|
# For everything item in this level,
|
|
# decrement the parent count,
|
|
# since we have accounted for its parent.
|
|
for level_parent in level_parents:
|
|
|
|
del num_parents[level_parent]
|
|
|
|
if children.has_key(level_parent):
|
|
for level_parent_child in children[level_parent]:
|
|
num_parents[level_parent_child] -= 1
|
|
del children[level_parent]
|
|
|
|
if num_parents:
|
|
# Everything in num_parents has at least one child ->
|
|
# there's a cycle.
|
|
raise CycleError(num_parents, children)
|
|
else:
|
|
# This is the end of the generator.
|
|
raise StopIteration
|
|
|
|
|
|
def find_cycles(parent_children):
|
|
"""Yield cycles. Each result is a list of items comprising a cycle.
|
|
|
|
Use a 'stack' based approach to find all the cycles.
|
|
This is a generator, so yields each cycle as it finds it.
|
|
|
|
It is implicit that the last item in each cycle list is a parent of the
|
|
first item (thereby forming a cycle).
|
|
|
|
Arguments:
|
|
parent_children -- parent -> collection of children
|
|
|
|
Simplest cycle:
|
|
>>> cycles = list(find_cycles({'A': ['B'], 'B': ['A']}))
|
|
>>> len(cycles)
|
|
1
|
|
>>> cycle = cycles[0]
|
|
>>> cycle.sort()
|
|
>>> print cycle
|
|
['A', 'B']
|
|
|
|
Simplest cycle with extra baggage at the start and the end:
|
|
>>> cycles = list(find_cycles(parent_children={'A': ['B'],
|
|
... 'B': ['C'],
|
|
... 'C': ['B', 'D'],
|
|
... 'D': [],
|
|
... }))
|
|
>>> len(cycles)
|
|
1
|
|
>>> cycle = cycles[0]
|
|
>>> cycle.sort()
|
|
>>> print cycle
|
|
['B', 'C']
|
|
|
|
Double cycle:
|
|
>>> cycles = list(find_cycles(parent_children={'A': ['B'],
|
|
... 'B': ['C1', 'C2'],
|
|
... 'C1': ['D1'],
|
|
... 'D1': ['E1'],
|
|
... 'E1': ['D1'],
|
|
... 'C2': ['D2'],
|
|
... 'D2': ['E2'],
|
|
... 'E2': ['D2'],
|
|
... }))
|
|
>>> len(cycles)
|
|
2
|
|
>>> for cycle in cycles:
|
|
... cycle.sort()
|
|
>>> cycles.sort()
|
|
>>> cycle1 = cycles[0]
|
|
>>> cycle1.sort()
|
|
>>> print cycle1
|
|
['D1', 'E1']
|
|
>>> cycle2 = cycles[1]
|
|
>>> cycle2.sort()
|
|
>>> print cycle2
|
|
['D2', 'E2']
|
|
|
|
Simple cycle with children not specified for one item:
|
|
# todo: Should this barf instead?
|
|
>>> cycles = list(find_cycles(parent_children={'A': ['B'],
|
|
... 'B': ['A'],
|
|
... 'C': ['D']}))
|
|
>>> len(cycles)
|
|
1
|
|
>>> cycle = cycles[0]
|
|
>>> cycle.sort()
|
|
>>> print cycle
|
|
['A', 'B']
|
|
|
|
Diamond cycle
|
|
>>> cycles = list(find_cycles(parent_children={'A': ['B1', 'B2'],
|
|
... 'B1': ['C'],
|
|
... 'B2': ['C'],
|
|
... 'C': ['A', 'B1']}))
|
|
>>> len(cycles)
|
|
3
|
|
>>> sorted_cycles = []
|
|
>>> for cycle in cycles:
|
|
... cycle = list(cycle)
|
|
... cycle.sort()
|
|
... sorted_cycles.append(cycle)
|
|
>>> sorted_cycles.sort()
|
|
>>> for cycle in sorted_cycles:
|
|
... print cycle
|
|
['A', 'B1', 'C']
|
|
['A', 'B2', 'C']
|
|
['B1', 'C']
|
|
|
|
Hairy case (order can matter if something is wrong):
|
|
(Note order of B and C in the list.)
|
|
>>> cycles = list(find_cycles(parent_children={
|
|
... 'TD': ['DD'],
|
|
... 'TC': ['DC'],
|
|
... 'DC': ['DQ'],
|
|
... 'C': ['DQ'],
|
|
... 'DQ': ['IA', 'TO'],
|
|
... 'IA': ['A'],
|
|
... 'A': ['B', 'C'],
|
|
... }))
|
|
>>> len(cycles)
|
|
1
|
|
>>> cycle = cycles[0]
|
|
>>> cycle.sort()
|
|
>>> print cycle
|
|
['A', 'C', 'DQ', 'IA']
|
|
|
|
"""
|
|
cycles = []
|
|
visited_nodes = set()
|
|
|
|
for parent in parent_children:
|
|
if parent in visited_nodes:
|
|
# This node is part of a path that has already been traversed.
|
|
continue
|
|
|
|
paths = [[parent]]
|
|
while paths:
|
|
path = paths.pop()
|
|
|
|
parent = path[-1]
|
|
|
|
try:
|
|
children = parent_children[parent]
|
|
except KeyError:
|
|
continue
|
|
|
|
for child in children:
|
|
# Keeping a set of the path nodes, for O(1) lookups at the
|
|
# expense of more memory and complexity, actually makes speed
|
|
# worse. (Due to construction of sets.)
|
|
# This is O(N).
|
|
if child in path:
|
|
# This is a cycle.
|
|
cycle = path[path.index(child):]
|
|
# Check that this is not a dup cycle.
|
|
is_dup = False
|
|
for other_cycle in cycles:
|
|
if is_rotated(other_cycle, cycle):
|
|
is_dup = True
|
|
break
|
|
if not is_dup:
|
|
cycles.append(cycle)
|
|
yield cycle
|
|
else:
|
|
# Push this new path onto the 'stack'.
|
|
# This is probably the most expensive part of the algorithm
|
|
# (a list copy).
|
|
paths.append(path + [child])
|
|
# Mark the node as visited.
|
|
visited_nodes.add(child)
|
|
|
|
|
|
if __name__ == '__main__':
|
|
# Run the doctest tests.
|
|
import sys
|
|
import doctest
|
|
doctest.testmod(sys.modules['__main__'])
|