I have a data set which is a large unweighted cyclic graph. The cycles occur in loops of about 5-6 paths. It consists of about 8000 nodes and each node has from 1-6 (usually abou
Something like this:
#!/usr/bin/env python
from collections import deque
from Queue import Queue
def traverse_path(fromNode, toNode, nodes):
    """Return a shortest path from ``fromNode`` to ``toNode`` via breadth-first search.

    Args:
        fromNode: Node at which the search starts.
        toNode: Node to reach.
        nodes: Dict mapping each node to a list of its neighbours. A node
            that is missing from the dict is treated as having no neighbours.

    Returns:
        The list of nodes from ``fromNode`` to ``toNode`` inclusive, or an
        empty list when ``toNode`` cannot be reached. When both arguments
        are the same node the result is ``[fromNode]``.
    """
    # A unique object marks the start of the parent chain, so no real node
    # name (not even the string 'Root') can be mistaken for the marker.
    chain_start = object()

    def make_path(parents):
        # Follow parent links back from the destination, then flip the
        # result so it reads start -> destination.
        path = []
        node = toNode
        while node is not chain_start:
            path.append(node)
            node = parents[node]
        path.reverse()
        return path

    # parents doubles as the visited set: a node is a key once discovered.
    parents = {fromNode: chain_start}
    pending = deque([fromNode])
    while pending:
        current = pending.popleft()
        # Test for the destination before expanding, so the destination's
        # own neighbours are never enqueued needlessly.
        if current == toNode:
            return make_path(parents)
        for neighbour in nodes.get(current, ()):
            # Skipping already-discovered nodes keeps cycles from looping
            # and preserves the first (shortest) route to each node.
            if neighbour not in parents:
                parents[neighbour] = current
                pending.append(neighbour)
    return []
if __name__ == '__main__':
    # Demo graph: each key is a node, each value the list of its neighbours.
    # Nodes that only ever appear as neighbours (e.g. 'A3455') have no
    # outgoing edges.
    nodes = {
        'E1123': ['D111', 'D222', 'D333', 'D444'],
        'D111': ['C01', 'C02', 'C04'],
        'D222': ['C11', 'C03', 'C05'],
        'D333': ['C01'],
        'C02': ['B1'],
        'B1': ['A3455'],
    }
    result = traverse_path('E1123', 'A3455', nodes)
    # The parenthesised form prints the same text on Python 2 and is also
    # valid on Python 3, where the bare print statement is a syntax error.
    print(result)
['E1123', 'D111', 'C02', 'B1', 'A3455']
If you replace your SQL queries with a dictionary of lists (and that would be the tricky part), you will get this performance.