[Scummvm-cvs-logs] SF.net SVN: scummvm: [28641] tools/branches/gsoc2007-decompiler/finalD
brixxie at users.sourceforge.net
brixxie at users.sourceforge.net
Fri Aug 17 07:50:46 CEST 2007
Revision: 28641
http://scummvm.svn.sourceforge.net/scummvm/?rev=28641&view=rev
Author: brixxie
Date: 2007-08-16 22:50:46 -0700 (Thu, 16 Aug 2007)
Log Message:
-----------
cfg.py: added control flow graph optimization functions,
beginnings of loop structuring
comments, test code, little utilities
Modified Paths:
--------------
tools/branches/gsoc2007-decompiler/finalD/cfg.py
tools/branches/gsoc2007-decompiler/finalD/decompiler.py
tools/branches/gsoc2007-decompiler/finalD/iformat.py
tools/branches/gsoc2007-decompiler/finalD/scumm.py
Modified: tools/branches/gsoc2007-decompiler/finalD/cfg.py
===================================================================
--- tools/branches/gsoc2007-decompiler/finalD/cfg.py 2007-08-17 00:54:50 UTC (rev 28640)
+++ tools/branches/gsoc2007-decompiler/finalD/cfg.py 2007-08-17 05:50:46 UTC (rev 28641)
@@ -28,6 +28,7 @@
"""
self.instrs = instrs
self.btype = btype
+ self.rev_postorder = None
def __str__(self):
"""BasicBlock string representation."""
@@ -68,6 +69,82 @@
blocks.append(BasicBlock(block_instrs, BT.fall))
return blocks
+def postorder(graph):
+ """
+ Return a list of graph's nodes in postorder.
+
+ 0 is assumed to be the start node.
+
+ Arguments:
+ graph -- a control flow graph
+ """
+ seen = {}
+ def visit(node):
+ seen[node] = True
+ for child in graph.out_nbrs(node):
+ if child not in seen:
+ for result in visit(child):
+ yield result
+ yield node
+ return list(visit(0))
+
+def hide_dead_nodes(graph):
+ """
+ Hide dead nodes (and corresponding edges) in graph.
+
+ Nodes are considered dead iff they are unreachable from the root node.
+ The root node is always reachable.
+ """
+ change = True
+ while change:
+ change = False
+ for node in graph.node_list():
+ if node != 0:
+ inc = graph.inc_nbrs(node)
+ if (len(inc) == 1 and node in inc) or not inc:
+ graph.hide_node(node)
+ change = True
+
+def rewire_jumps(graph, decoded):
+ """
+ Rewire jumps in graph for simpler control flow where possible.
+
+ Any kind of jump to another unconditional jump instruction is
+ rewired to directly point to the jump target of the second jump.
+
+ TODO: If a two_way fall-through successor is a one_way..
+
+ Arguments:
+ graph -- a control flow graph
+ decoded -- dictionary of decoded instructions
+ """
+ change = True
+ while change:
+ change = False
+ for node in graph.node_list():
+ block = graph.node_data(node)
+ if block.btype in (BT.two_way, BT.one_way):
+ node_jump = decoded[block.get_last()]
+ if block.btype == BT.one_way:
+ tedge = graph.out_edges(node)[0]
+ else:
+ # ugly way of finding the "jump outgoing edge"
+ e = graph.out_edges(node)[0]
+ j = graph.node_data(graph.tail(e)).get_first()
+ if j == node_jump.get_to():
+ tedge = e
+ else:
+ tedge = graph.out_edges(node)[1]
+ tnode = graph.tail(tedge)
+ tblock = graph.node_data(tnode)
+ if tblock.btype == BT.one_way and len(tblock.instrs) == 1:
+ graph.hide_edge(tedge)
+ graph.add_edge(node, graph.out_nbrs(tnode)[0])
+ # let's also change the underlying Jump
+ tnode_jump = decoded[tblock.get_first()]
+ node_jump.set_to(tnode_jump.get_to())
+ change = True
+
def gen_control_flow_graph(blocks, decoded):
"""
Generate a control flow graph.
@@ -76,20 +153,108 @@
blocks -- a list of BasicBlock objects
decoded -- a dict of offset : Instr
"""
- edges = []
def index_key(lst, val, key):
for i, e in enumerate(lst):
if key(e) == val:
return i
+ graph = Graph.Graph()
for i, block in enumerate(blocks):
+ graph.add_node(i, block)
+ for i, block in enumerate(blocks):
if block.btype == BT.one_way or block.btype == BT.two_way:
# we know the last Instr is some kind of Jump instance
jump_target = decoded[block.get_last()].get_to()
target_index = index_key(blocks, jump_target, BasicBlock.get_first)
- assert(target_index != None) # this should be non None
- edges.append((i, target_index))
+ # assert that jump target is valid instruction offset
+ # (if not, probably a disassembly error happened before)
+ assert(target_index != None)
+ graph.add_edge(i, target_index, create_nodes=False)
# a two-way has a fall-through too!
if block.btype == BT.fall or block.btype == BT.two_way:
if i+1 != len(blocks):
- edges.append((i, i+1))
- return Graph.Graph(edges)
+ graph.add_edge(i, i+1, create_nodes=False)
+ rewire_jumps(graph, decoded) # simplify "redundant" jumps
+ hide_dead_nodes(graph) # hide all unreachable nodes
+ # graph edges have been added, now tag basic blocks with their
+ # reverse postorder in the control flow graph
+ for order, node in enumerate(reversed(postorder(graph))):
+ graph.node_data(node).rev_postorder = order
+ return graph
+
+def gen_intervals(graph):
+ """Generate the non overlapping graph intervals for graph."""
+ def get_nodes_for_interval(interval):
+ return [m for m in graph.node_list()
+ if m not in interval
+ and graph.inc_nbrs(m) # if [], next pred will always be true
+ and all(p in interval for p in graph.inc_nbrs(m))]
+ headers = set([0])
+ processed = set()
+ intervals = []
+ while any(h not in processed for h in headers):
+ h = headers.difference(processed).pop()
+ i_h = [h]
+ processed.add(h)
+ new_nodes = get_nodes_for_interval(i_h)
+ while new_nodes:
+ i_h += new_nodes
+ new_nodes = get_nodes_for_interval(i_h)
+ headers.update([m for m in graph.node_list()
+ if m not in headers
+ and m not in i_h
+ and any(p in i_h for p in graph.inc_nbrs(m))])
+ intervals.append(i_h)
+ return intervals
+
+def gen_derived_sequence(graph, intervals):
+ """
+ Generate the derived sequence of graphs of graph.
+
+ Arguments:
+ graph -- a graph
+ intervals -- the list of graph intervals of graph
+ """
+ def derive_graph(g, ints):
+ ng = Graph.Graph()
+ for n in range(len(ints)):
+ ng.add_node(n)
+ for i, int_i in enumerate(ints):
+ for j, int_j in enumerate(ints):
+ h_j = int_j[0]
+ if i != j and any(n in g.inc_nbrs(h_j) for n in int_i):
+ ng.add_edge(i, j, create_nodes=False)
+ return ng
+ dsequence = [(graph, intervals)]
+ prev_graph = graph
+ next_graph = derive_graph(prev_graph, intervals)
+ while next_graph.number_of_nodes() != prev_graph.number_of_nodes():
+ intervals = gen_intervals(next_graph)
+ dsequence.append((next_graph, intervals))
+ prev_graph = next_graph
+ next_graph = derive_graph(prev_graph, intervals)
+ return dsequence
+
+def loop_struct(dseq, decoded):
+ """
+ Structure loops.
+
+ Find loops, deduce their type (pre-tested, post-tested, endless)
+ and mark nodes belonging to loops.
+
+ Arguments:
+ dseq -- derived sequence of control flow graphs [(g, intvs), ...]
+ decoded -- dictionary of instr offset -> decoded instruction
+ """
+ for g, intvs in dseq:
+ for intv in intvs:
+ header_node = intv[0]
+ # a latching node is the node where the back-edge to a loop
+ # header originates
+ latching_node = None
+ for header_pred in g.inc_nbrs(header_node):
+ if header_pred in intv:
+ # found a loop (header_pred, header_node)
+ latching_node = header_pred
+ # mark_nodes_in_loop(latching_node, header_node, intv)
+ # find loop type here
+ # find loop follow here
Modified: tools/branches/gsoc2007-decompiler/finalD/decompiler.py
===================================================================
--- tools/branches/gsoc2007-decompiler/finalD/decompiler.py 2007-08-17 00:54:50 UTC (rev 28640)
+++ tools/branches/gsoc2007-decompiler/finalD/decompiler.py 2007-08-17 05:50:46 UTC (rev 28641)
@@ -14,10 +14,10 @@
if __name__ == '__main__':
parser = OptionParser()
- parser.add_option("-f", "--file", dest="filename")
- parser.add_option("-d", "--disassemble", dest="disonly", action="store_true", default=False)
+ parser.add_option("-d", "--disassemble", dest="disonly",
+ action="store_true", default=False)
(options, args) = parser.parse_args()
- f = open(options.filename, 'r')
+ f = open(args[0], 'r')
sc = scumm.SCUMM345(array.array('B', f.read()),
3,
indy_flag = True,
@@ -26,17 +26,25 @@
sc.parse_header()
dis = disasm.Disasm(sc)
decoded = dis.decode()
- if not options.disonly:
- blocks = cfg.gen_basic_blocks(decoded)
- graph = cfg.gen_control_flow_graph(blocks, decoded)
- print "%s" % graph
- else:
- decoded_tuples = decoded.items()
- decoded_tuples.sort(key=lambda t: t[0])
- for off, instr in decoded_tuples:
- print "[%.4X] %s;" % (off, instr)
+ if decoded:
+ if not options.disonly:
+ blocks = cfg.gen_basic_blocks(decoded)
+ graph = cfg.gen_control_flow_graph(blocks, decoded)
+ intervals = cfg.gen_intervals(graph)
+ dseq = cfg.gen_derived_sequence(graph, intervals)
+ for dg, di in dseq:
+ print dg
+ for eid in dg.edge_list():
+ print dg.edge_by_id(eid)
+ print di
+ print '-'*72
+ else:
+ decoded_tuples = decoded.items()
+ decoded_tuples.sort(key=lambda t: t[0])
+ for off, instr in decoded_tuples:
+ print "[%.4X] %s;" % (off, instr)
except:
- print "ERRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRROOOOOOOOOOOOOOOOOOOOOORRRRRRRRRRRRRRR"
- print options.filename
+ print "ERRRRRRRRRRRRRRRRRRRRRRRRRRRRRROOOOOOOOOOOOOOOOOOOOOORRRRRRRRRRRR"
+ print args[0]
raise
print "END"
Modified: tools/branches/gsoc2007-decompiler/finalD/iformat.py
===================================================================
--- tools/branches/gsoc2007-decompiler/finalD/iformat.py 2007-08-17 00:54:50 UTC (rev 28640)
+++ tools/branches/gsoc2007-decompiler/finalD/iformat.py 2007-08-17 05:50:46 UTC (rev 28641)
@@ -27,7 +27,7 @@
"""Assignment Instruction in intermediate format."""
def __init__(self, op, dst, val):
- Instr.__init__(self, op, (dst, val))
+ Instr.__init__(self, op, [dst, val])
def __str__(self):
dst, val = self.args
@@ -37,11 +37,14 @@
"""Unconditional jump instruction in intermediate format."""
def __init__(self, to):
- Instr.__init__(self, "goto", (to,))
+ Instr.__init__(self, "goto", [to,])
def get_to(self):
return self.args[0]
+ def set_to(self, to):
+ self.args[0] = to
+
def __str__(self):
return "goto %.4X" % self.args[0]
@@ -57,5 +60,9 @@
def is_jump(instr):
- """Return true if instr is a Jump."""
+ """Return True if instr is a Jump."""
return instr.__class__ == Jump or instr.__class__ == CondJump
+
+def is_cond_jump(instr):
+ """Return True if instr is a conditional Jump."""
+ return instr.__class__ == CondJump
Modified: tools/branches/gsoc2007-decompiler/finalD/scumm.py
===================================================================
--- tools/branches/gsoc2007-decompiler/finalD/scumm.py 2007-08-17 00:54:50 UTC (rev 28640)
+++ tools/branches/gsoc2007-decompiler/finalD/scumm.py 2007-08-17 05:50:46 UTC (rev 28641)
@@ -393,6 +393,7 @@
self.halt_on_error = halt_on_error
self.script_start = 0
+ # called last because ByteCode.__init__() calls self.populate()
bytecode.ByteCode.__init__(self, init, SCUMM.get_byte)
def get_pos(self):
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
More information about the Scummvm-git-logs
mailing list