[Scummvm-cvs-logs] SF.net SVN: scummvm: [28641] tools/branches/gsoc2007-decompiler/finalD

brixxie at users.sourceforge.net brixxie at users.sourceforge.net
Fri Aug 17 07:50:46 CEST 2007


Revision: 28641
          http://scummvm.svn.sourceforge.net/scummvm/?rev=28641&view=rev
Author:   brixxie
Date:     2007-08-16 22:50:46 -0700 (Thu, 16 Aug 2007)

Log Message:
-----------
cfg.py: added control flow graph optimization functions,
beginnings of loop structuring

comments, test code, little utilities

Modified Paths:
--------------
    tools/branches/gsoc2007-decompiler/finalD/cfg.py
    tools/branches/gsoc2007-decompiler/finalD/decompiler.py
    tools/branches/gsoc2007-decompiler/finalD/iformat.py
    tools/branches/gsoc2007-decompiler/finalD/scumm.py

Modified: tools/branches/gsoc2007-decompiler/finalD/cfg.py
===================================================================
--- tools/branches/gsoc2007-decompiler/finalD/cfg.py	2007-08-17 00:54:50 UTC (rev 28640)
+++ tools/branches/gsoc2007-decompiler/finalD/cfg.py	2007-08-17 05:50:46 UTC (rev 28641)
@@ -28,6 +28,7 @@
         """
         self.instrs = instrs
         self.btype = btype
+        self.rev_postorder = None
 
     def __str__(self):
         """BasicBlock string representation."""
@@ -68,6 +69,82 @@
         blocks.append(BasicBlock(block_instrs, BT.fall))
     return blocks
 
+def postorder(graph):
+    """
+    Return a list of graph's nodes in postorder.
+
+    0 is assumed to be the start node.
+
+    Arguments:
+    graph -- a control flow graph
+    """
+    seen = {}
+    def visit(node):
+        seen[node] = True
+        for child in graph.out_nbrs(node):
+            if child not in seen:
+                for result in visit(child):
+                    yield result
+        yield node
+    return list(visit(0))
+
+def hide_dead_nodes(graph):
+    """
+    Hide dead nodes (and corresponding edges) in graph.
+
+    Nodes are considered dead iff they are unreachable from the root node.
+    The root node is always reachable.
+    """
+    change = True
+    while change:
+        change = False
+        for node in graph.node_list():
+            if node != 0:
+                inc = graph.inc_nbrs(node)
+                if (len(inc) == 1 and node in inc) or not inc:
+                    graph.hide_node(node)
+                    change = True
+
+def rewire_jumps(graph, decoded):
+    """
+    Rewire jumps in graph for simpler control flow where possible.
+
+    Any kind of jump to another unconditional jump instruction is
+    rewired to directly point to the jump target of the second jump.
+
+    TODO: If a two_way fall-through successor is a one_way..
+
+    Arguments:
+    graph -- a control flow graph
+    decoded -- dictionary of decoded instructions
+    """
+    change = True
+    while change:
+        change = False
+        for node in graph.node_list():
+            block = graph.node_data(node)
+            if block.btype in (BT.two_way, BT.one_way):
+                node_jump = decoded[block.get_last()]
+                if block.btype == BT.one_way:
+                    tedge = graph.out_edges(node)[0]
+                else:
+                    # ugly way of finding the "jump outgoing edge"
+                    e = graph.out_edges(node)[0]
+                    j = graph.node_data(graph.tail(e)).get_first()
+                    if j == node_jump.get_to():
+                        tedge = e
+                    else:
+                        tedge = graph.out_edges(node)[1]
+                tnode = graph.tail(tedge)
+                tblock = graph.node_data(tnode)
+                if tblock.btype == BT.one_way and len(tblock.instrs) == 1:
+                    graph.hide_edge(tedge)
+                    graph.add_edge(node, graph.out_nbrs(tnode)[0])
+                    # let's also change the underlying Jump
+                    tnode_jump = decoded[tblock.get_first()]
+                    node_jump.set_to(tnode_jump.get_to())
+                    change = True
+
 def gen_control_flow_graph(blocks, decoded):
     """
     Generate a control flow graph.
@@ -76,20 +153,108 @@
     blocks -- a list of BasicBlock objects
     decoded -- a dict of offset : Instr
     """
-    edges = []
     def index_key(lst, val, key):
         for i, e in enumerate(lst):
             if key(e) == val:
                 return i
+    graph = Graph.Graph()
     for i, block in enumerate(blocks):
+        graph.add_node(i, block)
+    for i, block in enumerate(blocks):
         if block.btype == BT.one_way or block.btype == BT.two_way:
             # we know the last Instr is some kind of Jump instance
             jump_target = decoded[block.get_last()].get_to()
             target_index = index_key(blocks, jump_target, BasicBlock.get_first)
-            assert(target_index != None)  # this should be non None
-            edges.append((i, target_index))
+            # assert that jump target is valid instruction offset
+            # (if not, probably a disassembly error happened before)
+            assert(target_index != None)
+            graph.add_edge(i, target_index, create_nodes=False)
         # a two-way has a fall-through too!
         if block.btype == BT.fall or block.btype == BT.two_way:
             if i+1 != len(blocks):
-                edges.append((i, i+1))
-    return Graph.Graph(edges)
+                graph.add_edge(i, i+1, create_nodes=False)
+    rewire_jumps(graph, decoded)  # simplify "redundant" jumps
+    hide_dead_nodes(graph)  # hide all unreachable nodes
+    # graph edges have been added, now tag basic blocks with their
+    # reverse postorder in the control flow graph
+    for order, node in enumerate(reversed(postorder(graph))):
+        graph.node_data(node).rev_postorder = order
+    return graph
+
+def gen_intervals(graph):
+    """Generate the non overlapping graph intervals for graph."""
+    def get_nodes_for_interval(interval):
+        return [m for m in graph.node_list()
+                if m not in interval
+                and graph.inc_nbrs(m)  # if [], next pred will always be true
+                and all(p in interval for p in graph.inc_nbrs(m))]
+    headers = set([0])
+    processed = set()
+    intervals = []
+    while any(h not in processed for h in headers):
+        h = headers.difference(processed).pop()
+        i_h = [h]
+        processed.add(h)
+        new_nodes = get_nodes_for_interval(i_h)
+        while new_nodes:
+            i_h += new_nodes
+            new_nodes = get_nodes_for_interval(i_h)
+        headers.update([m for m in graph.node_list()
+                        if m not in headers
+                        and m not in i_h
+                        and any(p in i_h for p in graph.inc_nbrs(m))])
+        intervals.append(i_h)
+    return intervals
+
+def gen_derived_sequence(graph, intervals):
+    """
+    Generate the derived sequence of graphs of graph.
+
+    Arguments:
+    graph -- a graph
+    intervals -- the list of graph intervals of graph
+    """
+    def derive_graph(g, ints):
+        ng = Graph.Graph()
+        for n in range(len(ints)):
+            ng.add_node(n)
+        for i, int_i in enumerate(ints):
+            for j, int_j in enumerate(ints):
+                h_j = int_j[0]
+                if i != j and any(n in g.inc_nbrs(h_j) for n in int_i):
+                    ng.add_edge(i, j, create_nodes=False)
+        return ng
+    dsequence = [(graph, intervals)]
+    prev_graph = graph
+    next_graph = derive_graph(prev_graph, intervals)
+    while next_graph.number_of_nodes() != prev_graph.number_of_nodes():
+        intervals = gen_intervals(next_graph)
+        dsequence.append((next_graph, intervals))
+        prev_graph = next_graph
+        next_graph = derive_graph(prev_graph, intervals)
+    return dsequence
+
+def loop_struct(dseq, decoded):
+    """
+    Structure loops.
+
+    Find loops, deduce their type (pre-tested, post-tested, endless)
+    and mark nodes belonging to loops.
+
+    Arguments:
+    dseq -- derived sequence of control flow graphs [(g, intvs), ...]
+    decoded -- dictionary of instr offset -> decoded instruction
+    """
+    for g, intvs in dseq:
+        for intv in intvs:
+            header_node = intv[0]
+            # a latching node is the node where the back-edge to a loop
+            # header originates
+            latching_node = None
+            for header_pred in g.inc_nbrs(header_node):
+                if header_pred in intv:
+                    # found a loop (header_pred, header_node)
+                    latching_node = header_pred
+                    # mark_nodes_in_loop(latching_node, header_node, intv)
+                    # find loop type here
+                    # find loop follow here

Modified: tools/branches/gsoc2007-decompiler/finalD/decompiler.py
===================================================================
--- tools/branches/gsoc2007-decompiler/finalD/decompiler.py	2007-08-17 00:54:50 UTC (rev 28640)
+++ tools/branches/gsoc2007-decompiler/finalD/decompiler.py	2007-08-17 05:50:46 UTC (rev 28641)
@@ -14,10 +14,10 @@
 
 if __name__ == '__main__':
     parser = OptionParser()
-    parser.add_option("-f", "--file", dest="filename")
-    parser.add_option("-d", "--disassemble", dest="disonly", action="store_true", default=False)
+    parser.add_option("-d", "--disassemble", dest="disonly",
+                      action="store_true", default=False)
     (options, args) = parser.parse_args()
-    f = open(options.filename, 'r')
+    f = open(args[0], 'r')
     sc = scumm.SCUMM345(array.array('B', f.read()),
                         3,
                         indy_flag = True,
@@ -26,17 +26,25 @@
         sc.parse_header()
         dis = disasm.Disasm(sc)
         decoded = dis.decode()
-        if not options.disonly:
-            blocks = cfg.gen_basic_blocks(decoded)
-            graph = cfg.gen_control_flow_graph(blocks, decoded)
-            print "%s" % graph
-        else:
-            decoded_tuples = decoded.items()
-            decoded_tuples.sort(key=lambda t: t[0])
-            for off, instr in decoded_tuples:
-                print "[%.4X] %s;" % (off, instr)
+        if decoded:
+            if not options.disonly:
+                blocks = cfg.gen_basic_blocks(decoded)
+                graph = cfg.gen_control_flow_graph(blocks, decoded)
+                intervals = cfg.gen_intervals(graph)
+                dseq = cfg.gen_derived_sequence(graph, intervals)
+                for dg, di in dseq:
+                    print dg
+                    for eid in dg.edge_list():
+                        print dg.edge_by_id(eid)
+                    print di
+                    print '-'*72
+            else:
+                decoded_tuples = decoded.items()
+                decoded_tuples.sort(key=lambda t: t[0])
+                for off, instr in decoded_tuples:
+                    print "[%.4X] %s;" % (off, instr)
     except:
-        print "ERRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRROOOOOOOOOOOOOOOOOOOOOORRRRRRRRRRRRRRR"
-        print options.filename
+        print "ERRRRRRRRRRRRRRRRRRRRRRRRRRRRRROOOOOOOOOOOOOOOOOOOOOORRRRRRRRRRRR"
+        print args[0]
         raise
     print "END"

Modified: tools/branches/gsoc2007-decompiler/finalD/iformat.py
===================================================================
--- tools/branches/gsoc2007-decompiler/finalD/iformat.py	2007-08-17 00:54:50 UTC (rev 28640)
+++ tools/branches/gsoc2007-decompiler/finalD/iformat.py	2007-08-17 05:50:46 UTC (rev 28641)
@@ -27,7 +27,7 @@
     """Assignment Instruction in intermediate format."""
 
     def __init__(self, op,  dst, val):
-        Instr.__init__(self, op, (dst, val))
+        Instr.__init__(self, op, [dst, val])
 
     def __str__(self):
         dst, val = self.args
@@ -37,11 +37,14 @@
     """Unconditional jump instruction in intermediate format."""
 
     def __init__(self, to):
-        Instr.__init__(self, "goto", (to,))
+        Instr.__init__(self, "goto", [to,])
 
     def get_to(self):
         return self.args[0]
 
+    def set_to(self, to):
+        self.args[0] = to
+
     def __str__(self):
         return "goto %.4X" % self.args[0]
 
@@ -57,5 +60,9 @@
 
 
 def is_jump(instr):
-    """Return true if instr is a Jump."""
+    """Return True if instr is a Jump."""
     return instr.__class__ == Jump or instr.__class__ == CondJump
+
+def is_cond_jump(instr):
+    """Return True if instr is a conditional Jump."""
+    return instr.__class__ == CondJump

Modified: tools/branches/gsoc2007-decompiler/finalD/scumm.py
===================================================================
--- tools/branches/gsoc2007-decompiler/finalD/scumm.py	2007-08-17 00:54:50 UTC (rev 28640)
+++ tools/branches/gsoc2007-decompiler/finalD/scumm.py	2007-08-17 05:50:46 UTC (rev 28641)
@@ -393,6 +393,7 @@
         self.halt_on_error = halt_on_error
         self.script_start = 0
 
+        # called last because ByteCode.__init__() calls self.populate()
         bytecode.ByteCode.__init__(self, init, SCUMM.get_byte)
 
     def get_pos(self):


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.




More information about the Scummvm-git-logs mailing list