[Scummvm-cvs-logs] SF.net SVN: scummvm: [28651] tools/branches/gsoc2007-decompiler/finalD

brixxie at users.sourceforge.net brixxie at users.sourceforge.net
Sat Aug 18 08:19:25 CEST 2007


Revision: 28651
          http://scummvm.svn.sourceforge.net/scummvm/?rev=28651&view=rev
Author:   brixxie
Date:     2007-08-17 23:19:25 -0700 (Fri, 17 Aug 2007)

Log Message:
-----------
completed loop structuring, conditional structuring and compound
conditional structuring code

Modified Paths:
--------------
    tools/branches/gsoc2007-decompiler/finalD/cfg.py
    tools/branches/gsoc2007-decompiler/finalD/decompiler.py

Modified: tools/branches/gsoc2007-decompiler/finalD/cfg.py
===================================================================
--- tools/branches/gsoc2007-decompiler/finalD/cfg.py	2007-08-18 06:18:21 UTC (rev 28650)
+++ tools/branches/gsoc2007-decompiler/finalD/cfg.py	2007-08-18 06:19:25 UTC (rev 28651)
@@ -14,11 +14,16 @@
     two_way = 2
     """two way block"""
 
+class LT:
+    """Loop type enumeration."""
+    post_tested = 0
+    pre_tested = 1
+    endless = 2
 
 class BasicBlock:
     """BasicBlock class."""
 
-    def __init__(self, instrs, btype):
+    def __init__(self, instrs, btype, type_info=None):
         """
         Initialize BasicBlock object.
 
@@ -28,12 +33,52 @@
         """
         self.instrs = instrs
         self.btype = btype
+        self.type_info = type_info
+
         self.rev_postorder = None
+        self.idom = None
 
+        self.loop_head = None
+        self.loop_latch = None
+        self.loop_type = None
+        self.loop_follow = None
+
+        self.if_follow = None
+
     def __str__(self):
         """BasicBlock string representation."""
-        return str(self.instrs)
+        return """<BasicBlock
+        reverse post-order: %s
+        immediate dominator: %s
+        btype: %s
+        type-info: %s
+        loop_head: %s
+        loop_latch: %s
+        loop_type: %s
+        loop_follow: %s
+        if_follow: %s
+        instrs: %s>""" % \
+            (self.rev_postorder,
+             self.idom,
+             {BT.two_way : "two-way",
+              BT.one_way : "one-way",
+              BT.fall : "fall-through",
+              None: None}[self.btype],
+             self.type_info,
+             self.loop_head,
+             self.loop_latch,
+             {LT.post_tested: "post-tested",
+              LT.pre_tested: "pre-tested",
+              LT.endless: "endless",
+              None: None}[self.loop_type],
+             self.loop_follow,
+             self.if_follow,
+             self.instrs)
 
+    def __len__(self):
+        """Return len(self.instrs)."""
+        return len(self.instrs)
+
     def get_first(self):
         """Return first instruction offset in block."""
         return self.instrs[0]
@@ -46,6 +91,7 @@
 def get_bt(last_instr):
     """Return block type of block with last instruction last_instr."""
     return {CondJump : BT.two_way,
+            NegCondJump : BT.two_way,
             Jump : BT.one_way}.get(last_instr.__class__, BT.fall)
 
 def gen_basic_blocks(decoded):
@@ -63,12 +109,33 @@
                 block_instrs = []
         block_instrs.append(off)
         if is_jump(instr):
-            blocks.append(BasicBlock(block_instrs, get_bt(instr)))
+            blocks.append(BasicBlock(block_instrs,
+                                     get_bt(instr),
+                                     instr.info()))
             block_instrs = []
     if block_instrs:
         blocks.append(BasicBlock(block_instrs, BT.fall))
     return blocks
 
+def get_then_else(g, n):
+    """Return (t, e) of two-way node n in graph g."""
+    block = g.node_data(n)
+    assert(block.btype == BT.two_way)
+    positive, target, _ = block.type_info
+    s0 = g.out_nbrs(n)[0]
+    s1 = g.out_nbrs(n)[1]
+
+    if positive:
+        if target == g.node_data(s0).get_first():
+            return (s0, s1)
+        else:
+            return (s1, s0)
+    else:
+        if target == g.node_data(s0).get_first():
+            return (s1, s0)
+        else:
+            return (s0, s1)
+
 def postorder(graph):
     """
     Return a list of graph's nodes in postorder.
@@ -105,6 +172,7 @@
                     graph.hide_node(node)
                     change = True
 
+# TODO: doesn't theoretically depend on decoded anymore
 def rewire_jumps(graph, decoded):
     """
     Rewire jumps in graph for simpler control flow where possible.
@@ -137,7 +205,7 @@
                         tedge = graph.out_edges(node)[1]
                 tnode = graph.tail(tedge)
                 tblock = graph.node_data(tnode)
-                if tblock.btype == BT.one_way and len(tblock.instrs) == 1:
+                if tblock.btype == BT.one_way and len(tblock) == 1:
                     graph.hide_edge(tedge)
                     graph.add_edge(node, graph.out_nbrs(tnode)[0])
                     # let's also change the underlying Jump
@@ -214,19 +282,29 @@
     graph -- a graph
     intervals -- the list of graph intervals of graph
     """
-    def derive_graph(g, ints):
+    def derive_graph(g, intvs, first=False):
         ng = Graph.Graph()
-        for n in range(len(ints)):
-            ng.add_node(n)
-        for i, int_i in enumerate(ints):
-            for j, int_j in enumerate(ints):
+        if first:
+            for n, intv in enumerate(intvs):
+                node_data = []
+                for nid in intv:
+                    node_data.append(nid)
+                ng.add_node(n, node_data)
+        else:
+            for n, intv in enumerate(intvs):
+                node_data = []
+                for pn in intv:
+                    node_data += g.node_data(pn)
+                ng.add_node(n, node_data)
+        for i, int_i in enumerate(intvs):
+            for j, int_j in enumerate(intvs):
                 h_j = int_j[0]
                 if i != j and any(n in g.inc_nbrs(h_j) for n in int_i):
                     ng.add_edge(i, j, create_nodes=False)
         return ng
     dsequence = [(graph, intervals)]
     prev_graph = graph
-    next_graph = derive_graph(prev_graph, intervals)
+    next_graph = derive_graph(prev_graph, intervals, True)
     while next_graph.number_of_nodes() != prev_graph.number_of_nodes():
         intervals = gen_intervals(next_graph)
         dsequence.append((next_graph, intervals))
@@ -234,8 +312,82 @@
         next_graph = derive_graph(prev_graph, intervals)
     return dsequence
 
-def loop_struct(dseq, decoded):
+def mark_nodes_in_loop(g, latch, head, interval):
     """
+    Mark all nodes belonging to loop (latch, head).
+
+    Return the list of loop nodes.
+    """
+    nodes_in_loop = [head]
+
+    hblock = g.node_data(head)
+    hblock.loop_head = head
+    hblock.loop_latch = latch
+
+    lblock = g.node_data(latch)
+
+    for n in interval:
+        block = g.node_data(n)
+        if block.rev_postorder \
+                in range(hblock.rev_postorder+1, lblock.rev_postorder+1):
+            nodes_in_loop.append(n)
+            if not block.loop_head:
+                block.loop_head = head
+                block.loop_latch = latch
+    return nodes_in_loop
+
+def set_loop_type(g, latch, head, nodes_in_loop):
+    """Find loop type."""
+    lblock = g.node_data(latch)
+    hblock = g.node_data(head)
+    if lblock.btype == BT.two_way:
+        if hblock.btype == BT.two_way:
+            if all(hsucc in nodes_in_loop for hsucc in g.out_nbrs(head)):
+                lt = LT.post_tested
+            else:
+                lt = LT.pre_tested
+        else:
+            lt = LT.post_tested
+    else:
+        if hblock.btype == BT.two_way:
+            # I guess that is... "important"
+            if all(hsucc in nodes_in_loop for hsucc in g.out_nbrs(head)):
+                lt = LT.endless
+            else:
+                lt = LT.pre_tested
+        else:
+            lt = LT.endless
+    hblock.loop_type = lt
+
+def set_loop_follow(g, latch, head, nodes_in_loop):
+    """Find loop follow node."""
+    hblock = g.node_data(head)
+    if hblock.loop_type == LT.pre_tested:
+        if g.out_nbrs(head)[0] in nodes_in_loop:
+            lf = g.out_nbrs(head)[1]
+        else:
+            lf = g.out_nbrs(head)[0]
+    elif hblock.loop_type == LT.post_tested:
+        if g.out_nbrs(latch)[0] in nodes_in_loop:
+            lf = g.out_nbrs(latch)[1]
+        else:
+            lf = g.out_nbrs(latch)[0]
+    else:
+        lf = None
+        for twn in [twn for twn in nodes_in_loop
+                    if g.node_data(twn).btype == BT.two_way]:
+            n0 = g.out_nbrs(twn)[0]
+            n1 = g.out_nbrs(twn)[1]
+            if (n0 not in nodes_in_loop) and \
+                    (not lf or g.node_data(n0).rev_postorder < lf):
+                lf = n0
+            elif (n1 not in nodes_in_loop) and \
+                    (not lf or g.node_data(n1).rev_postorder < lf):
+                lf = n1
+    hblock.loop_follow = lf
+
+def loop_struct(dseq):
+    """
     Structure loops.
 
     Find loops, deduce their type (pre-tested, post-tested, endless)
@@ -243,18 +395,173 @@
 
     Arguments:
     dseq -- derived sequence of control flow graphs [(g, intvs), ...]
-    decoded -- dictionary of instr offset -> decoded instruction
     """
+    top_graph, intvs = dseq.pop(0)
+    for intv in intvs:
+        head = intv[0]
+        latch = None
+        for head_pred in top_graph.inc_nbrs(head):
+            if head_pred in intv:
+                # found a loop (head_pred, head)
+                latch = head_pred
+                nodes_in_loop = \
+                    mark_nodes_in_loop(top_graph, latch, head, intv)
+                set_loop_type(top_graph, latch, head, nodes_in_loop)
+                set_loop_follow(top_graph, latch, head, nodes_in_loop)
+                break
     for g, intvs in dseq:
         for intv in intvs:
-            header_node = intv[0]
-            # a latching node is the node where the back-edge to a loop
-            # header originates
-            latching_node = None
-            for header_pred in g.inc_nbrs(header_node):
-                if header_pred in intv:
-                    # found a loop (header_pred, header_node)
-                    latching_node = header_pred
-                    # mark_nodes_in_loop(latching_node, header_node, intv)
-                    # find loop type here
-                    # find loop follow here
+            head = g.node_data(intv[0])[0]
+            latch = None
+            for head_pred in top_graph.inc_nbrs(head):
+                top_intv = []
+                top_intv.extend(g.node_data(n) for n in intv)
+                print top_intv
+                if head_pred in top_intv:
+                    latch = head_pred
+                    nodes_in_loop = \
+                        mark_nodes_in_loop(top_graph, latch, head, top_intv)
+                    set_loop_type(top_graph, latch, head, nodes_in_loop)
+                    set_loop_follow(top_graph, latch, head, nodes_in_loop)
+
+# def back_dfs(g, start):
+#     """Fixed version of altgraph.Graph.Graph.back_dfs()."""
+#     visited, stack = set([start]), [start]
+#     while stack:
+#         curr_node = stack.pop()
+#         yield curr_node
+#         if curr_node == None:
+#             break
+#         for edge in g.inc_edges(curr_node):
+#             head = g.head(edge)
+#             if head not in visited:
+#                 visited.add(head)
+#                 stack.append(head)
+
+def get_dominators(g):
+    """Find and return the dominators for every node in g."""
+    dominators = {0 : set([0])}
+    for n in g.node_list():
+        if n != 0:
+            dominators[n] = set(g.node_list())
+    change = True
+    while change:
+        change = False
+        for n in g.node_list():
+            if n != 0:
+                all_preds = g.inc_nbrs(n)
+                new_doms = set(dominators[all_preds.pop()])
+                for doms_p in [dominators[p] for p in all_preds]:
+                    doms_p = doms_p.difference([0])
+                    new_doms.intersection_update(doms_p)
+                new_doms.add(n)
+                if len(new_doms) != len(dominators[n]):
+                    dominators[n] = new_doms
+                    change = True
+    return dominators
+
+def set_immediate_dominators(g):
+    """Find and store the immediate dominator for every node in g."""
+    dominators = get_dominators(g)
+    for n in g.node_list():
+        block = g.node_data(n)
+        doms_n = dominators[n].difference([n])
+        for dom in doms_n:
+            if not any(dom in dominators[d] for d in doms_n.difference([dom])):
+                block.idom = dom
+                break
+        if not block.idom:
+            block.idom = 0
+
+def two_way_struct(g):
+    """Structure two-way conditionals."""
+    def node_to_revpo(n):
+        """Node ID -> reverse post-order number."""
+        return g.node_data(n).rev_postorder
+    def in_head_latch(block):
+        """Return True if block is a loop header or latching node."""
+        return block.loop_latch == block or block.loop_head == block
+    nodes = g.node_list()[:]
+    nodes.sort(key=node_to_revpo, reverse=True)
+    unresolved = []
+    for m in nodes:
+        block = g.node_data(m)
+        if block.btype == BT.two_way and not in_head_latch(block):
+            dominated_nways = [i for i in g.node_list()
+                               if g.node_data(i).idom == m
+                               and g.inc_degree(i) >= 2]
+            if dominated_nways:
+                n = max(dominated_nways, key=node_to_revpo)
+                block.if_follow = n
+                for unres in unresolved:
+                    g.node_data(unres).if_follow = n
+                unresolved = []
+            else:
+                unresolved.append(m)
+
+def merge_conditionals(g, n0, n1, type):
+    """
+    Merge the two conditional jump blocks n0 and n1 into n0.
+
+    Arguments:
+    g -- a control flow graph
+    n0 -- a node in g (becomes the merge of n0 and n1)
+    n1 -- a node in g
+    cond -- the new condition for n0
+    """
+    # does NOT change underlying CondJump instruction yet
+    # it should not matter for output generated by the decompiler
+    block = g.node_data(n0)
+    pos0, targ0, cond0 = block.type_info
+    pos1, targ1, cond1 = g.node_data(n1).type_info
+    t1, e1 = get_then_else(g, n1)
+    if type == 0:
+        g.add_edge(n0, e1)
+        nt = g.node_data(e1).get_first()
+        nc = "(%s) && !(%s)" % (cond0, cond1)
+    elif type == 1:
+        g.add_edge(n0, t1)
+        nt = g.node_data(t1).get_first()
+        nc = "(%s) && (%s)" % (cond0, cond1)
+    elif type == 2:
+        g.add_edge(n0, e1)
+        nt = g.node_data(e1).get_first()
+        nc = "(%s) || (%s)" % (cond0, cond1)
+    elif type == 3:
+        g.add_edge(n0, t1)
+        nt = g.node_data(t1).get_first()
+        nc = "(%s) || !(%s)" % (cond0, cond1)
+    block.type_info = (True, nt, nc)
+    g.hide_node(n1)
+
+def comp_conds_struct(g):
+    """Structure compound conditions in g."""
+    change = True
+    while change:
+        change = False
+        for n in postorder(g):
+            block = g.node_data(n)
+            if block.btype == BT.two_way:
+                t, e = get_then_else(g, n)
+                tblock = g.node_data(t)
+                eblock = g.node_data(e)
+                if tblock.btype == BT.two_way \
+                        and len(tblock) == 1 \
+                        and g.inc_degree(t) == 1:
+                    tt, te = get_then_else(g, t)
+                    if tt == e:
+                        merge_conditionals(g, n, t, 0)
+                        change = True
+                    elif te == e:
+                        merge_conditionals(g, n, t, 1)
+                        change = True
+                elif eblock.btype == BT.two_way \
+                        and len(eblock) == 1 \
+                        and g.inc_degree(e) == 1:
+                    et, ee = get_then_else(g, e)
+                    if et == t:
+                        merge_conditionals(g, n, e, 2)
+                        change = True
+                    elif ee == t:
+                        merge_conditionals(g, n, e, 3)
+                        change = True

Modified: tools/branches/gsoc2007-decompiler/finalD/decompiler.py
===================================================================
--- tools/branches/gsoc2007-decompiler/finalD/decompiler.py	2007-08-18 06:18:21 UTC (rev 28650)
+++ tools/branches/gsoc2007-decompiler/finalD/decompiler.py	2007-08-18 06:19:25 UTC (rev 28651)
@@ -8,6 +8,8 @@
 import array
 from optparse import OptionParser
 
+from altgraph import Dot
+
 class Decompiler:
     """The Core"""
     pass
@@ -16,6 +18,8 @@
     parser = OptionParser()
     parser.add_option("-d", "--disassemble", dest="disonly",
                       action="store_true", default=False)
+    parser.add_option("-g", "--graphviz", dest="dotty",
+                      action="store_true", default=False)
     (options, args) = parser.parse_args()
     f = open(args[0], 'r')
     sc = scumm.SCUMM345(array.array('B', f.read()),
@@ -30,14 +34,27 @@
             if not options.disonly:
                 blocks = cfg.gen_basic_blocks(decoded)
                 graph = cfg.gen_control_flow_graph(blocks, decoded)
+                if options.dotty:
+                    Dot.Dot(graph).display()
+                cfg.set_immediate_dominators(graph)
                 intervals = cfg.gen_intervals(graph)
                 dseq = cfg.gen_derived_sequence(graph, intervals)
-                for dg, di in dseq:
-                    print dg
-                    for eid in dg.edge_list():
-                        print dg.edge_by_id(eid)
-                    print di
-                    print '-'*72
+#                 for dg, di in dseq:
+#                     print dg
+#                     for nid in dg.node_list():
+#                         print dg.node_data(nid)
+#                     for eid in dg.edge_list():
+#                         print dg.edge_by_id(eid)
+#                     print di
+#                     print '-'*72
+                cfg.loop_struct(dseq)
+                cfg.two_way_struct(graph)
+                cfg.comp_conds_struct(graph)
+                for nid in graph.node_list():
+                    block = graph.node_data(nid)
+                    print block
+                    for instr in block.instrs:
+                        print decoded[instr]
             else:
                 decoded_tuples = decoded.items()
                 decoded_tuples.sort(key=lambda t: t[0])


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.




More information about the Scummvm-git-logs mailing list