[Scummvm-cvs-logs] SF.net SVN: scummvm:[41643] tools/branches/gsoc2009-decompiler/decompiler
kjdf at users.sourceforge.net
kjdf at users.sourceforge.net
Thu Jun 18 21:04:13 CEST 2009
Revision: 41643
http://scummvm.svn.sourceforge.net/scummvm/?rev=41643&view=rev
Author: kjdf
Date: 2009-06-18 19:04:13 +0000 (Thu, 18 Jun 2009)
Log Message:
-----------
decompiler: ported control flow graph to use external graph library
Modified Paths:
--------------
tools/branches/gsoc2009-decompiler/decompiler/Makefile
tools/branches/gsoc2009-decompiler/decompiler/cfg.h
tools/branches/gsoc2009-decompiler/decompiler/decompiler.cc
tools/branches/gsoc2009-decompiler/decompiler/graph.h
tools/branches/gsoc2009-decompiler/decompiler/instruction.h
tools/branches/gsoc2009-decompiler/decompiler/misc.h
Modified: tools/branches/gsoc2009-decompiler/decompiler/Makefile
===================================================================
--- tools/branches/gsoc2009-decompiler/decompiler/Makefile 2009-06-18 19:03:48 UTC (rev 41642)
+++ tools/branches/gsoc2009-decompiler/decompiler/Makefile 2009-06-18 19:04:13 UTC (rev 41643)
@@ -8,7 +8,7 @@
decompiler: decompiler.o $(DEPS)
g++ -Wall -g -lboost_program_options $^ -o $@
-decompiler.o: decompiler.cc misc.h instruction.h parser.h reader.h cfg.h
+decompiler.o: decompiler.cc misc.h instruction.h parser.h reader.h cfg.h graph.h
g++ -Wall -g -c decompiler.cc -o decompiler.o
clean:
Modified: tools/branches/gsoc2009-decompiler/decompiler/cfg.h
===================================================================
--- tools/branches/gsoc2009-decompiler/decompiler/cfg.h 2009-06-18 19:03:48 UTC (rev 41642)
+++ tools/branches/gsoc2009-decompiler/decompiler/cfg.h 2009-06-18 19:04:13 UTC (rev 41643)
@@ -1,189 +1,125 @@
#ifndef CFG_H
#define CFG_H
-#include <set>
-#include <vector>
+#include <map>
+#include <list>
#include <cstdio>
using namespace std;
+#include "graph.h"
#include "instruction.h"
#include "misc.h"
-struct Node {
+struct Block {
- uint32 _id;
- static uint32 _g_id;
- vector<Node*> _in, _out;
+ Script &_script;
+ index_t _begin, _end;
- Node() {
- _id = _g_id++;
+ Block(Script &script, index_t begin, index_t end) : _script(script), _begin(begin), _end(end) {
}
- void removeInEdge(Node *from) {
- for (vector<Node*>::iterator it = _in.begin(); it != _in.end(); it++)
- if (*it == from) {
- _in.erase(it);
- return;
- }
+ virtual void print(ostream &out) {
}
- virtual ~Node() {
- };
};
-uint32 Node::_g_id = 0;
+struct BasicBlock : public Block {
-struct BasicBlock : public Node {
-
- index_t _start, _end;
-
- BasicBlock(index_t start, index_t end) : Node(), _start(start), _end(end) {
+ BasicBlock(Script &script, index_t begin, index_t end) : Block(script, begin, end) {
}
- void printHeader(Script &script) {
- printf("%d (%04x..%04x)", _id, script[_start]->_addr-8, script[_end-1]->_addr-8);
+ void print(ostream &out) {
+ for (index_t i = _begin; i <= _end; i++)
+ _script.print(out, i);
}
- virtual void print(Script &script) {
- printf("=== ");
- printHeader(script);
- printf(" in(");
- for (unsigned i = 0; i < _in.size(); i++)
- printf("%d%s", _in[i]->_id, i == _in.size()-1 ? "" : ",");
- printf(") out(");
- for (unsigned i = 0; i < _out.size(); i++)
- printf("%d%s", _out[i]->_id, i == _out.size()-1 ? "" : ",");
- printf("):\n");
- for (unsigned i = _start; i < _end; i++)
- script.print(i);
- printf("===\n\n");
- }
};
struct CFG {
- vector<BasicBlock*> _blocks;
- Script &_script;
-
- void printBasicBlocks() {
- for (uint32 i = 0; i < _blocks.size(); i++)
- _blocks[i]->print(_script);
- }
-
- void printDot() {
- printf("digraph G {\n");
- for (uint32 i = 0; i < _blocks.size(); i++) {
- BasicBlock *bb = _blocks[i];
- for (uint32 j = 0; j < bb->_out.size(); j++) {
- printf("\""); bb->printHeader(_script); printf("\"");
- printf(" -> ");
- printf("\""); ((BasicBlock*)bb->_out[j])->printHeader(_script); printf("\"");
- printf(" %s\n", j == 0 ? "[style=bold]" : "");
- }
- printf("\""); bb->printHeader(_script); printf("\"\n");
+ void printBasicBlocks(ostream &out) {
+ foreach (Block *block, _blocks) {
+ block->print(out);
+ out << endl;
}
- printf("}\n");
}
- BasicBlock *blockByStart(index_t idx) {
- for (uint32 i = 0; i < _blocks.size(); i++)
- if (_blocks[i]->_start == idx)
- return _blocks[i];
- return 0;
+ static string printer(Block *block) {
+ stringstream ret;
+ block->print(ret);
+ return ret.str();
}
- BasicBlock *blockByEnd(index_t idx) {
- for (uint32 i = 0; i < _blocks.size(); i++)
- if (_blocks[i]->_end == idx)
- return _blocks[i];
- return 0;
+ void printDot(ostream &out) {
+ out << _graph.graphvizPrint(printer);
}
- void addEdge(BasicBlock *from, BasicBlock *to) {
- from->_out.push_back(to);
- to->_in.push_back(from);
- }
-
void removeJumpsToJumps() {
for (bool changed = true; changed; ) {
changed = false;
- for (uint32 i = 0; i < _blocks.size(); i++)
- for (uint32 j = 0; j < _blocks[i]->_out.size(); j++) {
- BasicBlock *bbout = (BasicBlock*) _blocks[i]->_out[j];
- Jump *jump = dynamic_cast<Jump*>(_script[bbout->_start]);
- if (jump && !dynamic_cast<CondJump*>(jump)) {
+ foreach (Block *bu, _blocks) {
+ Node *u = _nodes[bu->_begin];
+ foreach (Node *v, u->out()) {
+ Block *bv = v->_data;
+ Jump *jump = dynamic_cast<Jump*>(_script[bv->_begin]);
+ if (jump && !dynamic_cast<CondJump*>(jump) && jump->target() != jump->_addr) {
changed = true;
- BasicBlock *newtgt = blockByStart(_script.index(jump->target()));
- bbout->removeInEdge(_blocks[i]);
- newtgt->_in.push_back(_blocks[i]);
- _blocks[i]->_out[j] = newtgt;
+ _graph.replaceEdges(u, v, _nodes[_script.index(jump->target())]);
}
}
+ }
}
}
- // TODO
- // after this _blocks[i]->_id == i no longer holds
- // also it won't work for more than one graph so we need a better way
- // (move to _blocks being a list and map<id,Block*> for quick access?)
void removeDeadBlocks() {
- set<uint32> visited;
- vector<uint32> stack;
- visited.insert(0);
- stack.push_back(0);
- while (!stack.empty()) {
- uint32 id = stack.back();
- stack.pop_back();
- Node *node = _blocks[id];
- for (uint32 i = 0; i < node->_out.size(); i++)
- if (visited.find(node->_out[i]->_id) == visited.end()) {
- visited.insert(node->_out[i]->_id);
- stack.push_back(node->_out[i]->_id);
- }
- }
- for (vector<BasicBlock*>::iterator it = _blocks.begin(); it != _blocks.end(); )
- if (visited.find((*it)->_id) == visited.end())
- it = _blocks.erase(it);
- else
- it++;
+ _graph.removeUnreachableNodes(_nodes[0]);
}
+ typedef Graph<Block*>::Node Node;
+ map<index_t, Node*> _nodes;
+ list<Block*> _blocks;
+ Script &_script;
+
+ Graph<Block*> _graph;
+
+ template<typename Container, typename Element>
+ static bool contains(const Container &c, const Element &e) {
+ return c.find(e) != c.end();
+ }
+
CFG(Script &script) : _script(script) {
- Jump *j;
- set<address_t> targets;
- targets.insert(0);
- for (index_t i = 0; i < script.size(); i++) {
- if ((j = dynamic_cast<Jump*>(script[i]))) {
- targets.insert(script.index(j->_addr+j->_offset));
- if (dynamic_cast<CondJump*>(script[i]) && i != script.size()-1)
- targets.insert(i+1);
+ _nodes[script.size()] = 0;
+ Jump *jump;
+ for (index_t i = 0; i < script.size(); i++)
+ if ((jump = dynamic_cast<Jump*>(script[i]))) {
+ _nodes[script.index(jump->target())] = 0;
+ _nodes[i+1] = 0;
}
- }
- index_t bbstart = 0;
+ index_t begin = 0;
for (index_t i = 0; i < script.size(); i++)
- if (targets.find(i+1) != targets.end() || dynamic_cast<Jump*>(script[i])) {
- _blocks.push_back(new BasicBlock(bbstart, i+1));
- bbstart = i+1;
+ if (contains(_nodes, i+1)) {
+ _blocks.push_back(new BasicBlock(script, begin, i));
+ _nodes[begin] = _graph.addNode(_blocks.back());
+ begin = i+1;
}
- if (bbstart != script.size())
- _blocks.push_back(new BasicBlock(bbstart, script.size()));
- for (index_t i = 0; i < script.size(); i++) {
- BasicBlock *bb = blockByEnd(i+1);
- if ((j = dynamic_cast<Jump*>(script[i])))
- addEdge(bb, blockByStart(script.index(j->target())));
- if (targets.find(i+1) != targets.end() || dynamic_cast<CondJump*>(script[i]))
- addEdge(bb, blockByStart(i+1));
+ foreach (Block *block, _blocks) {
+ Node *node = _nodes[block->_begin];
+ index_t end = block->_end;
+ if ((jump = dynamic_cast<Jump*>(script[end])))
+ _graph.addEdge(node, _nodes[script.index(jump->target())]);
+ if (end+1 < script.size() && contains(_nodes, end+1) && (!jump || dynamic_cast<CondJump*>(jump)))
+ _graph.addEdge(node, _nodes[end+1]);
}
- };
-
+ }
};
#endif
+
Modified: tools/branches/gsoc2009-decompiler/decompiler/decompiler.cc
===================================================================
--- tools/branches/gsoc2009-decompiler/decompiler/decompiler.cc 2009-06-18 19:03:48 UTC (rev 41642)
+++ tools/branches/gsoc2009-decompiler/decompiler/decompiler.cc 2009-06-18 19:04:13 UTC (rev 41643)
@@ -41,18 +41,18 @@
Script script(new Scumm6Parser, vars["inputfile"].as<string>().c_str());
if (vars.count("disasm")) {
for (size_t i = 0; i < script.size(); i++)
- script.print(i);
+ script.print(cout, i);
exit(0);
}
CFG cfg(script);
if (vars.count("blocks")) {
- cfg.printBasicBlocks();
+ cfg.printBasicBlocks(cout);
exit(0);
}
cfg.removeJumpsToJumps();
cfg.removeDeadBlocks();
if (vars.count("graph")) {
- cfg.printDot();
+ cfg.printDot(cout);
exit(0);
}
return 0;
Modified: tools/branches/gsoc2009-decompiler/decompiler/graph.h
===================================================================
--- tools/branches/gsoc2009-decompiler/decompiler/graph.h 2009-06-18 19:03:48 UTC (rev 41642)
+++ tools/branches/gsoc2009-decompiler/decompiler/graph.h 2009-06-18 19:04:13 UTC (rev 41643)
@@ -27,8 +27,10 @@
std::list<Node*> _in;
std::list<Node*> _out;
+ bool _hidden;
+ bool _visited;
- Node(const Data &data) : _data(data) {
+ Node(const Data &data) : _data(data), _hidden(false) {
}
~Node() {
@@ -51,28 +53,62 @@
return node;
}
+ void hideNode(Node *u) {
+ foreach (Node *v, u->_in)
+ v->_out.remove(u);
+ foreach (Node *v, u->_out)
+ v->_in.remove(u);
+ u->_hidden = true;
+ }
+
void addEdge(Node *from, Node *to) {
from->_out.push_back(to);
to->_in.push_back(from);
}
+ void replaceEdges(Node *from, Node *oldTo, Node *newTo) {
+ size_t n = count(oldTo->_in.begin(), oldTo->_in.end(), from);
+ oldTo->_in.remove(from);
+ fill_n(back_inserter(newTo->_in), n, from);
+ foreach (Node *&node, from->_out)
+ if (node == oldTo)
+ node = newTo;
+ }
+
+ void visit(Node *u) {
+ u->_visited = true;
+ foreach (Node *v, u->_out)
+ if (!v->_visited)
+ visit(v);
+ }
+
+ void removeUnreachableNodes(Node *start) {
+ foreach (Node *u, _nodes)
+ u->_visited = false;
+ visit(start);
+ foreach (Node *u, _nodes)
+ if (!u->_visited)
+ hideNode(u);
+ }
+
template<typename Printer> // printer is a functor taking Data and returning a string
std::string graphvizPrint(Printer printer, const std::string &fontname="Courier", int fontsize=14) const {
std::stringstream ret;
ret << "digraph G {" << std::endl;
- foreach (Node *u, _nodes) {
- ret << '"' << u << '"'
- << "[fontname=" << '"' << fontname << '"'
- << ",fontsize=" << fontsize
- << ",shape=box"
- << ",label=" << '"' << graphvizEscapeLabel(printer(u->_data)) << '"'
- << "];" << std::endl;
- foreach (Node *v, u->_out)
+ foreach (Node *u, _nodes)
+ if (!u->_hidden) {
ret << '"' << u << '"'
- << " -> "
- << '"' << v << '"'
- << ";" << std::endl;
- }
+ << "[fontname=" << '"' << fontname << '"'
+ << ",fontsize=" << fontsize
+ << ",shape=box"
+ << ",label=" << '"' << graphvizEscapeLabel(printer(u->_data)) << '"'
+ << "];" << std::endl;
+ foreach (Node *v, u->_out)
+ ret << '"' << u << '"'
+ << " -> "
+ << '"' << v << '"'
+ << ";" << std::endl;
+ }
ret << "}" << std::endl;
return ret.str();
}
Modified: tools/branches/gsoc2009-decompiler/decompiler/instruction.h
===================================================================
--- tools/branches/gsoc2009-decompiler/decompiler/instruction.h 2009-06-18 19:03:48 UTC (rev 41642)
+++ tools/branches/gsoc2009-decompiler/decompiler/instruction.h 2009-06-18 19:04:13 UTC (rev 41643)
@@ -66,15 +66,15 @@
return _instructions.size();
}
- void print(index_t i) {
+ void print(ostream &out, index_t i) {
if (i >= 1 && _instructions[i]->_addr == _instructions[i-1]->_addr)
- printf(" | %s", _instructions[i]->_description.c_str());
+ out << " " << _instructions[i]->_description;
else
- printf("[d] %04x | [r] %04x: %s", _instructions[i]->_addr-8, _instructions[i]->_addr, _instructions[i]->_description.c_str());
- Jump *j = dynamic_cast<Jump*>(_instructions[i]);
- if (j)
- printf(" ([d] %04x | [r] %04x)", j->target()-8, j->target());
- printf("\n");
+ out << phex(_instructions[i]->_addr-8) << " " << _instructions[i]->_description;
+ Jump *jump = dynamic_cast<Jump*>(_instructions[i]);
+ if (jump)
+ out << " (" << phex(jump->target()-8) << ")";
+ out << endl;
}
};
Modified: tools/branches/gsoc2009-decompiler/decompiler/misc.h
===================================================================
--- tools/branches/gsoc2009-decompiler/decompiler/misc.h 2009-06-18 19:03:48 UTC (rev 41642)
+++ tools/branches/gsoc2009-decompiler/decompiler/misc.h 2009-06-18 19:04:13 UTC (rev 41643)
@@ -2,6 +2,8 @@
#define MISC_H
#include <fstream>
+#include <sstream>
+#include <iomanip>
#include <boost/foreach.hpp>
#ifndef foreach
@@ -18,6 +20,12 @@
typedef uint32 address_t; // bytecode address
typedef uint32 index_t; // instruction number in intermediate script
+string phex(int i, int width=4) {
+ ostringstream ret;
+ ret << setfill('0') << setw(width) << i << 'h';
+ return ret.str();
+}
+
uint32 read_be_uint32(ifstream &f) {
uint32 ret = 0;
ret |= f.get() << 24;
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
More information about the Scummvm-git-logs
mailing list