[Scummvm-cvs-logs] SF.net SVN: scummvm:[41643] tools/branches/gsoc2009-decompiler/decompiler

kjdf at users.sourceforge.net kjdf at users.sourceforge.net
Thu Jun 18 21:04:13 CEST 2009


Revision: 41643
          http://scummvm.svn.sourceforge.net/scummvm/?rev=41643&view=rev
Author:   kjdf
Date:     2009-06-18 19:04:13 +0000 (Thu, 18 Jun 2009)

Log Message:
-----------
decompiler: ported control flow graph to use external graph library

Modified Paths:
--------------
    tools/branches/gsoc2009-decompiler/decompiler/Makefile
    tools/branches/gsoc2009-decompiler/decompiler/cfg.h
    tools/branches/gsoc2009-decompiler/decompiler/decompiler.cc
    tools/branches/gsoc2009-decompiler/decompiler/graph.h
    tools/branches/gsoc2009-decompiler/decompiler/instruction.h
    tools/branches/gsoc2009-decompiler/decompiler/misc.h

Modified: tools/branches/gsoc2009-decompiler/decompiler/Makefile
===================================================================
--- tools/branches/gsoc2009-decompiler/decompiler/Makefile	2009-06-18 19:03:48 UTC (rev 41642)
+++ tools/branches/gsoc2009-decompiler/decompiler/Makefile	2009-06-18 19:04:13 UTC (rev 41643)
@@ -8,7 +8,7 @@
 decompiler: decompiler.o $(DEPS)
 	g++ -Wall -g  -lboost_program_options $^ -o $@
 
-decompiler.o: decompiler.cc misc.h instruction.h parser.h reader.h cfg.h
+decompiler.o: decompiler.cc misc.h instruction.h parser.h reader.h cfg.h graph.h
 	g++ -Wall -g -c decompiler.cc -o decompiler.o
 
 clean:

Modified: tools/branches/gsoc2009-decompiler/decompiler/cfg.h
===================================================================
--- tools/branches/gsoc2009-decompiler/decompiler/cfg.h	2009-06-18 19:03:48 UTC (rev 41642)
+++ tools/branches/gsoc2009-decompiler/decompiler/cfg.h	2009-06-18 19:04:13 UTC (rev 41643)
@@ -1,189 +1,125 @@
 #ifndef CFG_H
 #define CFG_H
 
-#include <set>
-#include <vector>
+#include <map>
+#include <list>
 
 
 #include <cstdio>
 
 using namespace std;
 
+#include "graph.h"
 
 #include "instruction.h"
 #include "misc.h"
 
 
-struct Node {
+struct Block {
 
-	uint32 _id;
-	static uint32 _g_id;
-	vector<Node*> _in, _out;
+	Script &_script;
+	index_t _begin, _end;
 
-	Node() {
-		_id = _g_id++;
+	Block(Script &script, index_t begin, index_t end) : _script(script), _begin(begin), _end(end) {
 	}
 
-	void removeInEdge(Node *from) {
-		for (vector<Node*>::iterator it = _in.begin(); it != _in.end(); it++)
-			if (*it == from) {
-				_in.erase(it);
-				return;
-			}
+	virtual void print(ostream &out) {
 	}
 
-	virtual ~Node() {
-	};
 };
 
-uint32 Node::_g_id = 0;
 
+struct BasicBlock : public Block {
 
-struct BasicBlock : public Node {
-
-	index_t _start, _end;
-
-	BasicBlock(index_t start, index_t end) : Node(), _start(start), _end(end) {
+	BasicBlock(Script &script, index_t begin, index_t end) : Block(script, begin, end) {
 	}
 
-	void printHeader(Script &script) {
-		printf("%d (%04x..%04x)", _id, script[_start]->_addr-8, script[_end-1]->_addr-8);
+	void print(ostream &out) {
+		for (index_t i = _begin; i <= _end; i++)
+			_script.print(out, i);
 	}
 
-	virtual void print(Script &script) {
-		printf("=== ");
-		printHeader(script);
-		printf(" in(");
-		for (unsigned i = 0; i < _in.size(); i++)
-			printf("%d%s", _in[i]->_id, i == _in.size()-1 ? "" : ",");
-		printf(") out(");
-		for (unsigned i = 0; i < _out.size(); i++)
-			printf("%d%s", _out[i]->_id, i == _out.size()-1 ? "" : ",");
-		printf("):\n");
-		for (unsigned i = _start; i < _end; i++)
-			script.print(i);
-		printf("===\n\n");
-	}
 };
 
 
 struct CFG {
 
-	vector<BasicBlock*> _blocks;
-	Script &_script;
-
-	void printBasicBlocks() {
-		for (uint32 i = 0; i < _blocks.size(); i++)
-			_blocks[i]->print(_script);
-	}
-
-	void printDot() {
-		printf("digraph G {\n");
-		for (uint32 i = 0; i < _blocks.size(); i++) {
-			BasicBlock *bb = _blocks[i];
-			for (uint32 j = 0; j < bb->_out.size(); j++) {
-				printf("\""); bb->printHeader(_script); printf("\"");
-				printf(" -> ");
-				printf("\""); ((BasicBlock*)bb->_out[j])->printHeader(_script); printf("\"");
-				printf(" %s\n", j == 0 ? "[style=bold]" : "");
-			}
-			printf("\""); bb->printHeader(_script); printf("\"\n");
+	void printBasicBlocks(ostream &out) {
+		foreach (Block *block, _blocks) {
+			block->print(out);
+			out << endl;
 		}
-		printf("}\n");
 	}
 
-	BasicBlock *blockByStart(index_t idx) {
-		for (uint32 i = 0; i < _blocks.size(); i++)
-			if (_blocks[i]->_start == idx)
-				return _blocks[i];
-		return 0;
+	static string printer(Block *block) {
+		stringstream ret;
+		block->print(ret);
+		return ret.str();
 	}
 
-	BasicBlock *blockByEnd(index_t idx) {
-		for (uint32 i = 0; i < _blocks.size(); i++)
-			if (_blocks[i]->_end == idx)
-				return _blocks[i];
-		return 0;
+	void printDot(ostream &out) {
+		out << _graph.graphvizPrint(printer);
 	}
 
-	void addEdge(BasicBlock *from, BasicBlock *to) {
-		from->_out.push_back(to);
-		to->_in.push_back(from);
-	}
-
 	void removeJumpsToJumps() {
 		for (bool changed = true; changed; ) {
 			changed = false;
-			for (uint32 i = 0; i < _blocks.size(); i++)
-				for (uint32 j = 0; j < _blocks[i]->_out.size(); j++) {
-					BasicBlock *bbout = (BasicBlock*) _blocks[i]->_out[j];
-					Jump *jump = dynamic_cast<Jump*>(_script[bbout->_start]);
-					if (jump && !dynamic_cast<CondJump*>(jump)) {
+			foreach (Block *bu, _blocks) {
+				Node *u = _nodes[bu->_begin];
+				foreach (Node *v, u->out()) {
+					Block *bv = v->_data;
+					Jump *jump = dynamic_cast<Jump*>(_script[bv->_begin]);
+					if (jump && !dynamic_cast<CondJump*>(jump) && jump->target() != jump->_addr) {
 						changed = true;
-						BasicBlock *newtgt = blockByStart(_script.index(jump->target()));
-						bbout->removeInEdge(_blocks[i]);
-						newtgt->_in.push_back(_blocks[i]);
-						_blocks[i]->_out[j] = newtgt;
+						_graph.replaceEdges(u, v, _nodes[_script.index(jump->target())]);
 					}
 				}
+			}
 		}
 	}
 
-	// TODO
-	// after this _blocks[i]->_id == i no longer holds
-	// also it won't work for more than one graph so we need a better way
-	// (move to _blocks being a list and map<id,Block*> for quick access?)
 	void removeDeadBlocks() {
-		set<uint32> visited;
-		vector<uint32> stack;
-		visited.insert(0);
-		stack.push_back(0);
-		while (!stack.empty()) {
-			uint32 id = stack.back();
-			stack.pop_back();
-			Node *node = _blocks[id];
-			for (uint32 i = 0; i < node->_out.size(); i++)
-				if (visited.find(node->_out[i]->_id) == visited.end()) {
-					visited.insert(node->_out[i]->_id);
-					stack.push_back(node->_out[i]->_id);
-				}
-		}
-		for (vector<BasicBlock*>::iterator it = _blocks.begin(); it != _blocks.end(); )
-			if (visited.find((*it)->_id) == visited.end())
-				it = _blocks.erase(it);
-			else
-				it++;
+		_graph.removeUnreachableNodes(_nodes[0]);
 	}
 
+	typedef Graph<Block*>::Node Node;
+	map<index_t, Node*> _nodes;
+	list<Block*> _blocks;
+	Script &_script;
+
+	Graph<Block*> _graph;
+
+	template<typename Container, typename Element>
+	static bool contains(const Container &c, const Element &e) {
+		return c.find(e) != c.end();
+	}
+
 	CFG(Script &script) : _script(script) {
-		Jump *j;
-		set<address_t> targets;
-		targets.insert(0);
-		for (index_t i = 0; i < script.size(); i++) {
-			if ((j = dynamic_cast<Jump*>(script[i]))) {
-				targets.insert(script.index(j->_addr+j->_offset));
-				if (dynamic_cast<CondJump*>(script[i]) && i != script.size()-1)
-					targets.insert(i+1);
+		_nodes[script.size()] = 0;
+		Jump *jump;
+		for (index_t i = 0; i < script.size(); i++)
+			if ((jump = dynamic_cast<Jump*>(script[i]))) {
+				_nodes[script.index(jump->target())] = 0;
+				_nodes[i+1] = 0;
 			}
-		}
-		index_t bbstart = 0;
+		index_t begin = 0;
 		for (index_t i = 0; i < script.size(); i++)
-			if (targets.find(i+1) != targets.end() || dynamic_cast<Jump*>(script[i])) {
-				_blocks.push_back(new BasicBlock(bbstart, i+1));
-				bbstart = i+1;
+			if (contains(_nodes, i+1)) {
+				_blocks.push_back(new BasicBlock(script, begin, i));
+				_nodes[begin] = _graph.addNode(_blocks.back());
+				begin = i+1;
 			}
-		if (bbstart != script.size())
-			_blocks.push_back(new BasicBlock(bbstart, script.size()));
-		for (index_t i = 0; i < script.size(); i++) {
-			BasicBlock *bb = blockByEnd(i+1);
-			if ((j = dynamic_cast<Jump*>(script[i])))
-				addEdge(bb, blockByStart(script.index(j->target())));
-			if (targets.find(i+1) != targets.end() || dynamic_cast<CondJump*>(script[i]))
-				addEdge(bb, blockByStart(i+1));
+		foreach (Block *block, _blocks) {
+			Node *node = _nodes[block->_begin];
+			index_t end = block->_end;
+			if ((jump = dynamic_cast<Jump*>(script[end])))
+				_graph.addEdge(node, _nodes[script.index(jump->target())]);
+			if (end+1 < script.size() && contains(_nodes, end+1) && (!jump || dynamic_cast<CondJump*>(jump)))
+				_graph.addEdge(node, _nodes[end+1]);
 		}
-	};
-
+	}
 };
 
 
 #endif
+

Modified: tools/branches/gsoc2009-decompiler/decompiler/decompiler.cc
===================================================================
--- tools/branches/gsoc2009-decompiler/decompiler/decompiler.cc	2009-06-18 19:03:48 UTC (rev 41642)
+++ tools/branches/gsoc2009-decompiler/decompiler/decompiler.cc	2009-06-18 19:04:13 UTC (rev 41643)
@@ -41,18 +41,18 @@
 	Script script(new Scumm6Parser, vars["inputfile"].as<string>().c_str());
 	if (vars.count("disasm")) {
 		for (size_t i = 0; i < script.size(); i++)
-			script.print(i);
+			script.print(cout, i);
 		exit(0);
 	}
 	CFG cfg(script);
 	if (vars.count("blocks")) {
-		cfg.printBasicBlocks();
+		cfg.printBasicBlocks(cout);
 		exit(0);
 	}
 	cfg.removeJumpsToJumps();
 	cfg.removeDeadBlocks();
 	if (vars.count("graph")) {
-		cfg.printDot();
+		cfg.printDot(cout);
 		exit(0);
 	}
 	return 0;

Modified: tools/branches/gsoc2009-decompiler/decompiler/graph.h
===================================================================
--- tools/branches/gsoc2009-decompiler/decompiler/graph.h	2009-06-18 19:03:48 UTC (rev 41642)
+++ tools/branches/gsoc2009-decompiler/decompiler/graph.h	2009-06-18 19:04:13 UTC (rev 41643)
@@ -27,8 +27,10 @@
 
 		std::list<Node*> _in;
 		std::list<Node*> _out;
+		bool _hidden;
+		bool _visited;
 
-		Node(const Data &data) : _data(data) {
+		Node(const Data &data) : _data(data), _hidden(false) {
 		}
 
 		~Node() {
@@ -51,28 +53,62 @@
 		return node;
 	}
 
+	void hideNode(Node *u) {
+		foreach (Node *v, u->_in)
+			v->_out.remove(u);
+		foreach (Node *v, u->_out)
+			v->_in.remove(u);
+		u->_hidden = true;
+	}
+
 	void addEdge(Node *from, Node *to) {
 		from->_out.push_back(to);
 		to->_in.push_back(from);
 	}
 
+	void replaceEdges(Node *from, Node *oldTo, Node *newTo) {
+		size_t n = count(oldTo->_in.begin(), oldTo->_in.end(), from);
+		oldTo->_in.remove(from);
+		fill_n(back_inserter(newTo->_in), n, from);
+		foreach (Node *&node, from->_out)
+			if (node == oldTo)
+				node = newTo;
+	}
+
+	void visit(Node *u) {
+		u->_visited = true;
+		foreach (Node *v, u->_out)
+			if (!v->_visited)
+				visit(v);
+	}
+
+	void removeUnreachableNodes(Node *start) {
+		foreach (Node *u, _nodes)
+			u->_visited = false;
+		visit(start);
+		foreach (Node *u, _nodes)
+			if (!u->_visited)
+				hideNode(u);
+	}
+
 	template<typename Printer>   // printer is a functor taking Data and returning a string
 	std::string graphvizPrint(Printer printer, const std::string &fontname="Courier", int fontsize=14) const {
 		std::stringstream ret;
 		ret << "digraph G {" << std::endl;
-		foreach (Node *u, _nodes) {
-			ret << '"' << u << '"'
-				<< "[fontname=" << '"' << fontname << '"'
-				<< ",fontsize=" << fontsize
-				<< ",shape=box"
-				<< ",label=" << '"' << graphvizEscapeLabel(printer(u->_data)) << '"'
-				<< "];" << std::endl;
-			foreach (Node *v, u->_out)
+		foreach (Node *u, _nodes)
+			if (!u->_hidden) {
 				ret << '"' << u << '"'
-					<< " -> "
-					<< '"' << v << '"'
-					<< ";" << std::endl;
-		}
+					<< "[fontname=" << '"' << fontname << '"'
+					<< ",fontsize=" << fontsize
+					<< ",shape=box"
+					<< ",label=" << '"' << graphvizEscapeLabel(printer(u->_data)) << '"'
+					<< "];" << std::endl;
+				foreach (Node *v, u->_out)
+					ret << '"' << u << '"'
+						<< " -> "
+						<< '"' << v << '"'
+						<< ";" << std::endl;
+			}
 		ret << "}" << std::endl;
 		return ret.str();
 	}

Modified: tools/branches/gsoc2009-decompiler/decompiler/instruction.h
===================================================================
--- tools/branches/gsoc2009-decompiler/decompiler/instruction.h	2009-06-18 19:03:48 UTC (rev 41642)
+++ tools/branches/gsoc2009-decompiler/decompiler/instruction.h	2009-06-18 19:04:13 UTC (rev 41643)
@@ -66,15 +66,15 @@
 		return _instructions.size();
 	}
 
-	void print(index_t i) {
+	void print(ostream &out, index_t i) {
 		if (i >= 1 && _instructions[i]->_addr == _instructions[i-1]->_addr)
-			printf("         |           %s", _instructions[i]->_description.c_str());
+			out << "       " << _instructions[i]->_description;
 		else
-			printf("[d] %04x | [r] %04x: %s", _instructions[i]->_addr-8, _instructions[i]->_addr, _instructions[i]->_description.c_str());
-		Jump *j = dynamic_cast<Jump*>(_instructions[i]);
-		if (j)
-			printf(" ([d] %04x | [r] %04x)", j->target()-8, j->target());
-		printf("\n");
+			out << phex(_instructions[i]->_addr-8) << "  " << _instructions[i]->_description;
+		Jump *jump = dynamic_cast<Jump*>(_instructions[i]);
+		if (jump)
+			out << " (" << phex(jump->target()-8) << ")";
+		out << endl;
 	}
 
 };

Modified: tools/branches/gsoc2009-decompiler/decompiler/misc.h
===================================================================
--- tools/branches/gsoc2009-decompiler/decompiler/misc.h	2009-06-18 19:03:48 UTC (rev 41642)
+++ tools/branches/gsoc2009-decompiler/decompiler/misc.h	2009-06-18 19:04:13 UTC (rev 41643)
@@ -2,6 +2,8 @@
 #define MISC_H
 
 #include <fstream>
+#include <sstream>
+#include <iomanip>
 
 #include <boost/foreach.hpp>
 #ifndef foreach
@@ -18,6 +20,12 @@
 typedef uint32 address_t; // bytecode address
 typedef uint32 index_t;   // instruction number in intermediate script
 
+string phex(int i, int width=4) {
+	ostringstream ret;
+	ret << setfill('0') << setw(width) << i << 'h';
+	return ret.str();
+}
+
 uint32 read_be_uint32(ifstream &f) {
 	uint32 ret = 0;
 	ret |= f.get() << 24;


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.




More information about the Scummvm-git-logs mailing list