[Scummvm-cvs-logs] SF.net SVN: scummvm:[41431] tools/branches/gsoc2009-decompiler/decompiler

kjdf at users.sourceforge.net kjdf at users.sourceforge.net
Wed Jun 10 17:17:44 CEST 2009


Revision: 41431
          http://scummvm.svn.sourceforge.net/scummvm/?rev=41431&view=rev
Author:   kjdf
Date:     2009-06-10 15:17:44 +0000 (Wed, 10 Jun 2009)

Log Message:
-----------
decompiler: basic blocks are now single class, new Script class to manage intermediate bytecode
queries, cleaning up graph code

Modified Paths:
--------------
    tools/branches/gsoc2009-decompiler/decompiler/cfg.h
    tools/branches/gsoc2009-decompiler/decompiler/decompiler.cc
    tools/branches/gsoc2009-decompiler/decompiler/instruction.h
    tools/branches/gsoc2009-decompiler/decompiler/misc.h
    tools/branches/gsoc2009-decompiler/decompiler/parser.h
    tools/branches/gsoc2009-decompiler/decompiler/reader.h

Modified: tools/branches/gsoc2009-decompiler/decompiler/cfg.h
===================================================================
--- tools/branches/gsoc2009-decompiler/decompiler/cfg.h	2009-06-10 15:11:17 UTC (rev 41430)
+++ tools/branches/gsoc2009-decompiler/decompiler/cfg.h	2009-06-10 15:17:44 UTC (rev 41431)
@@ -1,8 +1,10 @@
 #ifndef CFG_H
 #define CFG_H
 
+#include <set>
 #include <vector>
 
+
 #include <cstdio>
 
 using namespace std;
@@ -12,14 +14,31 @@
 #include "misc.h"
 
 
-struct BasicBlock {
-	static uint32 _g_id;
+struct Node {
 	uint32 _id;
-	uint32 _start, _end;
-	vector<BasicBlock*> _in;
-	BasicBlock(uint32 start, uint32 end) : _start(start), _end(end) {
+	static uint32 _g_id;
+	vector<Node*> _in, _out;
+	Node() {
 		_id = _g_id++;
 	}
+	virtual ~Node() {
+	};
+};
+
+uint32 Node::_g_id = 0;
+
+
+struct BasicBlock : public Node {
+	enum Type {
+		TwoWay,
+		OneWay,
+		End
+	};
+	Type _type;
+	index_t _start, _end;
+	BasicBlock(Type type, index_t start, index_t end) : Node(), _type(type), _start(start), _end(end) {
+	}
+
 	void printInsns(vector<Instruction*> &v) {
 		printHeader(v);
 		printf(" in(");
@@ -46,166 +65,102 @@
 	void printHeader(vector<Instruction*> &v) {
 		printf("%d (%04x..%04x)", _id, v[_start]->_addr-8, v[_end-1]->_addr-8);
 	}
-	virtual void print(vector<Instruction*> &v) = 0;
-	virtual ~BasicBlock() {
-	}
-};
-
-uint32 BasicBlock::_g_id = 0;
-
-struct BB2Way : public BasicBlock {
-	BasicBlock *_out1, *_out2;
-	BB2Way(uint32 start, uint32 end) : BasicBlock(start, end) {
-	}
-	void print(vector<Instruction*> &v) {
-		printf("=== BB2Way #");
+	virtual void print(vector<Instruction*> &v) {
+		printf("=== ");
 		printInsns(v);
 		printf("===\n\n");
 	}
-	void printOuts() {
-		printf("%d,%d", _out1->_id, _out2->_id);
-	}
-};
 
-struct BBFall : public BasicBlock {
-	BasicBlock *_out;
-	BBFall(uint32 start, uint32 end) : BasicBlock(start, end) {
-	}
-	void print(vector<Instruction*> &v) {
-		printf("=== BBFall #");
-		printInsns(v);
-		printf("===\n\n");
-	}
-	void printOuts() {
-		printf("%d", _out->_id);
-	}
 };
 
-struct BBEnd : public BasicBlock {
-	BBEnd(uint32 start, uint32 end) : BasicBlock(start, end) {
-	}
-	void print(vector<Instruction*> &v) {
-		printf("=== BBEnd #");
-		printInsns(v);
-		printf("===\n\n");
-	}
-};
 
-
 struct CFG {
 
 	vector<BasicBlock*> _blocks;
-	vector<uint32> _targets;
-	vector<Instruction*> *_v;
+	Script &_script;
 
-	void printBBs() {
+	void printBasicBlocks() {
 		for (uint32 i = 0; i < _blocks.size(); i++)
-			_blocks[i]->print(*_v);
+			_blocks[i]->print(_script._instructions);
 	}
 
 	void printDot() {
 		printf("digraph G {\n");
+		vector<Instruction*> *_v = &_script._instructions;
 		for (uint32 i = 0; i < _blocks.size(); i++) {
-			BB2Way *bb2way = dynamic_cast<BB2Way*>(_blocks[i]);
-			BBFall *bbfall = dynamic_cast<BBFall*>(_blocks[i]);
-			BBEnd  *bbend  = dynamic_cast<BBEnd*> (_blocks[i]);
-			if (bb2way) {
-				printf("\""); bb2way->printHeader(*_v); printf("\"");
+			BasicBlock *bb = _blocks[i];
+			if (bb->_type == BasicBlock::TwoWay) {
+				printf("\""); bb->printHeader(*_v); printf("\"");
 				printf(" -> ");
-				printf("\""); bb2way->_out1->printHeader(*_v); printf("\"");
+				printf("\""); ((BasicBlock*)bb->_out[0])->printHeader(*_v); printf("\"");
 				printf(" [style=bold]\n");
-				printf("\""); bb2way->printHeader(*_v); printf("\"");
+				printf("\""); bb->printHeader(*_v); printf("\"");
 				printf(" -> ");
-				printf("\""); bb2way->_out2->printHeader(*_v); printf("\"");
+				printf("\""); ((BasicBlock*)bb->_out[1])->printHeader(*_v); printf("\"");
 				printf("\n");
-			} else if (bbfall) {
-				printf("\""); bbfall->printHeader(*_v); printf("\"");
+			} else if (bb->_type == BasicBlock::OneWay) {
+				printf("\""); bb->printHeader(*_v); printf("\"");
 				printf(" -> ");
-				printf("\""); bbfall->_out->printHeader(*_v); printf("\"");
+				printf("\""); ((BasicBlock*)bb->_out[0])->printHeader(*_v); printf("\"");
 				printf(" [style=bold]\n");
-			} else if (bbend) {
-				printf("\""); bbend->printHeader(*_v); printf("\"");
+			} else if (bb->_type == BasicBlock::End) {
+				printf("\""); bb->printHeader(*_v); printf("\"");
 				printf("\n");
 			}
 		}
 		printf("}\n");
 	}
 
-	bool isTarget(uint32 addr) {
-		for (uint32 i = 0; i < _targets.size(); i++)
-			if (_targets[i] == addr)
-				return true;
-		return false;
-	}
-
-	BasicBlock *blockByStartIdx(uint32 idx) {
-		for (uint32 i = 0; i < _blocks.size(); i++)
+	BasicBlock *blockByStart(index_t idx) {
+		for (index_t i = 0; i < _blocks.size(); i++)
 			if (_blocks[i]->_start == idx)
 				return _blocks[i];
 		return 0;
 	}
 
-	BasicBlock *blockByEndIdx(uint32 idx) {
-		for (uint32 i = 0; i < _blocks.size(); i++)
+	BasicBlock *blockByEnd(index_t idx) {
+		for (index_t i = 0; i < _blocks.size(); i++)
 			if (_blocks[i]->_end == idx)
 				return _blocks[i];
 		return 0;
 	}
 
-	CFG(vector<Instruction*> &v) {
-		Script s(v);
-		_v = new vector<Instruction*>(v);
-		_targets.push_back(0);
-		for (uint32 i = 0; i < v.size(); i++) {
-			Jump *j = dynamic_cast<Jump*>(v[i]);
-			if (j) {
-				_targets.push_back(s.findIdx(j->_addr+j->_offset));
-				if (dynamic_cast<CondJump*>(v[i]) && i != v.size()-1)
-					_targets.push_back(s.findIdx(v[i+1]->_addr));
+	void addEdge(BasicBlock *from, BasicBlock *to) {
+		from->_out.push_back(to);
+		to->_in.push_back(from);
+	}
+
+	CFG(Script &script) : _script(script) {
+		Jump *j;
+		set<address_t> targets;
+		targets.insert(0);
+		for (index_t i = 0; i < script.size(); i++) {
+			if ((j = dynamic_cast<Jump*>(script[i]))) {
+				targets.insert(script.index(j->_addr+j->_offset));
+				if (dynamic_cast<CondJump*>(script[i]) && i != script.size()-1)
+					targets.insert(i+1);
 			}
 		}
-		uint32 bbstart = 0;
-		for (uint32 i = 0; i < v.size(); i++)
-			if (dynamic_cast<CondJump*>(v[i])) {
-				_blocks.push_back(new BB2Way(bbstart, i+1));
+		index_t bbstart = 0;
+		for (index_t i = 0; i < script.size(); i++)
+			if (dynamic_cast<CondJump*>(script[i])) {
+				_blocks.push_back(new BasicBlock(BasicBlock::TwoWay, bbstart, i+1));
 				bbstart = i+1;
-			}
-			else if (dynamic_cast<Jump*>(v[i])) {
-				_blocks.push_back(new BBFall(bbstart, i+1));
+			} else if (dynamic_cast<Jump*>(script[i])) {
+				_blocks.push_back(new BasicBlock(BasicBlock::OneWay, bbstart, i+1));
 				bbstart = i+1;
-			} else if (isTarget(i+1)) {
-				_blocks.push_back(new BBFall(bbstart, i+1));
+			} else if (targets.find(i+1) != targets.end()) {
+				_blocks.push_back(new BasicBlock(BasicBlock::OneWay, bbstart, i+1));
 				bbstart = i+1;
 			}
-		if (bbstart != v.size())
-			_blocks.push_back(new BBEnd(bbstart, v.size()));
-		for (uint32 i = 0; i < v.size(); i++) {
-			Jump *j = dynamic_cast<Jump*>(v[i]);
-			CondJump *cj = dynamic_cast<CondJump*>(v[i]);
-			if (cj) {
-				BB2Way *bb2way = dynamic_cast<BB2Way*>(blockByEndIdx(i+1));
-				bb2way->_out1 = blockByStartIdx(s.findIdx(cj->_addr+cj->_offset));
-				bb2way->_out2 = blockByStartIdx(s.findIdx(v[i+1]->_addr));
-			}
-			else if (j) {
-				BBFall *bbfall = dynamic_cast<BBFall*>(blockByEndIdx(i+1));
-				bbfall->_out = blockByStartIdx(s.findIdx(j->_addr+j->_offset));
-			} else if (isTarget(i+1)) {
-				BBFall *bbfall = dynamic_cast<BBFall*>(blockByEndIdx(i+1));
-				bbfall->_out = blockByStartIdx(s.findIdx(v[i+1]->_addr));
-			}
-			if (cj) {
-				BasicBlock *bb1 = blockByStartIdx(s.findIdx(cj->_addr+cj->_offset));
-				BasicBlock *bb2 = blockByStartIdx(s.findIdx(v[i+1]->_addr));
-				bb1->_in.push_back(blockByEndIdx(i+1));
-				bb2->_in.push_back(blockByEndIdx(i+1));
-			} else if (j) {
-				BasicBlock *bb1 = blockByStartIdx(s.findIdx(j->_addr+j->_offset));
-				bb1->_in.push_back(blockByEndIdx(i+1));
-			} else if (isTarget(i+1)) {
-				BasicBlock *bb2 = blockByStartIdx(s.findIdx(v[i+1]->_addr));
-				bb2->_in.push_back(blockByEndIdx(i+1));
-			}
+		if (bbstart != script.size())
+			_blocks.push_back(new BasicBlock(BasicBlock::End, bbstart, script.size()));
+		for (index_t i = 0; i < script.size(); i++) {
+			BasicBlock *bb = blockByEnd(i+1);
+			if ((j = dynamic_cast<Jump*>(script[i])))
+				addEdge(bb, blockByStart(script.index(j->target())));
+			if (targets.find(i+1) != targets.end() || dynamic_cast<CondJump*>(script[i]))
+				addEdge(bb, blockByStart(i+1));
 		}
 	};
 

Modified: tools/branches/gsoc2009-decompiler/decompiler/decompiler.cc
===================================================================
--- tools/branches/gsoc2009-decompiler/decompiler/decompiler.cc	2009-06-10 15:11:17 UTC (rev 41430)
+++ tools/branches/gsoc2009-decompiler/decompiler/decompiler.cc	2009-06-10 15:17:44 UTC (rev 41431)
@@ -31,18 +31,10 @@
 		} else
 			break;
 	}
-	vector<Instruction*> v = Scumm6Parser().parseFile(argv[argno]);
-	if (g_disasm) {
-		for (unsigned i = 0; i < v.size(); i++) {
-			if (i >= 1 && v[i]->_addr == v[i-1]->_addr)
-				printf("         |           %s\n", v[i]->_description.c_str());
-			else
-				printf("(d) %04x | (r) %04x: %s\n", v[i]->_addr-8, v[i]->_addr, v[i]->_description.c_str());
-		}
-	}
-	CFG *cfg = new CFG(v);
+	Script script(new Scumm6Parser(), argv[argno]);
+	CFG *cfg = new CFG(script);
 	if (g_blocks)
-		cfg->printBBs();
+		cfg->printBasicBlocks();
 	if (g_graph)
 		cfg->printDot();
 	return 0;

Modified: tools/branches/gsoc2009-decompiler/decompiler/instruction.h
===================================================================
--- tools/branches/gsoc2009-decompiler/decompiler/instruction.h	2009-06-10 15:11:17 UTC (rev 41430)
+++ tools/branches/gsoc2009-decompiler/decompiler/instruction.h	2009-06-10 15:17:44 UTC (rev 41431)
@@ -6,7 +6,6 @@
 
 #include "misc.h"
 
-
 struct Instruction {
 	string _description;
 	uint32 _addr;
@@ -19,6 +18,9 @@
 
 struct Jump : public Instruction {
 	int16 _offset;
+	uint32 target() {
+		return _addr+_offset;
+	}
 	Jump(string description, uint32 addr, int16 offset) : Instruction(description, addr), _offset(offset) {
 	}
 };
@@ -29,21 +31,40 @@
 };
 
 
+struct Script;
+
+struct Parser {
+	virtual void parseFile(Script* script, const char *filename) = 0;
+};
+
 struct Script {
 
-	vector<Instruction*> _v;
+	vector<Instruction*> _instructions;
 
-	Script(vector<Instruction*> v) : _v(v) {
+	Script(Parser *parser, const char *filename) {
+		parser->parseFile(this, filename);
 	}
 
-	uint32 findIdx(uint32 addr) {
-		for (uint32 i = 0; i < _v.size(); i++)
-			if (_v[i]->_addr == addr)
+	void append(Instruction *instr) {
+		_instructions.push_back(instr);
+	}
+
+	index_t index(address_t addr) {
+		for (index_t i = 0; i < _instructions.size(); i++)
+			if (_instructions[i]->_addr == addr)
 				return i;
 		fprintf(stderr, "!!! no instruction with address %x (%d)\n", addr, addr);
 		return -1;
 	}
 
+	Instruction* operator[](index_t i) {
+		return _instructions[i];
+	}
+
+	index_t size() {
+		return _instructions.size();
+	}
+
 };
 
 

Modified: tools/branches/gsoc2009-decompiler/decompiler/misc.h
===================================================================
--- tools/branches/gsoc2009-decompiler/decompiler/misc.h	2009-06-10 15:11:17 UTC (rev 41430)
+++ tools/branches/gsoc2009-decompiler/decompiler/misc.h	2009-06-10 15:17:44 UTC (rev 41431)
@@ -10,6 +10,9 @@
 typedef unsigned short uint16;
 typedef unsigned uint32;
 
+typedef uint32 address_t; // bytecode address
+typedef uint32 index_t;   // instruction number in intermediate script
+
 uint32 read_be_uint32(ifstream &f) {
 	uint32 ret = 0;
 	ret |= f.get() << 24;

Modified: tools/branches/gsoc2009-decompiler/decompiler/parser.h
===================================================================
--- tools/branches/gsoc2009-decompiler/decompiler/parser.h	2009-06-10 15:11:17 UTC (rev 41430)
+++ tools/branches/gsoc2009-decompiler/decompiler/parser.h	2009-06-10 15:17:44 UTC (rev 41431)
@@ -17,7 +17,7 @@
 #include "instruction.h"
 
 
-struct Scumm6Parser {
+struct Scumm6Parser : public Parser {
 
 	SubopcodeReader *_reader;
 
@@ -143,14 +143,12 @@
 		}
 	}
 
-	vector<Instruction*> parseFile(const char *filename) {
-		vector<Instruction*> v;
+	void parseFile(Script* script, const char *filename) {
 		ifstream f;
 		f.open(filename, ios::binary);
 		parseHeader(f);
-		while (_reader->readInstruction(f, v, f.tellg()))
+		while (_reader->readInstruction(f, script, f.tellg()))
 			;
-		return v;
 	}
 
 };

Modified: tools/branches/gsoc2009-decompiler/decompiler/reader.h
===================================================================
--- tools/branches/gsoc2009-decompiler/decompiler/reader.h	2009-06-10 15:11:17 UTC (rev 41430)
+++ tools/branches/gsoc2009-decompiler/decompiler/reader.h	2009-06-10 15:17:44 UTC (rev 41431)
@@ -20,7 +20,7 @@
 
 struct Reader {
 	// return true if all went ok and we can safely read next afterwards
-	virtual bool readInstruction(ifstream &f, vector<Instruction*> &v, uint32 addr) = 0;
+	virtual bool readInstruction(ifstream &f, Script* script, uint32 addr) = 0;
 	virtual ~Reader() {
 	}
 };
@@ -70,11 +70,11 @@
 		return true;
 	}
 
-	virtual bool readInstruction(ifstream &f, vector<Instruction*> &v, uint32 addr) {
+	virtual bool readInstruction(ifstream &f, Script *script, uint32 addr) {
 		vector<int16> args;
 		string descr;
 		if (readArguments(f, descr, args)) {
-			v.push_back(new Instruction(descr, addr));
+			script->append(new Instruction(descr, addr));
 			return true;
 		} else {
 			return false;
@@ -87,11 +87,11 @@
 struct _JmpReader : public SimpleReader {
 	_JmpReader(string description, string format="") : SimpleReader(description, format) {
 	}
-	virtual bool readInstruction(ifstream &f, vector<Instruction*> &v, uint32 addr) {
+	virtual bool readInstruction(ifstream &f, Script* script, uint32 addr) {
 		vector<int16> args;
 		string descr;
 		if (readArguments(f, descr, args)) {
-			v.push_back(new T(descr, addr, args[0]));
+			script->append(new T(descr, addr, args[0]));
 			return true;
 		} else {
 			return false;
@@ -117,9 +117,8 @@
 		_dispatchTable[opcode] = reader;
 	}
 
-	bool readInstruction(ifstream& f, vector<Instruction*> &v, uint32 addr) {
-		//		if (f.tellg() >= 0x67)
-		//			return false;
+	bool readInstruction(ifstream& f, Script *script, uint32 addr) {
+		// if (f.tellg() >= 0x67) return false; // CUT
 		uint8 opcode = f.get();
 		if (f.eof()) {
 			return false;
@@ -127,7 +126,7 @@
 			fprintf(stderr, "! unhandled opcode 0x%02x (%d) at address 0x%02x (%d)\n", opcode, opcode, addr, addr);
 			return false;
 		} else {
-			return _dispatchTable[opcode]->readInstruction(f, v, addr);
+			return _dispatchTable[opcode]->readInstruction(f, script, addr);
 		}
 	}
 };
@@ -140,8 +139,8 @@
 	SeqReader(Reader *first, Reader *second) : _first(first), _second(second) {
 	}
 
-	bool readInstruction(ifstream& f, vector<Instruction*> &v, uint32 addr) {
-		return _first->readInstruction(f, v, addr) && _second->readInstruction(f, v, addr);
+	bool readInstruction(ifstream& f, Script *script, uint32 addr) {
+		return _first->readInstruction(f, script, addr) && _second->readInstruction(f, script, addr);
 	}
 };
 


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.




More information about the Scummvm-git-logs mailing list