[Scummvm-cvs-logs] SF.net SVN: scummvm:[41411] tools/branches/gsoc2009-decompiler/decompiler

kjdf at users.sourceforge.net kjdf at users.sourceforge.net
Tue Jun 9 23:59:15 CEST 2009


Revision: 41411
          http://scummvm.svn.sourceforge.net/scummvm/?rev=41411&view=rev
Author:   kjdf
Date:     2009-06-09 21:59:15 +0000 (Tue, 09 Jun 2009)

Log Message:
-----------
decompiler: control flow graph

Modified Paths:
--------------
    tools/branches/gsoc2009-decompiler/decompiler/Makefile
    tools/branches/gsoc2009-decompiler/decompiler/decompiler.cc
    tools/branches/gsoc2009-decompiler/decompiler/instruction.h
    tools/branches/gsoc2009-decompiler/decompiler/misc.h
    tools/branches/gsoc2009-decompiler/decompiler/parser.h
    tools/branches/gsoc2009-decompiler/decompiler/reader.h

Added Paths:
-----------
    tools/branches/gsoc2009-decompiler/decompiler/cfg.h

Modified: tools/branches/gsoc2009-decompiler/decompiler/Makefile
===================================================================
--- tools/branches/gsoc2009-decompiler/decompiler/Makefile	2009-06-09 20:54:55 UTC (rev 41410)
+++ tools/branches/gsoc2009-decompiler/decompiler/Makefile	2009-06-09 21:59:15 UTC (rev 41411)
@@ -8,7 +8,7 @@
 decompiler: decompiler.o $(DEPS)
 	g++ -Wall -g $^ -o $@
 
-decompiler.o: decompiler.cc misc.h instruction.h parser.h reader.h
+decompiler.o: decompiler.cc misc.h instruction.h parser.h reader.h cfg.h
 	g++ -Wall -g -c decompiler.cc -o decompiler.o
 
 clean:

Added: tools/branches/gsoc2009-decompiler/decompiler/cfg.h
===================================================================
--- tools/branches/gsoc2009-decompiler/decompiler/cfg.h	                        (rev 0)
+++ tools/branches/gsoc2009-decompiler/decompiler/cfg.h	2009-06-09 21:59:15 UTC (rev 41411)
@@ -0,0 +1,176 @@
+#ifndef CFG_H
+#define CFG_H
+
+#include <vector>
+
+#include <cstdio>
+
+using namespace std;
+
+
+#include "instruction.h"
+#include "misc.h"
+
+
+struct BasicBlock { // half-open instruction range [_start, _end) plus predecessor blocks (_in)
+	static uint32 _g_id; // next id to hand out
+	uint32 _id;
+	uint32 _start, _end;
+	vector<BasicBlock*> _in;
+	BasicBlock(uint32 start, uint32 end) : _id(_g_id++), _start(start), _end(end) {}
+	// Prints " in(<ids>) out(<ids>):" followed by one line per instruction of the block.
+	void printInsns(vector<Instruction*> &v) {
+		printf(" in(");
+		for (unsigned n = 0; n < _in.size(); n++) {
+			const char *sep = (n+1 == _in.size()) ? "" : ",";
+			printf("%d%s", _in[n]->_id, sep);
+		}
+		printf(") out(");
+		printOuts();
+		printf("):\n");
+		for (unsigned n = _start; n < _end; n++) {
+			Instruction *insn = v[n];
+			// an instruction sharing the previous one's address gets a blank address column
+			if (n == 0 || insn->_addr != v[n-1]->_addr)
+				printf("[d] %04x | [r] %04x: %s", insn->_addr-8, insn->_addr, insn->_description.c_str());
+			else
+				printf("         |           %s", insn->_description.c_str());
+			if (Jump *j = dynamic_cast<Jump*>(insn)) {
+				uint32 jaddr = j->_addr+j->_offset;
+				printf(" ([d] %04x | [r] %04x)", jaddr-8, jaddr);
+			}
+			printf("\n");
+		}
+	}
+	virtual void printOuts() {} // default: no successors to print
+	virtual void print(vector<Instruction*> &v) = 0;
+	virtual ~BasicBlock() {}
+};
+
+uint32 BasicBlock::_g_id = 0;
+
+struct BB2Way : public BasicBlock { // two successors: CFG stores the jump target in _out1, the next block in _out2
+	BasicBlock *_out1, *_out2; // start out null so an unlinked edge is detectable
+	BB2Way(uint32 start, uint32 end) : BasicBlock(start, end), _out1(0), _out2(0) {}
+	void print(vector<Instruction*> &v) {
+		printf("=== BB2Way #%d [%d,%d)", _id, _start, _end); // '#' matches the BBFall/BBEnd headers
+		printInsns(v);
+		printf("===\n\n");
+	}
+	void printOuts() { // a successor that was never linked prints as "?" instead of crashing
+		if (_out1) printf("%d,", _out1->_id); else printf("?,");
+		if (_out2) printf("%d", _out2->_id); else printf("?");
+	}
+};
+
+// Block with exactly one successor, stored in _out by the CFG constructor.
+struct BBFall : public BasicBlock {
+	BasicBlock *_out; // null until linked
+	BBFall(uint32 start, uint32 end) : BasicBlock(start, end), _out(0) {}
+	void print(vector<Instruction*> &v) {
+		printf("=== BBFall #%d [%d,%d)", _id, _start, _end);
+		printInsns(v);
+		printf("===\n\n");
+	}
+	void printOuts() { // an unlinked successor prints as "?" instead of dereferencing null
+		if (_out) printf("%d", _out->_id); else printf("?");
+	}
+};
+
+// Block with no successors: it keeps BasicBlock's empty printOuts().
+struct BBEnd : public BasicBlock {
+	BBEnd(uint32 start, uint32 end) : BasicBlock(start, end) {}
+	void print(vector<Instruction*> &v) {
+		printf("=== BBEnd #%d [%d,%d)", this->_id, this->_start, this->_end);
+		this->printInsns(v);
+		fputs("===\n\n", stdout);
+	}
+};
+
+
+struct CFG {
+	// Control flow graph of an instruction list; blocks and targets are instruction indices.
+	vector<BasicBlock*> _blocks;
+	vector<uint32> _targets;
+	// True if addr is in _targets. Callers pass instruction indices, which is what _targets holds.
+	bool isTarget(uint32 addr) {
+		for (uint32 i = 0; i < _targets.size(); i++)
+			if (_targets[i] == addr)
+				return true;
+		return false;
+	}
+	// Returns the block whose first instruction index is idx, or 0 if there is none.
+	BasicBlock *blockByStartIdx(uint32 idx) {
+		for (uint32 i = 0; i < _blocks.size(); i++)
+			if (_blocks[i]->_start == idx)
+				return _blocks[i];
+		return 0;
+	}
+	// Returns the block whose end index (one past its last instruction) is idx, or 0 if none.
+	BasicBlock *blockByEndIdx(uint32 idx) {
+		for (uint32 i = 0; i < _blocks.size(); i++)
+			if (_blocks[i]->_end == idx)
+				return _blocks[i];
+		return 0;
+	}
+	// Builds the graph for v in three passes: collect targets, cut blocks, link edges.
+	CFG(vector<Instruction*> &v) {
+		Script s(v);
+		// pass 1: record the indices at which a new block must start
+		_targets.push_back(0);
+		for (uint32 i = 0; i < v.size(); i++) {
+			Jump *j = dynamic_cast<Jump*>(v[i]);
+			if (j) {
+				_targets.push_back(s.findIdx(j->_addr+j->_offset));
+				if (dynamic_cast<CondJump*>(v[i]) && i != v.size()-1)
+					_targets.push_back(s.findIdx(v[i+1]->_addr));
+			}
+		}
+		// pass 2: cut v into blocks after every jump and before every target
+		uint32 bbstart = 0;
+		for (uint32 i = 0; i < v.size(); i++)
+			if (dynamic_cast<CondJump*>(v[i])) {
+				_blocks.push_back(new BB2Way(bbstart, i+1));
+				bbstart = i+1;
+			}
+			else if (dynamic_cast<Jump*>(v[i])) {
+				_blocks.push_back(new BBFall(bbstart, i+1));
+				bbstart = i+1;
+			} else if (isTarget(i+1)) {
+				_blocks.push_back(new BBFall(bbstart, i+1));
+				bbstart = i+1;
+			}
+		if (bbstart != v.size())
+			_blocks.push_back(new BBEnd(bbstart, v.size()));
+		// pass 3: wire successor (_out*) and predecessor (_in) edges. A successor that
+		// cannot be resolved (jump to an unknown address, or a conditional jump that is
+		// the last instruction and so has no v[i+1]) is left null rather than dereferenced.
+		for (uint32 i = 0; i < v.size(); i++) {
+			Jump *j = dynamic_cast<Jump*>(v[i]);
+			bool fallsThrough = !j || dynamic_cast<CondJump*>(v[i]);
+			if (!j && !isTarget(i+1))
+				continue; // v[i] does not end a block
+			BasicBlock *from = blockByEndIdx(i+1);
+			BasicBlock *jumpTo = 0, *next = 0;
+			if (j)
+				jumpTo = blockByStartIdx(s.findIdx(j->_addr+j->_offset));
+			if (fallsThrough && i+1 < v.size())
+				next = blockByStartIdx(s.findIdx(v[i+1]->_addr));
+			if (BB2Way *bb2way = dynamic_cast<BB2Way*>(from)) {
+				bb2way->_out1 = jumpTo;
+				bb2way->_out2 = next;
+			} else if (BBFall *bbfall = dynamic_cast<BBFall*>(from)) {
+				bbfall->_out = j ? jumpTo : next;
+			}
+			// predecessors are recorded jump edge first, then fall-through edge
+			if (jumpTo)
+				jumpTo->_in.push_back(from);
+			if (next)
+				next->_in.push_back(from);
+		}
+	};
+
+};
+
+
+#endif


Property changes on: tools/branches/gsoc2009-decompiler/decompiler/cfg.h
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:eol-style
   + native

Modified: tools/branches/gsoc2009-decompiler/decompiler/decompiler.cc
===================================================================
--- tools/branches/gsoc2009-decompiler/decompiler/decompiler.cc	2009-06-09 20:54:55 UTC (rev 41410)
+++ tools/branches/gsoc2009-decompiler/decompiler/decompiler.cc	2009-06-09 21:59:15 UTC (rev 41411)
@@ -1,15 +1,37 @@
 #include <cstdio>
+#include <cstring>
 #include <vector>
 
 using namespace std;
 
 #include "parser.h"
 #include "instruction.h"
+#include "cfg.h"
 
+bool g_disasm = false;
+bool g_bbcuts = true;
 
 int main(int argc, char **argv) {
-	vector<Instruction*> v = Scumm6Parser().parseFile(argv[1]);
-	for (unsigned i = 0; i < v.size(); i++)
-		printf("(d) %04x | (r) %04x: %s\n", v[i]->_addr-8, v[i]->_addr, v[i]->_description.c_str());
+	int argno = 1;
+	// take the optional flag first, so the file-name check also covers a lone "-disasm"
+	if (argno < argc && 0 == strcmp("-disasm", argv[argno])) {
+		g_disasm = true;
+		argno++;
+	}
+	if (argno >= argc) {
+		printf("decompiler [-disasm] file.dmp\n");
+		return 0;
+	}
+	vector<Instruction*> v = Scumm6Parser().parseFile(argv[argno]);
+	if (g_disasm) {
+		for (unsigned i = 0; i < v.size(); i++)
+			if (i >= 1 && v[i]->_addr == v[i-1]->_addr)
+				printf("         |           %s\n", v[i]->_description.c_str());
+			else
+				printf("(d) %04x | (r) %04x: %s\n", v[i]->_addr-8, v[i]->_addr, v[i]->_description.c_str());
+	}
+	CFG *cfg = new CFG(v);
+	for (uint32 i = 0; i < cfg->_blocks.size(); i++)
+		cfg->_blocks[i]->print(v);
 	return 0;
 }

Modified: tools/branches/gsoc2009-decompiler/decompiler/instruction.h
===================================================================
--- tools/branches/gsoc2009-decompiler/decompiler/instruction.h	2009-06-09 20:54:55 UTC (rev 41410)
+++ tools/branches/gsoc2009-decompiler/decompiler/instruction.h	2009-06-09 21:59:15 UTC (rev 41411)
@@ -2,13 +2,49 @@
 #define INSTRUCTION_H
 
 #include <string>
+#include <cstdio>
 
 #include "misc.h"
 
+
 struct Instruction {
 	string _description;
 	uint32 _addr;
-	Instruction(string description, uint32 addr) : _description(description), _addr(addr) {}
+	Instruction(string description, uint32 addr)
+		: _description(description), _addr(addr) {}
+	// virtual destructor makes the type polymorphic, which dynamic_cast on Instruction* requires
+	virtual ~Instruction() {}
 };
 
+
+struct Jump : public Instruction {
+	int16 _offset; // signed distance to the target; the target address is _addr + _offset
+	Jump(string description, uint32 addr, int16 offset)
+		: Instruction(description, addr), _offset(offset) {}
+};
+
+struct CondJump : public Jump { // a Jump that may also fall through to the next instruction
+	CondJump(string description, uint32 addr, int16 offset)
+		: Jump(description, addr, offset) {}
+};
+
+
+struct Script {
+	// Copy of the instruction pointers (not the instructions), searchable by address.
+	vector<Instruction*> _v;
+	Script(vector<Instruction*> v) : _v(v) {}
+	// Returns the index of the first instruction at addr. If there is none, prints a
+	// warning and returns -1 converted to uint32 (the largest uint32 value).
+	uint32 findIdx(uint32 addr) {
+		uint32 idx = 0;
+		while (idx < _v.size() && _v[idx]->_addr != addr)
+			idx++;
+		if (idx < _v.size())
+			return idx;
+		printf("!!! no instruction with address %x (%d)\n", addr, addr);
+		return -1;
+	}
+};
+
+
 #endif

Modified: tools/branches/gsoc2009-decompiler/decompiler/misc.h
===================================================================
--- tools/branches/gsoc2009-decompiler/decompiler/misc.h	2009-06-09 20:54:55 UTC (rev 41410)
+++ tools/branches/gsoc2009-decompiler/decompiler/misc.h	2009-06-09 21:59:15 UTC (rev 41411)
@@ -6,6 +6,7 @@
 using namespace std;
 
 typedef unsigned char uint8;
+typedef short int16;
 typedef unsigned short uint16;
 typedef unsigned uint32;
 

Modified: tools/branches/gsoc2009-decompiler/decompiler/parser.h
===================================================================
--- tools/branches/gsoc2009-decompiler/decompiler/parser.h	2009-06-09 20:54:55 UTC (rev 41410)
+++ tools/branches/gsoc2009-decompiler/decompiler/parser.h	2009-06-09 21:59:15 UTC (rev 41411)
@@ -24,9 +24,10 @@
 	Scumm6Parser() {
 		_reader = new SubopcodeReader();
 		//		_reader->registerOpcode(0x00, new SimpleReader("pushByte", "b"));
-		_reader->registerOpcode(0x01, new SimpleReader("push", "w"));
-		_reader->registerOpcode(0x03, new SimpleReader("pushVar(v->s)", "w"));
+		_reader->registerOpcode(0x01, new SimpleReader("push", "W"));
+		_reader->registerOpcode(0x03, new SimpleReader("pushVar", "w"));
 		_reader->registerOpcode(0x07, new SimpleReader("wordArrayRead", "w"));
+		_reader->registerOpcode(0x0d, new SimpleReader("not"));
 		_reader->registerOpcode(0x0e, new SimpleReader("=="));
 		_reader->registerOpcode(0x0f, new SimpleReader("!="));
 		_reader->registerOpcode(0x10, new SimpleReader(">"));
@@ -34,10 +35,13 @@
 		_reader->registerOpcode(0x12, new SimpleReader("<="));
 		_reader->registerOpcode(0x13, new SimpleReader(">="));
 		_reader->registerOpcode(0x14, new SimpleReader("+"));
-		_reader->registerOpcode(0x43, new SimpleReader("writeVar(s->v)", "w"));
+		_reader->registerOpcode(0x43, new SimpleReader("writeVar", "w"));
 		_reader->registerOpcode(0x47, new SimpleReader("wordArrayWrite", "w"));
-		_reader->registerOpcode(0x4f, new SimpleReader("varInc", "w"));
-		_reader->registerOpcode(0x5d, new SimpleReader("jumpIfNot", "w"));
+		_reader->registerOpcode(0x4f, new SimpleReader("wordVarInc", "w"));
+		_reader->registerOpcode(0x5c, new CondJumpReader("jumpIf", "o3"));
+		_reader->registerOpcode(0x5d, new SeqReader(new SimpleReader("not"),
+													new CondJumpReader("jumpIf", "o3")));
+
 		_reader->registerOpcode(0x5e, new SimpleReader("startScript"));
 		_reader->registerOpcode(0x5f, new SimpleReader("startScriptQuick"));
 		_reader->registerOpcode(0x60, new SimpleReader("startObject"));
@@ -56,7 +60,7 @@
 		_reader->registerOpcode(0x6d, new SimpleReader("classOfIs"));
 		_reader->registerOpcode(0x6e, new SimpleReader("setClass"));
 		_reader->registerOpcode(0x72, new SimpleReader("getOwner"));
-		_reader->registerOpcode(0x73, new SimpleReader("jump", "w"));
+		_reader->registerOpcode(0x73, new JumpReader("jump", "o3"));
 		_reader->registerOpcode(0x7b, new SimpleReader("loadRoom"));
 		_reader->registerOpcode(0x7c, new SimpleReader("stopScript"));
 		_reader->registerOpcode(0x7d, new SimpleReader("walkActorToObj"));
@@ -80,7 +84,7 @@
 		actor->registerOpcode(87, new SimpleReader("actorOps.setTalkColor"));
 		actor->registerOpcode(95, new SimpleReader("actorOps.setIgnoreBoxes"));
 		actor->registerOpcode(99, new SimpleReader("actorOps.setTalkPos"));
-		actor->registerOpcode(0xc5, new SimpleReader("actorOps.setCurActor")); // not in scumm, descumm says it's from HE???
+		actor->registerOpcode(0xc5, new SimpleReader("actorOps.setCurActor"));
 
 		_reader->registerOpcode(0xa3, new SimpleReader("getVerbEntryPoint"));
 
@@ -90,7 +94,9 @@
 
 		SubopcodeReader *wait = new SubopcodeReader();
 		_reader->registerOpcode(0xa9, wait);
-		wait->registerOpcode(168, new SimpleReader("wait.forActor", "w"));
+		wait->registerOpcode(168, new SeqReader(new SimpleReader("wait.forActor.pushCond"),
+												new CondJumpReader("jumpIf", "o4")));
+		
 
 		_reader->registerOpcode(0xad, new SimpleReader("isAnyOf"));
 		_reader->registerOpcode(0xb0, new SimpleReader("delay"));

Modified: tools/branches/gsoc2009-decompiler/decompiler/reader.h
===================================================================
--- tools/branches/gsoc2009-decompiler/decompiler/reader.h	2009-06-09 20:54:55 UTC (rev 41410)
+++ tools/branches/gsoc2009-decompiler/decompiler/reader.h	2009-06-09 21:59:15 UTC (rev 41411)
@@ -21,7 +21,8 @@
 struct Reader {
 	// return true if all went ok and we can safely read next afterwards
 	virtual bool readInstruction(ifstream &f, vector<Instruction*> &v, uint32 addr) = 0;
-	virtual ~Reader() {}
+	virtual ~Reader() {
+	}
 };
 
 
@@ -33,36 +34,77 @@
 	SimpleReader(string description, string format="") : _description(description), _format(format) {
 	};
 
-	bool readInstruction(ifstream &f, vector<Instruction*> &v, uint32 addr) {
-		stringstream description(stringstream::out);
-		description << _description;
+	bool readArguments(ifstream &f, string &description, vector<int16> &arguments) {
+		stringstream ssret(stringstream::out);
+		ssret << _description;
 		for (uint32 i = 0; i < _format.size(); i++)
 			switch (_format[i]) {
 			case 'w': {
 				uint16 w = read_le_uint16(f);
-				description.setf(ios::hex, ios::basefield);
-				description << "_0x" << setfill('0') << setw(4) << w;
+				ssret << ' ' << w;
 				break;
 			}
-				/*			case 'b': {
-				uint8 b = f.get();
-				description.setf(ios::dec, ios::basefield);
-				description << "_0x" << setfill('0') << setw(2) << b;
+			case 'W': {
+				int16 w = (int16) read_le_uint16(f);
+				ssret << ' ' << w;
 				break;
-				}*/
+			}
+			case 'o': { // offset, fixed to be counted from the beginning of instruction
+				int len = _format[++i] - '0';
+				int16 w = len + (int16) read_le_uint16(f);
+				arguments.push_back(w);
+				ssret << ' ' << (w>=0?"+":"") << w; // a negative w already prints its own '-'
+				break;
+			}
 			case 's':
-				description << "_\"";
+				ssret << " \"";
 				for (char c = f.get(); c != 0; c = f.get())
-					description << c;
-				description << '"';
+					ssret << c;
+				ssret << '"';
 				break;
+			default:
+				printf("! unhandled format char '%c'\n", _format[i]);
+				return false;
 			}
-		v.push_back(new Instruction(description.str(), addr));
+		description = ssret.str();
 		return true;
 	}
+
+	// Decodes this opcode's arguments and appends a plain Instruction for it to v.
+	// Returns false, appending nothing, when the arguments cannot be decoded.
+	virtual bool readInstruction(ifstream &f, vector<Instruction*> &v, uint32 addr) {
+		string text;
+		vector<int16> operands;
+		if (!readArguments(f, text, operands))
+			return false;
+		v.push_back(new Instruction(text, addr));
+		return true;
+	}
 };
 
 
+// Reader for jump opcodes: builds a T (Jump or CondJump) from the first 'o' offset argument.
+template<typename T>
+struct _JmpReader : public SimpleReader {
+	_JmpReader(string description, string format="") : SimpleReader(description, format) {}
+	// Returns false without touching v if decoding fails or the format yielded no offset
+	// (e.g. it has no 'o' field), since args[0] on an empty vector is undefined behaviour.
+	virtual bool readInstruction(ifstream &f, vector<Instruction*> &v, uint32 addr) {
+		vector<int16> args;
+		string descr;
+		if (!readArguments(f, descr, args) || args.empty())
+			return false;
+		v.push_back(new T(descr, addr, args[0]));
+		return true;
+	}
+};
+
+
+typedef _JmpReader<Jump> JumpReader;
+typedef _JmpReader<CondJump> CondJumpReader;
+
+
+
 struct SubopcodeReader : public Reader {
 
 	Reader *_dispatchTable[256];
@@ -89,4 +131,17 @@
 };
 
 
+// Runs two readers back to back for a single opcode; both are given the same addr.
+struct SeqReader : public Reader {
+	Reader *_first, *_second;
+	SeqReader(Reader *first, Reader *second) : _first(first), _second(second) {}
+
+	bool readInstruction(ifstream& f, vector<Instruction*> &v, uint32 addr) {
+		if (!_first->readInstruction(f, v, addr))
+			return false; // the second reader is not run after a failure
+		return _second->readInstruction(f, v, addr);
+	}
+};
+
+
 #endif


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.




More information about the Scummvm-git-logs mailing list