[Scummvm-cvs-logs] SF.net SVN: scummvm:[41431] tools/branches/gsoc2009-decompiler/decompiler
kjdf at users.sourceforge.net
kjdf at users.sourceforge.net
Wed Jun 10 17:17:44 CEST 2009
Revision: 41431
http://scummvm.svn.sourceforge.net/scummvm/?rev=41431&view=rev
Author: kjdf
Date: 2009-06-10 15:17:44 +0000 (Wed, 10 Jun 2009)
Log Message:
-----------
decompiler: basic blocks are now single class, new Script class to manage intermediate bytecode
queries, cleaning up graph code
Modified Paths:
--------------
tools/branches/gsoc2009-decompiler/decompiler/cfg.h
tools/branches/gsoc2009-decompiler/decompiler/decompiler.cc
tools/branches/gsoc2009-decompiler/decompiler/instruction.h
tools/branches/gsoc2009-decompiler/decompiler/misc.h
tools/branches/gsoc2009-decompiler/decompiler/parser.h
tools/branches/gsoc2009-decompiler/decompiler/reader.h
Modified: tools/branches/gsoc2009-decompiler/decompiler/cfg.h
===================================================================
--- tools/branches/gsoc2009-decompiler/decompiler/cfg.h 2009-06-10 15:11:17 UTC (rev 41430)
+++ tools/branches/gsoc2009-decompiler/decompiler/cfg.h 2009-06-10 15:17:44 UTC (rev 41431)
@@ -1,8 +1,10 @@
#ifndef CFG_H
#define CFG_H
+#include <set>
#include <vector>
+
#include <cstdio>
using namespace std;
@@ -12,14 +14,31 @@
#include "misc.h"
-struct BasicBlock {
- static uint32 _g_id;
+struct Node {
uint32 _id;
- uint32 _start, _end;
- vector<BasicBlock*> _in;
- BasicBlock(uint32 start, uint32 end) : _start(start), _end(end) {
+ static uint32 _g_id;
+ vector<Node*> _in, _out;
+ Node() {
_id = _g_id++;
}
+ virtual ~Node() {
+ };
+};
+
+uint32 Node::_g_id = 0;
+
+
+struct BasicBlock : public Node {
+ enum Type {
+ TwoWay,
+ OneWay,
+ End
+ };
+ Type _type;
+ index_t _start, _end;
+ BasicBlock(Type type, index_t start, index_t end) : Node(), _type(type), _start(start), _end(end) {
+ }
+
void printInsns(vector<Instruction*> &v) {
printHeader(v);
printf(" in(");
@@ -46,166 +65,102 @@
void printHeader(vector<Instruction*> &v) {
printf("%d (%04x..%04x)", _id, v[_start]->_addr-8, v[_end-1]->_addr-8);
}
- virtual void print(vector<Instruction*> &v) = 0;
- virtual ~BasicBlock() {
- }
-};
-
-uint32 BasicBlock::_g_id = 0;
-
-struct BB2Way : public BasicBlock {
- BasicBlock *_out1, *_out2;
- BB2Way(uint32 start, uint32 end) : BasicBlock(start, end) {
- }
- void print(vector<Instruction*> &v) {
- printf("=== BB2Way #");
+ virtual void print(vector<Instruction*> &v) {
+ printf("=== ");
printInsns(v);
printf("===\n\n");
}
- void printOuts() {
- printf("%d,%d", _out1->_id, _out2->_id);
- }
-};
-struct BBFall : public BasicBlock {
- BasicBlock *_out;
- BBFall(uint32 start, uint32 end) : BasicBlock(start, end) {
- }
- void print(vector<Instruction*> &v) {
- printf("=== BBFall #");
- printInsns(v);
- printf("===\n\n");
- }
- void printOuts() {
- printf("%d", _out->_id);
- }
};
-struct BBEnd : public BasicBlock {
- BBEnd(uint32 start, uint32 end) : BasicBlock(start, end) {
- }
- void print(vector<Instruction*> &v) {
- printf("=== BBEnd #");
- printInsns(v);
- printf("===\n\n");
- }
-};
-
struct CFG {
vector<BasicBlock*> _blocks;
- vector<uint32> _targets;
- vector<Instruction*> *_v;
+ Script &_script;
- void printBBs() {
+ void printBasicBlocks() {
for (uint32 i = 0; i < _blocks.size(); i++)
- _blocks[i]->print(*_v);
+ _blocks[i]->print(_script._instructions);
}
void printDot() {
printf("digraph G {\n");
+ vector<Instruction*> *_v = &_script._instructions;
for (uint32 i = 0; i < _blocks.size(); i++) {
- BB2Way *bb2way = dynamic_cast<BB2Way*>(_blocks[i]);
- BBFall *bbfall = dynamic_cast<BBFall*>(_blocks[i]);
- BBEnd *bbend = dynamic_cast<BBEnd*> (_blocks[i]);
- if (bb2way) {
- printf("\""); bb2way->printHeader(*_v); printf("\"");
+ BasicBlock *bb = _blocks[i];
+ if (bb->_type == BasicBlock::TwoWay) {
+ printf("\""); bb->printHeader(*_v); printf("\"");
printf(" -> ");
- printf("\""); bb2way->_out1->printHeader(*_v); printf("\"");
+ printf("\""); ((BasicBlock*)bb->_out[0])->printHeader(*_v); printf("\"");
printf(" [style=bold]\n");
- printf("\""); bb2way->printHeader(*_v); printf("\"");
+ printf("\""); bb->printHeader(*_v); printf("\"");
printf(" -> ");
- printf("\""); bb2way->_out2->printHeader(*_v); printf("\"");
+ printf("\""); ((BasicBlock*)bb->_out[1])->printHeader(*_v); printf("\"");
printf("\n");
- } else if (bbfall) {
- printf("\""); bbfall->printHeader(*_v); printf("\"");
+ } else if (bb->_type == BasicBlock::OneWay) {
+ printf("\""); bb->printHeader(*_v); printf("\"");
printf(" -> ");
- printf("\""); bbfall->_out->printHeader(*_v); printf("\"");
+ printf("\""); ((BasicBlock*)bb->_out[0])->printHeader(*_v); printf("\"");
printf(" [style=bold]\n");
- } else if (bbend) {
- printf("\""); bbend->printHeader(*_v); printf("\"");
+ } else if (bb->_type == BasicBlock::End) {
+ printf("\""); bb->printHeader(*_v); printf("\"");
printf("\n");
}
}
printf("}\n");
}
- bool isTarget(uint32 addr) {
- for (uint32 i = 0; i < _targets.size(); i++)
- if (_targets[i] == addr)
- return true;
- return false;
- }
-
- BasicBlock *blockByStartIdx(uint32 idx) {
- for (uint32 i = 0; i < _blocks.size(); i++)
+ BasicBlock *blockByStart(index_t idx) {
+ for (index_t i = 0; i < _blocks.size(); i++)
if (_blocks[i]->_start == idx)
return _blocks[i];
return 0;
}
- BasicBlock *blockByEndIdx(uint32 idx) {
- for (uint32 i = 0; i < _blocks.size(); i++)
+ BasicBlock *blockByEnd(index_t idx) {
+ for (index_t i = 0; i < _blocks.size(); i++)
if (_blocks[i]->_end == idx)
return _blocks[i];
return 0;
}
- CFG(vector<Instruction*> &v) {
- Script s(v);
- _v = new vector<Instruction*>(v);
- _targets.push_back(0);
- for (uint32 i = 0; i < v.size(); i++) {
- Jump *j = dynamic_cast<Jump*>(v[i]);
- if (j) {
- _targets.push_back(s.findIdx(j->_addr+j->_offset));
- if (dynamic_cast<CondJump*>(v[i]) && i != v.size()-1)
- _targets.push_back(s.findIdx(v[i+1]->_addr));
+ void addEdge(BasicBlock *from, BasicBlock *to) {
+ from->_out.push_back(to);
+ to->_in.push_back(from);
+ }
+
+ CFG(Script &script) : _script(script) {
+ Jump *j;
+ set<address_t> targets;
+ targets.insert(0);
+ for (index_t i = 0; i < script.size(); i++) {
+ if ((j = dynamic_cast<Jump*>(script[i]))) {
+ targets.insert(script.index(j->_addr+j->_offset));
+ if (dynamic_cast<CondJump*>(script[i]) && i != script.size()-1)
+ targets.insert(i+1);
}
}
- uint32 bbstart = 0;
- for (uint32 i = 0; i < v.size(); i++)
- if (dynamic_cast<CondJump*>(v[i])) {
- _blocks.push_back(new BB2Way(bbstart, i+1));
+ index_t bbstart = 0;
+ for (index_t i = 0; i < script.size(); i++)
+ if (dynamic_cast<CondJump*>(script[i])) {
+ _blocks.push_back(new BasicBlock(BasicBlock::TwoWay, bbstart, i+1));
bbstart = i+1;
- }
- else if (dynamic_cast<Jump*>(v[i])) {
- _blocks.push_back(new BBFall(bbstart, i+1));
+ } else if (dynamic_cast<Jump*>(script[i])) {
+ _blocks.push_back(new BasicBlock(BasicBlock::OneWay, bbstart, i+1));
bbstart = i+1;
- } else if (isTarget(i+1)) {
- _blocks.push_back(new BBFall(bbstart, i+1));
+ } else if (targets.find(i+1) != targets.end()) {
+ _blocks.push_back(new BasicBlock(BasicBlock::OneWay, bbstart, i+1));
bbstart = i+1;
}
- if (bbstart != v.size())
- _blocks.push_back(new BBEnd(bbstart, v.size()));
- for (uint32 i = 0; i < v.size(); i++) {
- Jump *j = dynamic_cast<Jump*>(v[i]);
- CondJump *cj = dynamic_cast<CondJump*>(v[i]);
- if (cj) {
- BB2Way *bb2way = dynamic_cast<BB2Way*>(blockByEndIdx(i+1));
- bb2way->_out1 = blockByStartIdx(s.findIdx(cj->_addr+cj->_offset));
- bb2way->_out2 = blockByStartIdx(s.findIdx(v[i+1]->_addr));
- }
- else if (j) {
- BBFall *bbfall = dynamic_cast<BBFall*>(blockByEndIdx(i+1));
- bbfall->_out = blockByStartIdx(s.findIdx(j->_addr+j->_offset));
- } else if (isTarget(i+1)) {
- BBFall *bbfall = dynamic_cast<BBFall*>(blockByEndIdx(i+1));
- bbfall->_out = blockByStartIdx(s.findIdx(v[i+1]->_addr));
- }
- if (cj) {
- BasicBlock *bb1 = blockByStartIdx(s.findIdx(cj->_addr+cj->_offset));
- BasicBlock *bb2 = blockByStartIdx(s.findIdx(v[i+1]->_addr));
- bb1->_in.push_back(blockByEndIdx(i+1));
- bb2->_in.push_back(blockByEndIdx(i+1));
- } else if (j) {
- BasicBlock *bb1 = blockByStartIdx(s.findIdx(j->_addr+j->_offset));
- bb1->_in.push_back(blockByEndIdx(i+1));
- } else if (isTarget(i+1)) {
- BasicBlock *bb2 = blockByStartIdx(s.findIdx(v[i+1]->_addr));
- bb2->_in.push_back(blockByEndIdx(i+1));
- }
+ if (bbstart != script.size())
+ _blocks.push_back(new BasicBlock(BasicBlock::End, bbstart, script.size()));
+ for (index_t i = 0; i < script.size(); i++) {
+ BasicBlock *bb = blockByEnd(i+1);
+ if ((j = dynamic_cast<Jump*>(script[i])))
+ addEdge(bb, blockByStart(script.index(j->target())));
+ if (targets.find(i+1) != targets.end() || dynamic_cast<CondJump*>(script[i]))
+ addEdge(bb, blockByStart(i+1));
}
};
Modified: tools/branches/gsoc2009-decompiler/decompiler/decompiler.cc
===================================================================
--- tools/branches/gsoc2009-decompiler/decompiler/decompiler.cc 2009-06-10 15:11:17 UTC (rev 41430)
+++ tools/branches/gsoc2009-decompiler/decompiler/decompiler.cc 2009-06-10 15:17:44 UTC (rev 41431)
@@ -31,18 +31,10 @@
} else
break;
}
- vector<Instruction*> v = Scumm6Parser().parseFile(argv[argno]);
- if (g_disasm) {
- for (unsigned i = 0; i < v.size(); i++) {
- if (i >= 1 && v[i]->_addr == v[i-1]->_addr)
- printf(" | %s\n", v[i]->_description.c_str());
- else
- printf("(d) %04x | (r) %04x: %s\n", v[i]->_addr-8, v[i]->_addr, v[i]->_description.c_str());
- }
- }
- CFG *cfg = new CFG(v);
+ Script script(new Scumm6Parser(), argv[argno]);
+ CFG *cfg = new CFG(script);
if (g_blocks)
- cfg->printBBs();
+ cfg->printBasicBlocks();
if (g_graph)
cfg->printDot();
return 0;
Modified: tools/branches/gsoc2009-decompiler/decompiler/instruction.h
===================================================================
--- tools/branches/gsoc2009-decompiler/decompiler/instruction.h 2009-06-10 15:11:17 UTC (rev 41430)
+++ tools/branches/gsoc2009-decompiler/decompiler/instruction.h 2009-06-10 15:17:44 UTC (rev 41431)
@@ -6,7 +6,6 @@
#include "misc.h"
-
struct Instruction {
string _description;
uint32 _addr;
@@ -19,6 +18,9 @@
struct Jump : public Instruction {
int16 _offset;
+ uint32 target() {
+ return _addr+_offset;
+ }
Jump(string description, uint32 addr, int16 offset) : Instruction(description, addr), _offset(offset) {
}
};
@@ -29,21 +31,40 @@
};
+struct Script;
+
+struct Parser {
+ virtual void parseFile(Script* script, const char *filename) = 0;
+};
+
struct Script {
- vector<Instruction*> _v;
+ vector<Instruction*> _instructions;
- Script(vector<Instruction*> v) : _v(v) {
+ Script(Parser *parser, const char *filename) {
+ parser->parseFile(this, filename);
}
- uint32 findIdx(uint32 addr) {
- for (uint32 i = 0; i < _v.size(); i++)
- if (_v[i]->_addr == addr)
+ void append(Instruction *instr) {
+ _instructions.push_back(instr);
+ }
+
+ index_t index(address_t addr) {
+ for (index_t i = 0; i < _instructions.size(); i++)
+ if (_instructions[i]->_addr == addr)
return i;
fprintf(stderr, "!!! no instruction with address %x (%d)\n", addr, addr);
return -1;
}
+ Instruction* operator[](index_t i) {
+ return _instructions[i];
+ }
+
+ index_t size() {
+ return _instructions.size();
+ }
+
};
Modified: tools/branches/gsoc2009-decompiler/decompiler/misc.h
===================================================================
--- tools/branches/gsoc2009-decompiler/decompiler/misc.h 2009-06-10 15:11:17 UTC (rev 41430)
+++ tools/branches/gsoc2009-decompiler/decompiler/misc.h 2009-06-10 15:17:44 UTC (rev 41431)
@@ -10,6 +10,9 @@
typedef unsigned short uint16;
typedef unsigned uint32;
+typedef uint32 address_t; // bytecode address
+typedef uint32 index_t; // instruction number in intermediate script
+
uint32 read_be_uint32(ifstream &f) {
uint32 ret = 0;
ret |= f.get() << 24;
Modified: tools/branches/gsoc2009-decompiler/decompiler/parser.h
===================================================================
--- tools/branches/gsoc2009-decompiler/decompiler/parser.h 2009-06-10 15:11:17 UTC (rev 41430)
+++ tools/branches/gsoc2009-decompiler/decompiler/parser.h 2009-06-10 15:17:44 UTC (rev 41431)
@@ -17,7 +17,7 @@
#include "instruction.h"
-struct Scumm6Parser {
+struct Scumm6Parser : public Parser {
SubopcodeReader *_reader;
@@ -143,14 +143,12 @@
}
}
- vector<Instruction*> parseFile(const char *filename) {
- vector<Instruction*> v;
+ void parseFile(Script* script, const char *filename) {
ifstream f;
f.open(filename, ios::binary);
parseHeader(f);
- while (_reader->readInstruction(f, v, f.tellg()))
+ while (_reader->readInstruction(f, script, f.tellg()))
;
- return v;
}
};
Modified: tools/branches/gsoc2009-decompiler/decompiler/reader.h
===================================================================
--- tools/branches/gsoc2009-decompiler/decompiler/reader.h 2009-06-10 15:11:17 UTC (rev 41430)
+++ tools/branches/gsoc2009-decompiler/decompiler/reader.h 2009-06-10 15:17:44 UTC (rev 41431)
@@ -20,7 +20,7 @@
struct Reader {
// return true if all went ok and we can safely read next afterwards
- virtual bool readInstruction(ifstream &f, vector<Instruction*> &v, uint32 addr) = 0;
+ virtual bool readInstruction(ifstream &f, Script* script, uint32 addr) = 0;
virtual ~Reader() {
}
};
@@ -70,11 +70,11 @@
return true;
}
- virtual bool readInstruction(ifstream &f, vector<Instruction*> &v, uint32 addr) {
+ virtual bool readInstruction(ifstream &f, Script *script, uint32 addr) {
vector<int16> args;
string descr;
if (readArguments(f, descr, args)) {
- v.push_back(new Instruction(descr, addr));
+ script->append(new Instruction(descr, addr));
return true;
} else {
return false;
@@ -87,11 +87,11 @@
struct _JmpReader : public SimpleReader {
_JmpReader(string description, string format="") : SimpleReader(description, format) {
}
- virtual bool readInstruction(ifstream &f, vector<Instruction*> &v, uint32 addr) {
+ virtual bool readInstruction(ifstream &f, Script* script, uint32 addr) {
vector<int16> args;
string descr;
if (readArguments(f, descr, args)) {
- v.push_back(new T(descr, addr, args[0]));
+ script->append(new T(descr, addr, args[0]));
return true;
} else {
return false;
@@ -117,9 +117,8 @@
_dispatchTable[opcode] = reader;
}
- bool readInstruction(ifstream& f, vector<Instruction*> &v, uint32 addr) {
- // if (f.tellg() >= 0x67)
- // return false;
+ bool readInstruction(ifstream& f, Script *script, uint32 addr) {
+ // if (f.tellg() >= 0x67) return false; // CUT
uint8 opcode = f.get();
if (f.eof()) {
return false;
@@ -127,7 +126,7 @@
fprintf(stderr, "! unhandled opcode 0x%02x (%d) at address 0x%02x (%d)\n", opcode, opcode, addr, addr);
return false;
} else {
- return _dispatchTable[opcode]->readInstruction(f, v, addr);
+ return _dispatchTable[opcode]->readInstruction(f, script, addr);
}
}
};
@@ -140,8 +139,8 @@
SeqReader(Reader *first, Reader *second) : _first(first), _second(second) {
}
- bool readInstruction(ifstream& f, vector<Instruction*> &v, uint32 addr) {
- return _first->readInstruction(f, v, addr) && _second->readInstruction(f, v, addr);
+ bool readInstruction(ifstream& f, Script *script, uint32 addr) {
+ return _first->readInstruction(f, script, addr) && _second->readInstruction(f, script, addr);
}
};
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
More information about the Scummvm-git-logs
mailing list