[Scummvm-cvs-logs] SF.net SVN: scummvm:[41977] tools/branches/gsoc2009-decompiler/decompiler
kjdf at users.sourceforge.net
kjdf at users.sourceforge.net
Tue Jun 30 22:02:34 CEST 2009
Revision: 41977
http://scummvm.svn.sourceforge.net/scummvm/?rev=41977&view=rev
Author: kjdf
Date: 2009-06-30 20:02:33 +0000 (Tue, 30 Jun 2009)
Log Message:
-----------
decompiler: merged ControlFlowGraph and Graph, nearly removed Script as well
Modified Paths:
--------------
tools/branches/gsoc2009-decompiler/decompiler/Makefile
tools/branches/gsoc2009-decompiler/decompiler/decompiler.cpp
tools/branches/gsoc2009-decompiler/decompiler/graph.h
tools/branches/gsoc2009-decompiler/decompiler/instruction.h
tools/branches/gsoc2009-decompiler/decompiler/misc.h
Removed Paths:
-------------
tools/branches/gsoc2009-decompiler/decompiler/cfg.h
Modified: tools/branches/gsoc2009-decompiler/decompiler/Makefile
===================================================================
--- tools/branches/gsoc2009-decompiler/decompiler/Makefile 2009-06-30 17:41:24 UTC (rev 41976)
+++ tools/branches/gsoc2009-decompiler/decompiler/Makefile 2009-06-30 20:02:33 UTC (rev 41977)
@@ -78,7 +78,7 @@
decompiler$(EXEEXT): decompiler.o
$(CXX) $(LDFLAGS) -o $@ $+ -lboost_program_options
-decompiler.o: cfg.h graph.h instruction.h misc.h parser.h reader.h
+decompiler.o: graph.h instruction.h misc.h parser.h reader.h
######################################################################
Deleted: tools/branches/gsoc2009-decompiler/decompiler/cfg.h
===================================================================
--- tools/branches/gsoc2009-decompiler/decompiler/cfg.h 2009-06-30 17:41:24 UTC (rev 41976)
+++ tools/branches/gsoc2009-decompiler/decompiler/cfg.h 2009-06-30 20:02:33 UTC (rev 41977)
@@ -1,122 +0,0 @@
-#ifndef CFG_H
-#define CFG_H
-
-#include <map>
-#include <list>
-
-
-#include <cstdio>
-
-using namespace std;
-
-#include "graph.h"
-
-#include "instruction.h"
-#include "misc.h"
-
-
-struct Block {
-
- Script &_script;
- index_t _begin, _end;
-
- Block(Script &script, index_t begin, index_t end) : _script(script), _begin(begin), _end(end) {
- }
-
- virtual ~Block() {}
-
- virtual void print(ostream &out) {
- }
-
-};
-
-
-struct BasicBlock : public Block {
-
- BasicBlock(Script &script, index_t begin, index_t end) : Block(script, begin, end) {
- }
-
- void print(ostream &out) {
- for (index_t i = _begin; i <= _end; i++)
- _script.print(out, i);
- }
-
-};
-
-
-struct CFG {
-
- void printBasicBlocks(ostream &out) {
- foreach (Block *block, _blocks) {
- block->print(out);
- out << endl;
- }
- }
-
- static string printer(Block *block) {
- stringstream ret;
- block->print(ret);
- return ret.str();
- }
-
- void printDot(ostream &out, const string &fontname = "Courier") {
- out << _graph.graphvizPrint(printer, fontname);
- }
-
- void removeJumpsToJumps() {
- for (bool changed = true; changed; ) {
- changed = false;
- foreach (Block *bu, _blocks) {
- Node *u = _nodes[bu->_begin];
- foreach (Node *v, u->out()) {
- Block *bv = v->_data;
- Jump *jump = dynamic_cast<Jump*>(_script[bv->_begin]);
- if (jump && !dynamic_cast<CondJump*>(jump) && jump->target() != jump->_addr) {
- changed = true;
- _graph.replaceEdges(u, v, _nodes[_script.index(jump->target())]);
- }
- }
- }
- }
- }
-
- void removeDeadBlocks() {
- _graph.removeUnreachableNodes();
- }
-
- typedef Graph<Block*>::Node Node;
- map<index_t, Node*> _nodes;
- list<Block*> _blocks;
- Script &_script;
-
- Graph<Block*> _graph;
-
- CFG(Script &script) : _script(script) {
- _nodes[script.size()] = 0;
- Jump *jump;
- for (index_t i = 0; i < script.size(); i++)
- if ((jump = dynamic_cast<Jump*>(script[i]))) {
- _nodes[script.index(jump->target())] = 0;
- _nodes[i+1] = 0;
- }
- index_t begin = 0;
- for (index_t i = 0; i < script.size(); i++)
- if (contains(_nodes, i+1)) {
- _blocks.push_back(new BasicBlock(script, begin, i));
- _nodes[begin] = _graph.addNode(_blocks.back());
- begin = i+1;
- }
- foreach (Block *block, _blocks) {
- Node *node = _nodes[block->_begin];
- index_t end = block->_end;
- if ((jump = dynamic_cast<Jump*>(script[end])))
- _graph.addEdge(node, _nodes[script.index(jump->target())]);
- if (end+1 < script.size() && contains(_nodes, end+1) && (!jump || dynamic_cast<CondJump*>(jump)))
- _graph.addEdge(node, _nodes[end+1]);
- }
- }
-};
-
-
-#endif
-
Modified: tools/branches/gsoc2009-decompiler/decompiler/decompiler.cpp
===================================================================
--- tools/branches/gsoc2009-decompiler/decompiler/decompiler.cpp 2009-06-30 17:41:24 UTC (rev 41976)
+++ tools/branches/gsoc2009-decompiler/decompiler/decompiler.cpp 2009-06-30 20:02:33 UTC (rev 41977)
@@ -3,7 +3,7 @@
#include <boost/program_options.hpp>
#include "parser.h"
-#include "cfg.h"
+#include "graph.h"
using namespace std;
using namespace boost::program_options;
@@ -20,7 +20,7 @@
("fontname", value<string>()->default_value("Courier"), "font to use with graphical output");
options_description options("Allowed options");
options.add(visible).add_options()
- ("derive", value<int>()->default_value(0), "find arg-th order intervals")
+ // ("derive", value<int>()->default_value(0), "find arg-th order intervals")
("inputfile", value<string>(), "input file");
positional_options_description pos;
pos.add("inputfile", 1);
@@ -43,26 +43,26 @@
variables_map vars = parseArgs(argc, argv);
Script script(new Scumm6Parser, vars["inputfile"].as<string>().c_str());
if (vars.count("disasm")) {
- for (size_t i = 0; i < script.size(); i++)
- script.print(cout, i);
+ foreach (Instruction *instruction, script._instructions)
+ cout << instruction->toString();
exit(0);
}
- CFG cfg(script);
+ ControlFlowGraph cfg;
+ cfg.addBlocksFromScript(script._instructions.begin(), script._instructions.end());
+ cfg.setEntry(script._instructions.front()->_addr);
if (vars.count("blocks")) {
- cfg.printBasicBlocks(cout);
+ foreach (Block *block, cfg._blocks)
+ cout << block->toString() << endl;
exit(0);
}
- cfg.removeJumpsToJumps();
- cfg._graph.intervals();
if (vars.count("graph")) {
- Graph<Block*> &g = cfg._graph;
- g.orderNodes();
- cfg.removeDeadBlocks();
- g.intervals();
- for (int i = 0; i < vars["derive"].as<int>(); i++)
- g.extendIntervals();
- g.loopStruct();
- cfg.printDot(cout, vars["fontname"].as<string>());
+ cfg.removeJumpsToJumps();
+ cfg.orderBlocks();
+ cfg.removeUnreachableBlocks();
+ // for (int i = 0; i < vars["derive"].as<int>(); i++)
+ // cfg.extendIntervals();
+ cfg.loopStruct();
+ cout << cfg.graphvizToString(vars["fontname"].as<string>());
exit(0);
}
return 0;
Modified: tools/branches/gsoc2009-decompiler/decompiler/graph.h
===================================================================
--- tools/branches/gsoc2009-decompiler/decompiler/graph.h 2009-06-30 17:41:24 UTC (rev 41976)
+++ tools/branches/gsoc2009-decompiler/decompiler/graph.h 2009-06-30 20:02:33 UTC (rev 41977)
@@ -2,23 +2,14 @@
#define GRAPH_H
#include "misc.h"
-
#include <cassert>
-
#include <list>
#include <map>
#include <set>
#include <sstream>
-
-#include <boost/foreach.hpp>
-#ifndef foreach
-#define foreach BOOST_FOREACH
-#endif
-
#include <iostream>
-
enum LoopType {
PRE_TESTED,
POST_TESTED,
@@ -26,124 +17,161 @@
};
-template<typename Data>
-struct Graph : boost::noncopyable {
+struct Block : boost::noncopyable {
- struct Node : boost::noncopyable {
+ Block *_interval;
+ Block *_loopFollow;
+ Block *_loopHead;
+ Block *_loopLatch;
+ Block *_primitive;
+ LoopType _loopType;
+ int _number;
+ list<Block*> _in;
+ list<Block*> _out;
+ list<Instruction*> _instructions;
- Data _data;
+ string toString() {
+ ostringstream ret;
+ foreach (Instruction *instruction, _instructions)
+ ret << instruction->toString();
+ return ret.str();
+ }
- const std::list<Node*> &out() const {
- return _out;
- }
+ Block() : _interval(), _number(), _loopHead(), _loopFollow() {
+ }
- // wouldn't be needed if Graph<X> and Graph<Y> weren't totally alien classes
- Node *interval() const {
- return _interval;
- }
+ ~Block() {
+ }
+};
- private:
- friend class Graph;
+struct ControlFlowGraph : boost::noncopyable {
- Node *_interval;
- std::list<Node*> _in;
- std::list<Node*> _out;
+ Block *_entry;
+ std::list<Block*> _blocks;
+ map<address_t, Block*> _targets;
- int _number;
+ ControlFlowGraph() : _entry() {
+ }
- Node *_loopHead;
- Node *_loopLatch;
- Node *_loopFollow;
- LoopType _loopType;
+ ~ControlFlowGraph() {
+ foreach (Block *u, _blocks)
+ delete u;
+ }
- Node(const Data &data) : _data(data), _interval(), _number(), _loopHead(), _loopFollow() {
+ template<typename Iterator>
+ void addBlocksFromScript(Iterator scriptBegin, Iterator scriptEnd) {
+ Jump *jump;
+ for (Iterator it = scriptBegin; it != scriptEnd; it++)
+ if ((jump = dynamic_cast<Jump*>(*it))) {
+ _targets[jump->target()] = 0;
+ if (next(it) != scriptEnd)
+ _targets[(*next(it))->_addr] = 0;
+ }
+ Iterator first = scriptBegin;
+ for (Iterator last = scriptBegin; last != scriptEnd; last++) {
+ if (next(last) == scriptEnd || contains(_targets, (*next(last))->_addr)) {
+ _targets[(*first)->_addr] = addBlock(first, next(last));
+ first = next(last);
+ }
}
-
- ~Node() {
+ foreach (Block *block, _blocks) {
+ if ((jump = dynamic_cast<Jump*>(block->_instructions.back())))
+ addEdge(block, _targets[jump->target()]);
+ map<address_t, Block*>::iterator succ = next(_targets.find(block->_instructions.front()->_addr));
+ if (succ != _targets.end() && (!jump || dynamic_cast<CondJump*>(jump)))
+ addEdge(block, succ->second);
}
- };
-
- std::list<Node*> _nodes;
- Node *_entry;
- int _currentNumber;
-
- Graph() : _entry(), _currentNumber() {
}
- ~Graph() {
- foreach (Node *u, _nodes)
- delete u;
+ void setEntry(address_t entry) {
+ foreach (Block *block, _blocks)
+ if (block->_instructions.front()->_addr == entry)
+ _entry = block;
}
- void setEntry(Node *entry) {
- _entry = entry;
+ template<typename Iterator>
+ Block *addBlock(Iterator first, Iterator last) {
+ Block* block = new Block;
+ _blocks.push_back(block);
+ copy(first, last, back_inserter(block->_instructions));
+ return block;
}
- Node *addNode(const Data &data) {
- Node* node = new Node(data);
- _nodes.push_back(node);
- if (!_entry)
- _entry = node;
- return node;
- }
-
- void addEdge(Node *from, Node *to) {
+ void addEdge(Block *from, Block *to) {
from->_out.push_back(to);
to->_in.push_back(from);
}
- void replaceEdges(Node *from, Node *oldTo, Node *newTo) {
+ void removeJumpsToJumps() {
+ for (bool changed = true; changed; ) {
+ changed = false;
+ foreach (Block *u, _blocks) {
+ foreach (Block *v, u->_out) {
+ Jump *jump = dynamic_cast<Jump*>(v->_instructions.front());
+ if (jump && !dynamic_cast<CondJump*>(jump) && jump->target() != jump->_addr) {
+ changed = true;
+ replaceEdges(u, v, _targets[jump->target()]);
+ }
+ }
+ }
+ }
+ }
+
+ void replaceEdges(Block *from, Block *oldTo, Block *newTo) {
size_t n = count(oldTo->_in.begin(), oldTo->_in.end(), from);
oldTo->_in.remove(from);
fill_n(back_inserter(newTo->_in), n, from);
- foreach (Node *&node, from->_out)
- if (node == oldTo)
- node = newTo;
+ foreach (Block *&block, from->_out)
+ if (block == oldTo)
+ block = newTo;
}
- // to be called after order nodes
- void removeUnreachableNodes() {
- for (typename std::list<Node*>::iterator uit = _nodes.begin(); uit != _nodes.end(); )
- if ((*uit)->_number)
- uit++;
+ // to be called after order blocks
+ void removeUnreachableBlocks() {
+ foreach (Block *u, _blocks)
+ if (!u->_number) {
+ foreach (Block *v, u->_out)
+ v->_in.remove(u);
+ }
+ for (list<Block*>::iterator it = _blocks.begin(); it != _blocks.end(); )
+ if ((*it)->_number)
+ it++;
else {
- foreach (Node *v, (*uit)->_out)
- v->_in.remove(*uit);
- delete *uit;
- uit = _nodes.erase(uit);
+ delete *it;
+ it = _blocks.erase(it);
}
}
- // assign node numbers in post-order
- void orderNodes() {
+ // assign block numbers in post-order
+ void orderBlocks() {
assert(_entry);
orderVisit(_entry, 0);
}
- int orderVisit(Node *u, int number) {
+ int orderVisit(Block *u, int number) {
u->_number = -1;
- foreach (Node *v, u->_out)
+ foreach (Block *v, u->_out)
if (!v->_number)
number = orderVisit(v, number);
u->_number = ++number;
return number;
}
- std::list<Node*> intervals() const {
- std::list<Node*> ret;
+ std::list<Block*> intervals() const {
+ std::list<Block*> ret;
assignIntervals();
- foreach (Node *u, _nodes)
+ foreach (Block *u, _blocks)
if (u->_interval == u)
ret.push_back(u);
return ret;
}
- bool inLoop(Node *head, Node *latch, Node *u) {
+ bool inLoop(Block *head, Block *latch, Block *u) {
return u->_interval == head && latch->_number <= u->_number && u->_number < head->_number;
}
- LoopType loopType(Node *head, Node *latch) {
+ LoopType loopType(Block *head, Block *latch) {
if (head->_out.size() == 1 && latch->_out.size() == 1)
return ENDLESS;
if (head->_out.size() == 1 && latch->_out.size() == 2)
@@ -157,25 +185,25 @@
return PRE_TESTED;
}
- Node *loopFollow(Node *head, Node *latch) {
+ Block *loopFollow(Block *head, Block *latch) {
if (head->_loopType == PRE_TESTED)
return head->_out.front();
if (head->_loopType == POST_TESTED)
return latch->_out.back();
// ENDLESS
- Node *ret = 0;
- foreach (Node *u, _nodes)
+ Block *ret = 0;
+ foreach (Block *u, _blocks)
if (inLoop(head, latch, u) && u->_out.size() == 2 && (!ret || ret->_number < u->_out.back()->_number))
ret = u->_out.back();
return ret;
}
void loopStruct() {
- for (size_t size = _nodes.size()+1; size > intervals().size(); size = intervals().size(), extendIntervals())
- foreach (Node *interval, intervals()) {
- foreach (Node *latch, interval->_in) {
+ for (size_t size = _blocks.size()+1; size > intervals().size(); size = intervals().size(), extendIntervals())
+ foreach (Block *interval, intervals()) {
+ foreach (Block *latch, interval->_in) {
if (latch->_interval == interval && !latch->_loopHead) {
- foreach (Node *u, _nodes)
+ foreach (Block *u, _blocks)
if (inLoop(interval, latch, u))
u->_loopHead = interval;
interval->_loopLatch = latch; // TODO do we need this?
@@ -187,72 +215,61 @@
}
void extendIntervals() {
- Graph<Node*> d;
- std::map<Node*, typename Graph<Node*>::Node*> trans;
- foreach (Node *interval, intervals())
- trans[interval] = d.addNode(interval);
- foreach (Node *interval, intervals())
- foreach (Node *u, interval->_in)
+ ControlFlowGraph d;
+ std::map<Block*, Block*> trans;
+ foreach (Block *interval, intervals()) {
+ trans[interval] = d.addBlock(interval->_instructions.begin(), interval->_instructions.end());
+ trans[interval]->_primitive = interval;
+ }
+ foreach (Block *interval, intervals())
+ foreach (Block *u, interval->_in)
if (u->_interval != interval)
d.addEdge(trans[u->_interval], trans[interval]);
- d.setEntry(trans[_entry]);
+ d.setEntry(_entry->_instructions.front()->_addr);
d.intervals();
- foreach (typename Graph<Node*>::Node *du, d._nodes)
- foreach (Node *v, _nodes)
- if (v->_interval == du->_data)
- v->_interval = du->interval()->_data;
+ foreach (Block *du, d._blocks)
+ foreach (Block *v, _blocks)
+ if (v->_interval == du->_primitive)
+ v->_interval = du->_interval->_primitive;
}
- template<typename Printer> // printer is a functor taking Data and returning a string
- std::string graphvizPrint(Printer printer, const std::string &fontname="Courier", int fontsize=14) const {
+ std::string graphvizToString(const std::string &fontname="", int fontsize=0) const {
std::stringstream ret;
ret << "digraph G {" << std::endl;
- foreach (Node *interval, intervals()) {
+ foreach (Block *interval, intervals()) {
ret << "subgraph " << '"' << "cluster_" << interval << '"' << " {" << std::endl;
ret << "style=dotted;" << std::endl;
- foreach (Node *u, _nodes)
+ foreach (Block *u, _blocks)
if (u->_interval == interval) {
- ret << '"' << u << '"'
- << "[";
+ ret << '"' << u << "\"[";
if (fontname != "")
- ret << "fontname=" << '"' << fontname << '"' << ","
- << "fontsize=" << fontsize << ",";
- ret << "shape=box,"
- << "label=" << '"'
- << "<number: " << u->_number;
+ ret << "fontname=" << '"' << fontname << "\",";
+ if (fontsize != 0)
+ ret << "fontsize=" << fontsize << ",";
+ ret << "shape=box,label=\"<number: " << u->_number;
if (u->_loopFollow)
ret << ", loop_type=" << (u->_loopType == PRE_TESTED ? "pre_tested" : u->_loopType == POST_TESTED ? "post_tested" : "endless");
- ret << ">\\n"
- << graphvizEscapeLabel(printer(u->_data))
- << '"'
- << "];" << std::endl;
+ ret << ">\\n" << graphvizEscapeLabel(u->toString()) << "\"];" << std::endl;
}
ret << "}" << std::endl;
}
- foreach (Node *u, _nodes) {
- foreach (Node *v, u->_out)
- ret << '"' << u << '"'
- << " -> "
- << '"' << v << '"'
- << (v == u->_loopFollow ? "[color=blue]" : "")
- << ";" << std::endl;
- }
+ foreach (Block *u, _blocks)
+ foreach (Block *v, u->_out)
+ ret << '"' << u << "\" -> \"" << v << '"' << (v == u->_loopFollow ? "[color=blue]" : "") << ";" << std::endl;
ret << "}" << std::endl;
return ret.str();
}
-private:
-
void assignIntervals() const {
- std::list<Node*> intervals;
+ std::list<Block*> intervals;
intervals.push_back(_entry);
- foreach (Node *interval, intervals) {
+ foreach (Block *interval, intervals) {
interval->_interval = interval;
for (bool added = true; added; ) {
added = false;
- foreach (Node *m, _nodes) {
+ foreach (Block *m, _blocks) {
bool allPredInInterval = true;
- foreach (Node *p, m->_in)
+ foreach (Block *p, m->_in)
allPredInInterval &= p->_interval == interval;
if (!m->_interval && allPredInInterval) {
added = true;
@@ -260,9 +277,9 @@
}
}
}
- foreach (Node *m, _nodes) {
+ foreach (Block *m, _blocks) {
bool anyPredInInterval = false;
- foreach (Node *p, m->_in)
+ foreach (Block *p, m->_in)
anyPredInInterval |= p->_interval == interval;
if (!m->_interval && anyPredInInterval)
intervals.push_back(m);
Modified: tools/branches/gsoc2009-decompiler/decompiler/instruction.h
===================================================================
--- tools/branches/gsoc2009-decompiler/decompiler/instruction.h 2009-06-30 17:41:24 UTC (rev 41976)
+++ tools/branches/gsoc2009-decompiler/decompiler/instruction.h 2009-06-30 20:02:33 UTC (rev 41977)
@@ -9,6 +9,13 @@
struct Instruction {
string _description;
uint32 _addr;
+
+ virtual string toString() {
+ ostringstream ret;
+ ret << phex(_addr-8) << " " << _description << endl;
+ return ret.str();
+ }
+
Instruction(string description, uint32 addr) : _description(description), _addr(addr) {
}
virtual ~Instruction() {
@@ -21,6 +28,12 @@
uint32 target() {
return _addr+_offset;
}
+ string toString() {
+ ostringstream ret;
+ ret << phex(_addr-8) << " " << _description << " (" << phex(target()-8) << ")" << endl;
+ return ret.str();
+ }
+
Jump(string description, uint32 addr, int16 offset) : Instruction(description, addr), _offset(offset) {
}
};
@@ -50,33 +63,6 @@
_instructions.push_back(instr);
}
- index_t index(address_t addr) {
- for (index_t i = 0; i < _instructions.size(); i++)
- if (_instructions[i]->_addr == addr)
- return i;
- fprintf(stderr, "!!! (unhandled) no instruction with address %x (%d)\n", addr, addr);
- return (index_t)-1; // Note: -1 is negative, but index_t unsigned
- }
-
- Instruction* operator[](index_t i) {
- return _instructions[i];
- }
-
- index_t size() {
- return _instructions.size();
- }
-
- void print(ostream &out, index_t i) {
- if (i >= 1 && _instructions[i]->_addr == _instructions[i-1]->_addr)
- out << " " << _instructions[i]->_description;
- else
- out << phex(_instructions[i]->_addr-8) << " " << _instructions[i]->_description;
- Jump *jump = dynamic_cast<Jump*>(_instructions[i]);
- if (jump)
- out << " (" << phex(jump->target()-8) << ")";
- out << endl;
- }
-
};
Modified: tools/branches/gsoc2009-decompiler/decompiler/misc.h
===================================================================
--- tools/branches/gsoc2009-decompiler/decompiler/misc.h 2009-06-30 17:41:24 UTC (rev 41976)
+++ tools/branches/gsoc2009-decompiler/decompiler/misc.h 2009-06-30 20:02:33 UTC (rev 41977)
@@ -6,10 +6,13 @@
#include <iomanip>
#include <boost/foreach.hpp>
+#include <boost/utility.hpp>
#ifndef foreach
#define foreach BOOST_FOREACH
#endif
+using namespace boost;
+
typedef unsigned char uint8;
typedef short int16;
typedef unsigned short uint16;
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
More information about the Scummvm-git-logs
mailing list