[Scummvm-cvs-logs] SF.net SVN: scummvm:[51719] tools/branches/gsoc2010-decompiler

pidgeot at users.sourceforge.net pidgeot at users.sourceforge.net
Wed Aug 4 04:30:24 CEST 2010


Revision: 51719
          http://scummvm.svn.sourceforge.net/scummvm/?rev=51719&view=rev
Author:   pidgeot
Date:     2010-08-04 02:30:22 +0000 (Wed, 04 Aug 2010)

Log Message:
-----------
Start adapting CFG analysis to KYRA

Basic structure seems to mostly be there, but there are still a few
hitches with functions - probably in the detection.

Modified Paths:
--------------
    tools/branches/gsoc2010-decompiler/Makefile.common
    tools/branches/gsoc2010-decompiler/decompiler/control_flow.cpp
    tools/branches/gsoc2010-decompiler/decompiler/decompiler.cpp
    tools/branches/gsoc2010-decompiler/decompiler/engine.h
    tools/branches/gsoc2010-decompiler/decompiler/graph.h
    tools/branches/gsoc2010-decompiler/decompiler/kyra/disassembler.cpp
    tools/branches/gsoc2010-decompiler/decompiler/kyra/engine.cpp
    tools/branches/gsoc2010-decompiler/decompiler/kyra/engine.h

Modified: tools/branches/gsoc2010-decompiler/Makefile.common
===================================================================
--- tools/branches/gsoc2010-decompiler/Makefile.common	2010-08-04 01:12:41 UTC (rev 51718)
+++ tools/branches/gsoc2010-decompiler/Makefile.common	2010-08-04 02:30:22 UTC (rev 51719)
@@ -275,6 +275,7 @@
 	decompiler/disassembler.o \
 	decompiler/simple_disassembler.o \
 	decompiler/unknown_opcode.o \
+	decompiler/graph.o \
 	decompiler/control_flow.o \
 	decompiler/codegen.o \
 	decompiler/scummv6/disassembler.o \

Modified: tools/branches/gsoc2010-decompiler/decompiler/control_flow.cpp
===================================================================
--- tools/branches/gsoc2010-decompiler/decompiler/control_flow.cpp	2010-08-04 01:12:41 UTC (rev 51718)
+++ tools/branches/gsoc2010-decompiler/decompiler/control_flow.cpp	2010-08-04 02:30:22 UTC (rev 51719)
@@ -36,11 +36,12 @@
 
 ControlFlow::ControlFlow(const std::vector<Instruction> &insts, Engine *engine) : _insts(insts) {
 	_engine = engine;
-	GraphVertex last;
-	bool addEdge = false;
+
+	if (engine->_functions.empty())
+		engine->_functions[insts.begin()->_address] = Function(insts.begin(), insts.end());
+
+	GroupPtr prev = NULL;
 	int id = 0;
-	GroupPtr prev = NULL;
-
 	for (ConstInstIterator it = insts.begin(); it != insts.end(); ++it) {
 		GraphVertex cur = boost::add_vertex(_g);
 		_addrMap[it->_address] = cur;
@@ -48,13 +49,31 @@
 		PUT_ID(cur, id);
 		id++;
 
+		if (_engine->_functions.find(it->_address) != _engine->_functions.end())
+			_engine->_functions[it->_address]._v = cur;
+
+		prev = GET(cur);
+	}
+
+	FuncMap::iterator fn;
+	GraphVertex last;
+	bool addEdge = false;
+	prev = NULL;
+	for (ConstInstIterator it = insts.begin(); it != insts.end(); ++it) {
+		if (_engine->_functions.find(it->_address) != _engine->_functions.end()) {
+			addEdge = false;
+		}
+
+		GraphVertex cur = find(it);
 		if (addEdge) {
 			GraphEdge e = boost::add_edge(last, cur, _g).first;
 			PUT_EDGE(e, false);
 		}
+
 		last = cur;
-		addEdge = (it->_type != kJump && it->_type != kJumpRel);
+		addEdge = (it->_type != kJump && it->_type != kJumpRel && it->_type != kReturn);
 		prev = GET(cur);
+
 	}
 
 	for (ConstInstIterator it = insts.begin(); it != insts.end(); ++it) {
@@ -139,9 +158,10 @@
 }
 
 void ControlFlow::createGroups() {
-	if (GET(find(_insts.begin()))->_stackLevel != -1)
+	if (GET(_engine->_functions.begin()->second._v)->_stackLevel != -1)
 		return;
-	setStackLevel(find(_insts.begin()), 0);
+	for (FuncMap::iterator fn = _engine->_functions.begin(); fn != _engine->_functions.end(); ++fn)
+		setStackLevel(fn->second._v, 0);
 	ConstInstIterator curInst, nextInst;
 	nextInst = _insts.begin();
 	nextInst++;
@@ -152,13 +172,15 @@
 		GraphVertex cur = find(curInst);
 		GraphVertex next = find(nextInst);
 
-		if (in_degree(cur, _g) == 0 && out_degree(cur, _g) == 0)
-			continue;
 		GroupPtr grCur = GET(cur);
 		GroupPtr grNext = GET(next);
+
+		// Don't process unreachable code
+		if (grCur->_stackLevel == -1)
+			continue;
+
 		expectedStackLevel = grCur->_stackLevel;
-
-		if (expectedStackLevel > grNext->_stackLevel && grNext->_stackLevel >= 0)
+		if (expectedStackLevel > grNext->_stackLevel)
 			expectedStackLevel = grNext->_stackLevel;
 
 		grCur->_stackLevel = expectedStackLevel;
@@ -171,6 +193,10 @@
 			continue;
 		}
 
+		// Group ends with a return
+		if (curInst->_type == kReturn)
+			continue;
+
 		// Group ends before target of a jump
 		if (in_degree(next, _g) != 1) {
 			stackLevel = grNext->_stackLevel;

Modified: tools/branches/gsoc2010-decompiler/decompiler/decompiler.cpp
===================================================================
--- tools/branches/gsoc2010-decompiler/decompiler/decompiler.cpp	2010-08-04 01:12:41 UTC (rev 51718)
+++ tools/branches/gsoc2010-decompiler/decompiler/decompiler.cpp	2010-08-04 02:30:22 UTC (rev 51719)
@@ -148,12 +148,12 @@
 				buf = std::cout.rdbuf();
 			}
 			std::ostream out(buf);
-			boost::write_graphviz(out, g, boost::make_label_writer(get(boost::vertex_name, g)), boost::makeArrowheadWriter(get(boost::edge_attribute, g)), GraphProperties());
+			boost::write_graphviz(out, g, boost::make_label_writer(get(boost::vertex_name, g)), boost::makeArrowheadWriter(get(boost::edge_attribute, g)), GraphProperties(engine, g));
 		}
 
 		if (!engine->supportsCodeGen() || vm.count("only-graph")) {
 			if (!vm.count("dump-graph")) {
-				boost::write_graphviz(std::cout, g, boost::make_label_writer(get(boost::vertex_name, g)), boost::makeArrowheadWriter(get(boost::edge_attribute, g)), GraphProperties());
+				boost::write_graphviz(std::cout, g, boost::make_label_writer(get(boost::vertex_name, g)), boost::makeArrowheadWriter(get(boost::edge_attribute, g)), GraphProperties(engine, g));
 			}
 			delete cf;
 			delete engine;

Modified: tools/branches/gsoc2010-decompiler/decompiler/engine.h
===================================================================
--- tools/branches/gsoc2010-decompiler/decompiler/engine.h	2010-08-04 01:12:41 UTC (rev 51718)
+++ tools/branches/gsoc2010-decompiler/decompiler/engine.h	2010-08-04 02:30:22 UTC (rev 51719)
@@ -33,8 +33,8 @@
  */
 struct Function {
 public:
-	InstIterator _startIt; ///< Iterator to of the first instruction in the function, if available.
-	InstIterator _endIt;   ///< Iterator to the instruction immediately after the function, similar to end() on STL containers, if available.
+	ConstInstIterator _startIt; ///< Iterator to of the first instruction in the function, if available.
+	ConstInstIterator _endIt;   ///< Iterator to the instruction immediately after the function, similar to end() on STL containers, if available.
 	std::string _name;     ///< Function name.
 	GraphVertex _v;        ///< Graph vertex for the entry point to the function.
 	uint32 _args;          ///< Number of arguments to the function.
@@ -54,7 +54,7 @@
 	 * @param startIt Index of the first instruction in the function.
 	 * @param endIt Index of the instruction immediately after the function, similar to end() on STL containers.
 	 */
-	Function(InstIterator startIt, InstIterator endIt) : _startIt(startIt), _endIt(endIt) {
+	Function(ConstInstIterator startIt, ConstInstIterator endIt) : _startIt(startIt), _endIt(endIt) {
 	}
 };
 

Modified: tools/branches/gsoc2010-decompiler/decompiler/graph.h
===================================================================
--- tools/branches/gsoc2010-decompiler/decompiler/graph.h	2010-08-04 01:12:41 UTC (rev 51718)
+++ tools/branches/gsoc2010-decompiler/decompiler/graph.h	2010-08-04 02:30:22 UTC (rev 51719)
@@ -379,21 +379,32 @@
 }
 }
 
+class Engine;
+
 /**
  * Type used to set properties for dot output.
  */
 struct GraphProperties {
+private:
+	Engine *_engine; ///< Pointer to the engine containing function information for the script.
+	const Graph &_g; ///< Const reference to the graph for the script.
 
+public:
 	/**
+	 * Constructor for GraphProperties.
+	 *
+	 * @param engine Pointer to the engine containing function information for the script.
+	 * @param g Const reference to the graph for the script.
+	 */
+	GraphProperties(Engine *engine, const Graph &g) : _engine(engine), _g(g) {
+	}
+
+	/**
 	 * Called by write_graphviz from Boost.Graph to print properties of the graph.
 	 *
 	 * @param out The std::ostream write_graphviz is writing to.
 	 */
-	void operator()(std::ostream& out) const {
-		out << "node [shape=record]" << std::endl;
-		out << "XXX [shape=none, label=\"\", height=0]" << std::endl;
-		out << "XXX -> 0" << std::endl;
-	}
+	void operator()(std::ostream& out) const;
 };
 
 #endif

Modified: tools/branches/gsoc2010-decompiler/decompiler/kyra/disassembler.cpp
===================================================================
--- tools/branches/gsoc2010-decompiler/decompiler/kyra/disassembler.cpp	2010-08-04 01:12:41 UTC (rev 51718)
+++ tools/branches/gsoc2010-decompiler/decompiler/kyra/disassembler.cpp	2010-08-04 02:30:22 UTC (rev 51719)
@@ -374,7 +374,7 @@
 
 #define ADD_INST _insts.insert(_insts.end(), Instruction());
 #define LAST_INST (_insts[_insts.size()-1])
-#define OPCODE_MD(name, category, stackChange, hasParam, codeGenData) \
+#define OPCODE_MD(name, category, stackChange, hasParam, isSigned, codeGenData) \
 		ADD_INST; \
 		LAST_INST._opcode = opcode; \
 		LAST_INST._address = address; \
@@ -384,89 +384,94 @@
 		LAST_INST._codeGenData = codeGenData; \
 		if (hasParam) { \
 			Parameter p; \
-			p._type = kShort; \
-			p._value = parameter; \
+			if (isSigned) { \
+				p._type = kShort; \
+				p._value = parameter; \
+			} else { \
+				p._type = kUShort; \
+				p._value = (uint32)parameter; \
+			} \
 			LAST_INST._params.push_back(p);\
 		}
-#define OPCODE(name, category, stackChange, hasParam) OPCODE_MD(name, category, stackChange, hasParam, "");
+#define OPCODE(name, category, stackChange, hasParam, isSigned) OPCODE_MD(name, category, stackChange, hasParam, isSigned, "");
 
 		// TOOD: Add metadata where applicable
 		switch(opcode) {
 		case 0:
-			parameter *=2;
+			parameter *= 2;
 			if (parameter < minFuncAddr)
 				jumpTargets.insert(_insts.size());
-			OPCODE("jumpTo", kJump, 0, true);
+			OPCODE("jumpTo", kJump, 0, true, false);
 			break;
 		case 1:
-			OPCODE("setRetValue", kStore, 0, true);
+			OPCODE("setRetValue", kStore, 0, true, true);
 			break;
 		case 2:
 			if (parameter == 0) {
-				OPCODE("pushRet", kLoad, 1, false);
+				OPCODE("pushRet", kLoad, 1, false, false);
 			} else if (parameter == 1) {
-				OPCODE("pushPos", kSpecial, 2, false); // Sets up function call
+				OPCODE("pushPos", kSpecial, 2, false, false); // Sets up function call
 			} else {
 				// Error: invalid parameter halts execution
 			}
 			break;
 		case 3:
 		case 4:
-			OPCODE("push", kLoad, 1, true);
+			OPCODE("push", kLoad, 1, true, true);
 			break;
 		case 5:
-			OPCODE("pushVar", kLoad, 1, true);
+			OPCODE("pushVar", kLoad, 1, true, true);
 			break;
 		case 6:
-			OPCODE("pushBPNeg", kLoad, 1, true);
+			OPCODE("pushBPNeg", kLoad, 1, true, true);
 			break;
 		case 7:
-			OPCODE("pushBPAdd", kLoad, 1, true);
+			OPCODE("pushBPAdd", kLoad, 1, true, true);
 			break;
 		case 8:
 			if (parameter == 0) {
-				OPCODE("popRet", kStore, -1, false);
+				OPCODE("popRet", kStore, -1, false, false);
 			} else if (parameter == 1) {
-				OPCODE("popPos", kReturn, -2, false); // Returns from function call
+				OPCODE("popPos", kReturn, 0, false, false); // Returns from function call
 			} else {
 				// Error: invalid parameter halts execution
 			}
 			break;
 		case 9:
-			OPCODE("popVar", kStore, 1, true);
+			OPCODE("popVar", kStore, 1, true, true);
 			break;
 		case 10:
-			OPCODE("popBPNeg", kStore, 1, true);
+			OPCODE("popBPNeg", kStore, 1, true, true);
 			break;
 		case 11:
-			OPCODE("popBPAdd", kStore, 1, true);
+			OPCODE("popBPAdd", kStore, 1, true, true);
 			break;
 		case 12:
-			OPCODE("addSP", kStack, -parameter, true);
+			OPCODE("addSP", kStack, -parameter, true, true);
 			break;
 		case 13:
-			OPCODE("subSP", kStack, parameter, true);
+			OPCODE("subSP", kStack, parameter, true, true);
 			break;
 		case 14:
 			parameter = (uint8)parameter;
 			if ((uint16)parameter >= sizeof(kyra2FuncDesc) / sizeof(kyra2FuncDesc[0]) || kyra2FuncDesc[parameter]._name.length() == 0) {
 				// Error: unknown function
 			}
-			OPCODE_MD(kyra2FuncDesc[parameter]._name, kSpecial, 0, false, kyra2FuncDesc[parameter]._metadata)
+			OPCODE_MD(kyra2FuncDesc[parameter]._name, kSpecial, 0, false, false, kyra2FuncDesc[parameter]._metadata)
 			break;
 		case 15:
-			parameter *=2;
+			parameter *= 2;
 			if (parameter < minFuncAddr)
 				jumpTargets.insert(_insts.size());
-			OPCODE("ifNotJmp", kCondJump, -1, true);
+			OPCODE("ifNotJmp", kCondJump, -1, true, false);
 			break;
 		case 16:
 			if (parameter == 0) {
-				OPCODE_MD("boolCast", kUnaryOp, 0, false, "(bool)");
+				OPCODE_MD("boolCast", kUnaryOp, 0, false, false, "(bool)");
 			} else if (parameter == 1) {
-				OPCODE_MD("arithmeticNegate", kUnaryOp, 0, false,"-");
+				OPCODE_MD("arithmeticNegate", kUnaryOp, 0, false, false,"-");
 			} else if (parameter == 2) {
-				OPCODE_MD("bitwiseNegate", kUnaryOp, 0, false, "~");
+				OPCODE_MD("bitwiseNegate", kUnaryOp, 0, false, false, "~");
 			} else {
 				// Error: invalid parameter halts execution
 			}
@@ -474,58 +479,58 @@
 		case 17:
 			switch (parameter) {
 				case 0:
-					OPCODE_MD("eval_band", kBinaryOp, -1, false, "&&");
+					OPCODE_MD("eval_band", kBinaryOp, -1, false, false, "&&");
 					break;
 				case 1:
-					OPCODE_MD("eval_bor", kBinaryOp, -1, false, "||");
+					OPCODE_MD("eval_bor", kBinaryOp, -1, false, false, "||");
 					break;
 				case 2:
-					OPCODE_MD("eval_eq", kComparison, -1, false, "==");
+					OPCODE_MD("eval_eq", kComparison, -1, false, false, "==");
 					break;
 				case 3:
-					OPCODE_MD("eval_neq", kComparison, -1, false, "!=");
+					OPCODE_MD("eval_neq", kComparison, -1, false, false, "!=");
 					break;
 				case 4:
-					OPCODE_MD("eval_leq", kComparison, -1, false, "<=");
+					OPCODE_MD("eval_leq", kComparison, -1, false, false, "<=");
 					break;
 				case 5:
-					OPCODE_MD("eval_lt", kComparison, -1, false, "<");
+					OPCODE_MD("eval_lt", kComparison, -1, false, false, "<");
 					break;
 				case 6:
-					OPCODE_MD("eval_geq", kComparison, -1, false, ">=");
+					OPCODE_MD("eval_geq", kComparison, -1, false, false, ">=");
 					break;
 				case 7:
-					OPCODE_MD("eval_gt", kComparison, -1, false, ">");
+					OPCODE_MD("eval_gt", kComparison, -1, false, false, ">");
 					break;
 				case 8:
-					OPCODE_MD("eval_add", kBinaryOp, -1, false, "+");
+					OPCODE_MD("eval_add", kBinaryOp, -1, false, false, "+");
 					break;
 				case 9:
-					OPCODE_MD("eval_sub", kBinaryOp, -1, false, "-");
+					OPCODE_MD("eval_sub", kBinaryOp, -1, false, false, "-");
 					break;
 				case 10:
-					OPCODE_MD("eval_mult", kBinaryOp, -1, false, "*");
+					OPCODE_MD("eval_mult", kBinaryOp, -1, false, false, "*");
 					break;
 				case 11:
-					OPCODE_MD("eval_div", kBinaryOp, -1, false, "/");
+					OPCODE_MD("eval_div", kBinaryOp, -1, false, false, "/");
 					break;
 				case 12:
-					OPCODE_MD("eval_shr", kBinaryOp, -1, false, ">>");
+					OPCODE_MD("eval_shr", kBinaryOp, -1, false, false, ">>");
 					break;
 				case 13:
-					OPCODE_MD("eval_shl", kBinaryOp, -1, false, "<<");
+					OPCODE_MD("eval_shl", kBinaryOp, -1, false, false, "<<");
 					break;
 				case 14:
-					OPCODE_MD("eval_land", kBinaryOp, -1, false, "&");
+					OPCODE_MD("eval_land", kBinaryOp, -1, false, false, "&");
 					break;
 				case 15:
-					OPCODE_MD("eval_lor", kBinaryOp, -1, false, "|");
+					OPCODE_MD("eval_lor", kBinaryOp, -1, false, false, "|");
 					break;
 				case 16:
-					OPCODE_MD("eval_mod", kBinaryOp, -1, false, "%");
+					OPCODE_MD("eval_mod", kBinaryOp, -1, false, false, "%");
 					break;
 				case 17:
-					OPCODE_MD("eval_xor", kBinaryOp, -1, false, "^");
+					OPCODE_MD("eval_xor", kBinaryOp, -1, false, false, "^");
 					break;
 				default:
 					// Error: Invalid parameter
@@ -533,7 +538,7 @@
 			}
 			break;
 		case 18:
-			OPCODE("setRetAndJmp", kSpecial, -2, false);
+			OPCODE("setRetAndJmp", kSpecial, -2, false, false);
 			break;
 		default:
 			throw UnknownOpcodeException(i*2, code);
@@ -553,36 +558,36 @@
 	// Function detection
 	uint16 nextFunc = 0;
 	// Process candidate entry points
-	for (std::set<uint16>::iterator it = jumpTargets.begin(); it != jumpTargets.end(); ++it) {
+/*	for (std::set<uint16>::iterator it = jumpTargets.begin(); it != jumpTargets.end(); ++it) {
 		// If candidate was already placed inside a function, skip it
 		if (_insts[*it]._address < nextFunc)
 			continue;
-		// Determine function end point
+*/		// Determine function end point
 		bool lastWasPop = false;
 
-		for (int i = *it ; _insts[i]._address < minFuncAddr; i++) {
-			if (_insts[i]._address < *it)
-				continue;
+		for (int i = 0 ; _insts[i]._address < minFuncAddr; i++) {
+//			if (_insts[i]._address < *it)
+//				continue;
 			// Kyra2 sometimes has an addSP instruction between the two popPos instrucitons, so we ignore those
 			if (_insts[i]._name.compare("addSP") == 0)
 					continue;
 
 			if (lastWasPop && _insts[i]._name.compare("popPos") == 0) {
-				_engine->_functions[_insts[*it]._address] = Function(addrMap[_insts[*it]._address], addrMap[_insts[i]._address]);
-				nextFunc = _insts[i]._address;
+				_engine->_functions[_insts[nextFunc]._address] = Function(addrMap[_insts[nextFunc]._address], addrMap[_insts[i]._address]);
+				nextFunc = i+1;
 				break;
 			}
 
 			lastWasPop = (_insts[i]._name.compare("popPos") == 0);
 		}
-	}
+//	}
 
 	// Add metadata to newly found functions
 	for (FuncMap::iterator it = _engine->_functions.begin(); it != _engine->_functions.end(); ++it) {
 		std::stringstream s;
 		s << boost::format("sub0x%X") % it->second._startIt->_address;
 		int maxArg = 0;
-		for (InstIterator instIt = it->second._startIt; instIt != it->second._endIt; ++instIt) {
+		for (ConstInstIterator instIt = it->second._startIt; instIt != it->second._endIt; ++instIt) {
 			if (instIt->_name.compare("pushBPAdd") == 0) {
 				if (maxArg < instIt->_params[0].getSigned()) {
 					maxArg = instIt->_params[0].getSigned();
@@ -605,8 +610,10 @@
 	// Correct jumps to functions so they're treated as calls
 	for (InstIterator it = _insts.begin(); it != _insts.end(); ++it) {
 		if (it->_type == kJump || it->_type == kCondJump) {
-			if (_engine->_functions.find(it->_address) != _engine->_functions.end())
+			if (_engine->_functions.find(it->_params[0].getUnsigned()) != _engine->_functions.end()) {
 				it->_type = kCall;
+				std::clog << boost::format("Changed instruction at 0x%08X to call\n") % it->_address;
+			}
 		}
 	}
 }

Modified: tools/branches/gsoc2010-decompiler/decompiler/kyra/engine.cpp
===================================================================
--- tools/branches/gsoc2010-decompiler/decompiler/kyra/engine.cpp	2010-08-04 01:12:41 UTC (rev 51718)
+++ tools/branches/gsoc2010-decompiler/decompiler/kyra/engine.cpp	2010-08-04 02:30:22 UTC (rev 51719)
@@ -28,7 +28,7 @@
 }
 
 uint32 Kyra::Engine::getDestAddress(ConstInstIterator it) const {
-	return it->_params[0].getSigned();
+	return it->_params[0].getUnsigned();
 }
 
 ::CodeGenerator *Kyra::Engine::getCodeGenerator(std::ostream &output) {

Modified: tools/branches/gsoc2010-decompiler/decompiler/kyra/engine.h
===================================================================
--- tools/branches/gsoc2010-decompiler/decompiler/kyra/engine.h	2010-08-04 01:12:41 UTC (rev 51718)
+++ tools/branches/gsoc2010-decompiler/decompiler/kyra/engine.h	2010-08-04 02:30:22 UTC (rev 51719)
@@ -38,7 +38,6 @@
 	::Disassembler *getDisassembler();
 	uint32 getDestAddress(ConstInstIterator it) const;
 	::CodeGenerator *getCodeGenerator(std::ostream &output);
-	bool supportsCodeFlow() { return false; }
 	bool supportsCodeGen() { return false; }
 
 	std::vector<std::string> _textStrings;


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.




More information about the Scummvm-git-logs mailing list