[Scummvm-cvs-logs] SF.net SVN: scummvm:[51621] tools/branches/gsoc2010-decompiler/decompiler

pidgeot at users.sourceforge.net pidgeot at users.sourceforge.net
Mon Aug 2 16:38:46 CEST 2010


Revision: 51621
          http://scummvm.svn.sourceforge.net/scummvm/?rev=51621&view=rev
Author:   pidgeot
Date:     2010-08-02 14:38:45 +0000 (Mon, 02 Aug 2010)

Log Message:
-----------
Major overhaul of else detection

The else detection was horribly, horribly broken, leading to incorrect
and unbalanced output. This has now been revamped entirely, and all
SCUMMv6 scripts are at least balanced now. Additionally, various output
enhancements have been added.

Note: Short-circuit detection has been temporarily disabled, pending
proper handling. Consequently, one test, testShortCircuitDetection, is
currently *expected* to fail.

Modified Paths:
--------------
    tools/branches/gsoc2010-decompiler/decompiler/codegen.cpp
    tools/branches/gsoc2010-decompiler/decompiler/control_flow.cpp
    tools/branches/gsoc2010-decompiler/decompiler/control_flow.h
    tools/branches/gsoc2010-decompiler/decompiler/graph.h
    tools/branches/gsoc2010-decompiler/decompiler/scummv6/codegen.cpp
    tools/branches/gsoc2010-decompiler/decompiler/test/cfg_test.h

Modified: tools/branches/gsoc2010-decompiler/decompiler/codegen.cpp
===================================================================
--- tools/branches/gsoc2010-decompiler/decompiler/codegen.cpp	2010-08-02 14:38:20 UTC (rev 51620)
+++ tools/branches/gsoc2010-decompiler/decompiler/codegen.cpp	2010-08-02 14:38:45 UTC (rev 51621)
@@ -35,7 +35,7 @@
 
 std::ostream &IntEntry::print(std::ostream &output) const {
 	if (_isSigned)
-		output << _val;
+		output << (int32)_val;
 	else
 		output << (uint32)_val;
 	return output;
@@ -155,7 +155,7 @@
 	p = GET(entryPoint);
 	while (p != NULL) {
 		for (std::vector<CodeLine>::iterator it = p->_code.begin(); it != p->_code.end(); ++it) {
-			if (it->_unindentBefore && _indentLevel != 0)
+			if (it->_unindentBefore /*&& _indentLevel != 0*/)
 				_indentLevel--;
 			_output << boost::format("%08X: %s") % p->_start->_address % indentString(it->_line) << std::endl;
 			if (it->_indentAfter)
@@ -163,6 +163,9 @@
 		}
 		p = p->_next;
 	}
+
+	if (_indentLevel != 0)
+		std::cerr << "WARNING: Indent level ended at " << _indentLevel << std::endl;
 }
 
 void CodeGenerator::addOutputLine(std::string s, bool unindentBefore, bool indentAfter) {
@@ -187,7 +190,7 @@
 	for (InEdgeIterator ie = ier.first; ie != ier.second; ++ie) {
 		GraphVertex in = boost::source(*ie, _g);
 		GroupPtr inGroup = GET(in);
-		if (!boost::get(boost::edge_attribute, _g, *ie)._isJump)
+		if (!boost::get(boost::edge_attribute, _g, *ie)._isJump || inGroup->_stackLevel == -1)
 			continue;
 		switch (inGroup->_type) {
 		case kDoWhileCond:
@@ -330,8 +333,11 @@
 	} while (it++ != _curGroup->_end);
 
 	// Add else end if necessary
-	if (_curGroup->_endElse != NULL && !_curGroup->_endElse->_coalescedElse)
-		addOutputLine("}", true, false);
+	for(ElseEndIterator elseIt = _curGroup->_endElse.begin(); elseIt != _curGroup->_endElse.end(); ++elseIt)
+	{
+		if (!(*elseIt)->_coalescedElse)
+			addOutputLine("}", true, false);
+	}
 }
 
 void CodeGenerator::addArg(EntryPtr p) {
@@ -347,7 +353,7 @@
 			addArg(_stack.pop());
 			break;
 		default:
-			std::cerr << boost::format("Unknown character in metadata: %c\n") % c ;
+			std::cerr << boost::format("WARNING: Unknown character in metadata: %c\n") % c ;
 			break;
 	}
 }

Modified: tools/branches/gsoc2010-decompiler/decompiler/control_flow.cpp
===================================================================
--- tools/branches/gsoc2010-decompiler/decompiler/control_flow.cpp	2010-08-02 14:38:20 UTC (rev 51620)
+++ tools/branches/gsoc2010-decompiler/decompiler/control_flow.cpp	2010-08-02 14:38:45 UTC (rev 51621)
@@ -162,6 +162,8 @@
 		if (expectedStackLevel > grNext->_stackLevel)
 			expectedStackLevel = grNext->_stackLevel;
 
+		grCur->_stackLevel = expectedStackLevel;
+
 		stackLevel += curInst->_stackChange;
 
 		// Group ends after a jump
@@ -176,8 +178,17 @@
 			continue;
 		}
 
+		// If group has no instructions with stack effect >= 0, don't merge on balanced stack
+		bool forceMerge = true;
+		ConstInstIterator it = grCur->_start;
+		do {
+			if (it->_stackChange >= 0)
+				forceMerge = false;
+			++it;
+		} while (grCur->_start != grCur->_end && it != grCur->_end);
+
 		// Group ends when stack is balanced, unless just before conditional jump
-		if (stackLevel == expectedStackLevel && nextInst->_type != kCondJump && nextInst->_type != kCondJumpRel) {
+		if (stackLevel == expectedStackLevel && !forceMerge && nextInst->_type != kCondJump && nextInst->_type != kCondJumpRel) {
 			continue;
 		}
 
@@ -185,7 +196,7 @@
 		merge(cur, next);
 	}
 
-	detectShortCircuit();
+	//detectShortCircuit();
 }
 
 void ControlFlow::detectShortCircuit() {
@@ -230,7 +241,8 @@
 	detectWhile();
 	detectBreak();
 	detectContinue();
-	detectIf();	
+	detectIf();
+	detectElse();
 	return _g;
 }
 
@@ -279,7 +291,7 @@
 		if (gr->_type == kNormal && (gr->_end->_type == kJump || gr->_end->_type == kJumpRel) && out_degree(*v, _g) == 1) {
 			OutEdgeIterator oe = boost::out_edges(*v, _g).first;
 			GraphVertex target = boost::target(*oe, _g);
-			GroupPtr targetGr = GET(target);			
+			GroupPtr targetGr = GET(target);
 			// ...to somewhere later in the code...
 			if (gr->_start->_address >= targetGr->_start->_address)
 				continue;
@@ -308,16 +320,20 @@
 			// ...to a while or do-while condition...
 			if (targetGr->_type == kWhileCond || targetGr->_type == kDoWhileCond) {
 				bool isContinue = true;
-				// ...unless it is targeting a while condition...
-				if (targetGr->_type == kWhileCond) {
-					OutEdgeRange toer = boost::out_edges(target, _g);
-					for (OutEdgeIterator toe = toer.first; toe != toer.second; ++toe) {
-						// ...which jumps to the next sequential group
-						if (GET(boost::target(*toe, _g)) == gr->_next)
-							isContinue = false;
-					}
+				// ...unless...
+				OutEdgeRange toer = boost::out_edges(target, _g);
+				bool afterJumpTargets = true;
+				for (OutEdgeIterator toe = toer.first; toe != toer.second; ++toe) {
+					// ...it is targeting a while condition which jumps to the next sequential group
+					if (targetGr->_type == kWhileCond && GET(boost::target(*toe, _g)) == gr->_next)
+						isContinue = false;
+					// ...or the instruction is placed after all jump targets from condition
+					if (GET(boost::target(*toe, _g))->_start->_address > gr->_start->_address)
+						afterJumpTargets = false;
 				}
-	
+				if (afterJumpTargets)
+					isContinue = false;
+
 				if (isContinue && validateBreakOrContinue(gr, targetGr))
 					gr->_type = kContinue;
 			}
@@ -366,9 +382,18 @@
 	for (VertexIterator v = vr.first; v != vr.second; ++v) {
 		GroupPtr gr = GET(*v);
 		// if: Undetermined block with conditional jump
-		if (gr->_type == kNormal && out_degree(*v, _g) == 2) {
+		if (gr->_type == kNormal && (gr->_end->_type == kCondJump || gr->_end->_type == kCondJumpRel)) {
 			gr->_type = kIfCond;
+		}
+	}
+}
 
+void ControlFlow::detectElse() {
+	VertexRange vr = boost::vertices(_g);
+	for (VertexIterator v = vr.first; v != vr.second; ++v) {
+		GroupPtr gr = GET(*v);
+		// if: Undetermined block with conditional jump
+		if (gr->_type == kIfCond && (gr->_end->_type == kCondJump || gr->_end->_type == kCondJumpRel)) {
 			OutEdgeRange oer = boost::out_edges(*v, _g);
 			GraphVertex target;
 			uint32 maxAddress = 0;
@@ -392,9 +417,60 @@
 			OutEdgeIterator toe = boost::out_edges(find(targetGr->_prev->_start->_address), _g).first;
 			GroupPtr targetTargetGr = GET(boost::target(*toe, _g));
 			if (targetTargetGr->_start->_address > targetGr->_end->_address) {
-				targetGr->_startElse = true;
-				targetTargetGr->_prev->_endElse = targetGr.get();
+				if (validateElseBlock(gr, targetGr, targetTargetGr)) {
+					targetGr->_startElse = true;
+					targetTargetGr->_prev->_endElse.push_back(targetGr.get());
+				}
 			}
 		}
 	}
 }
+
+bool ControlFlow::validateElseBlock(GroupPtr ifGroup, GroupPtr start, GroupPtr end) {
+	for (GroupPtr cursor = start; cursor != end; cursor = cursor->_next) {
+		if (cursor->_type == kIfCond || cursor->_type == kWhileCond || cursor->_type == kDoWhileCond) {
+			// Validate outgoing edges of conditions
+			OutEdgeRange oer = boost::out_edges(find(cursor->_start), _g);
+			for (OutEdgeIterator oe = oer.first; oe != oer.second; ++oe) {
+				GraphVertex target = boost::target(*oe, _g);
+				GroupPtr targetGr = GET(target);
+				// Each edge from condition must not leave the range [start, end]
+				if (start->_start->_address > targetGr->_start->_address || targetGr->_start->_address > end->_start->_address)
+					return false;
+			}
+		}
+
+		// If group ends an else, that else must start inside the range
+		for(ElseEndIterator it = cursor->_endElse.begin(); it != cursor->_endElse.end(); ++it)
+		{
+			if ((*it)->_start->_address < start->_start->_address)
+				return false;
+		}
+
+		// Unless group is a simple unconditional jump...
+		if (cursor->_start->_type == kJump || cursor->_start->_type == kJumpRel)
+			continue;
+
+		// ...validate ingoing edges
+		InEdgeRange ier = boost::in_edges(find(cursor->_start), _g);
+		for (InEdgeIterator ie = ier.first; ie != ier.second; ++ie) {
+			GraphVertex source = boost::source(*ie, _g);
+			GroupPtr sourceGr = GET(source);
+
+			// Edges going to conditions...
+			if (sourceGr->_type == kIfCond || sourceGr->_type == kWhileCond || sourceGr->_type == kDoWhileCond) {
+				// ...must not come from outside the range [start, end]...
+				if (start->_start->_address > sourceGr->_start->_address || sourceGr->_start->_address > end->_start->_address) {
+					// ...unless source is simple unconditional jump...
+					if (sourceGr->_start->_type == kJump || sourceGr->_start->_type == kJumpRel)
+						continue;
+					// ...or the edge is from the if condition associated with this else
+					if (ifGroup == sourceGr)
+						continue;
+					return false;
+				}
+			}
+		}
+	}
+	return true;
+}

Modified: tools/branches/gsoc2010-decompiler/decompiler/control_flow.h
===================================================================
--- tools/branches/gsoc2010-decompiler/decompiler/control_flow.h	2010-08-02 14:38:20 UTC (rev 51620)
+++ tools/branches/gsoc2010-decompiler/decompiler/control_flow.h	2010-08-02 14:38:45 UTC (rev 51621)
@@ -111,11 +111,27 @@
 	bool validateBreakOrContinue(GroupPtr gr, GroupPtr condGr);
 
 	/**
-	 * Detects if and else blocks.
+	 * Detects if blocks.
 	 * Must be performed after break and continue detection.
 	 */
 	void detectIf();
 
+	/**
+	 * Detects else blocks.
+	 * Must be performed after if detection.
+	 */
+	void detectElse();
+
+	/**
+	 * Checks if a candidate else block will cross block boundaries.
+	 *
+	 * @param ifGroup The group containing the if this else candidate is associated with.
+	 * @param start   The group containing the start of the else.
+	 * @param end     The group immediately after the group ending the else.
+	 * @returns True if the validation succeeded, false if it did not.
+	 */
+	bool validateElseBlock(GroupPtr ifGroup, GroupPtr start, GroupPtr end);
+
 public:
 	/**
 	 * Gets the current control flow graph.

Modified: tools/branches/gsoc2010-decompiler/decompiler/graph.h
===================================================================
--- tools/branches/gsoc2010-decompiler/decompiler/graph.h	2010-08-02 14:38:20 UTC (rev 51620)
+++ tools/branches/gsoc2010-decompiler/decompiler/graph.h	2010-08-02 14:38:45 UTC (rev 51621)
@@ -218,6 +218,16 @@
 };
 
 /**
+ * Weak references to groups ending else blocks.
+ */
+typedef std::vector<Group *> ElseEnds;
+
+/**
+ * Iterator type for ElseEnds
+ */
+typedef ElseEnds::iterator ElseEndIterator;
+
+/**
  * Structure representing a group of instructions.
  */
 struct Group {
@@ -233,7 +243,7 @@
 	int _stackLevel;             ///< Level of the stack upon entry.
 	GroupType _type;             ///< Type of the group.
 	bool _startElse;             ///< Group is start of an else block.
-	Group *_endElse;             ///< Group is end of an else block.
+	ElseEnds _endElse;           ///< Group is end of an else block.
 	Group *_prev;                ///< Pointer to the previous group, when ordered by address. Used for short-circuit analysis.
 	Group *_next;                ///< Pointer to the next group, when ordered by address.
 	std::vector<CodeLine> _code; ///< Container for decompiled lines of code.
@@ -260,7 +270,6 @@
 		_type = kNormal;
 		_prev = prev.get();
 		_startElse = false;
-		_endElse = NULL;
 		if (_prev != NULL)
 			_prev->_next = this;
 		_next = NULL;
@@ -299,8 +308,10 @@
 		output << "\\n";
 		if (group->_startElse)
 			output << "Start of else\\n";
-		if (group->_endElse)
-			output << boost::format("End of else at %08x\\n") % group->_endElse->_start->_address;
+		for(ElseEndIterator it = group->_endElse.begin(); it != group->_endElse.end(); ++it)
+		{
+			output << boost::format("End of else at %08x\\n") % (*it)->_start->_address;
+		}
 		output << "|";
 		ConstInstIterator inst = group->_start;
 		do {

Modified: tools/branches/gsoc2010-decompiler/decompiler/scummv6/codegen.cpp
===================================================================
--- tools/branches/gsoc2010-decompiler/decompiler/scummv6/codegen.cpp	2010-08-02 14:38:20 UTC (rev 51620)
+++ tools/branches/gsoc2010-decompiler/decompiler/scummv6/codegen.cpp	2010-08-02 14:38:45 UTC (rev 51621)
@@ -106,7 +106,7 @@
 			default:
 				{
 					std::stringstream s;
-					s << boost::format("Unknown opcode %X at address %08X") % inst._opcode % inst._address;
+					s << boost::format("WARNING: Unknown opcode %X at address %08X") % inst._opcode % inst._address;
 					addOutputLine(s.str());
 				}
 				break;
@@ -131,7 +131,7 @@
 		default:
 			{
 				std::stringstream s;
-				s << boost::format("Couldn't handle conditional jump at address %08X") % inst._address;
+				s << boost::format("WARNING: Couldn't handle conditional jump at address %08X") % inst._address;
 				addOutputLine(s.str());
 			}
 			break;
@@ -166,7 +166,7 @@
 		default:
 			{
 				std::stringstream s;
-				s << boost::format("Unknown opcode %X at address %08X") % inst._opcode % inst._address;
+				s << boost::format("WARNING: Unknown opcode %X at address %08X") % inst._opcode % inst._address;
 				addOutputLine(s.str());
 			}
 			break;
@@ -207,7 +207,7 @@
 		default:
 			{
 				std::stringstream s;
-				s << boost::format("Unknown opcode %X at address %08X") % inst._opcode % inst._address;
+				s << boost::format("WARNING: Unknown opcode %X at address %08X") % inst._opcode % inst._address;
 				addOutputLine(s.str());
 			}
 			break;
@@ -216,7 +216,7 @@
 	default:
 		{
 			std::stringstream s;
-			s << boost::format("Unknown opcode %X at address %08X") % inst._opcode % inst._address;
+			s << boost::format("WARNING: Unknown opcode %X at address %08X") % inst._opcode % inst._address;
 			addOutputLine(s.str());
 		}
 		break;
@@ -445,7 +445,7 @@
 				addArg(new IntEntry(inst._params[0].getUnsigned(), false));
 				break;
 			default:
-				std::cerr << boost::format("Unexpected type for parameter 0 @ %08X while processing metadata character %c") % inst._address % c;
+				std::cerr << boost::format("WARNING: Unexpected type for parameter 0 @ %08X while processing metadata character %c") % inst._address % c;
 				break;
 			}
 			break;

Modified: tools/branches/gsoc2010-decompiler/decompiler/test/cfg_test.h
===================================================================
--- tools/branches/gsoc2010-decompiler/decompiler/test/cfg_test.h	2010-08-02 14:38:20 UTC (rev 51620)
+++ tools/branches/gsoc2010-decompiler/decompiler/test/cfg_test.h	2010-08-02 14:38:45 UTC (rev 51621)
@@ -376,7 +376,7 @@
 			GroupPtr gr = GET(*it);
 			if (gr->_start->_address == 0x10) {
 				TS_ASSERT(gr->_startElse);
-				TS_ASSERT(gr->_endElse == gr);
+				TS_ASSERT(gr->_endElse.size() == 1 && gr->_endElse[0] == gr);
 			}
 		}
 		delete c;
@@ -525,37 +525,38 @@
 			case 0x6:
 				TS_ASSERT(gr->_type == kWhileCond);
 				TS_ASSERT(!gr->_startElse);
-				TS_ASSERT(!gr->_endElse);
+				TS_ASSERT(gr->_endElse.empty());
 				break;
 			case 0x19:
 			case 0x3A:
 			case 0x4F:
 			case 0x68:
+			case 0x74: // Allow inclusion of the pop instruction immediately before
 			case 0x75:
 			case 0x92:
 				TS_ASSERT(gr->_type == kIfCond);
 				TS_ASSERT(!gr->_startElse);
-				TS_ASSERT(!gr->_endElse);
+				TS_ASSERT(gr->_endElse.empty());
 				break;
 			case 0x8B:
 				TS_ASSERT(gr->_type == kNormal);
 				TS_ASSERT(gr->_startElse);
-				TS_ASSERT(gr->_endElse && gr->_endElse->_start->_address == 0x8B);
+				TS_ASSERT(gr->_endElse.size() == 1 && gr->_endElse[0]->_start->_address == 0x8B);
 				break;
 			case 0x91:
-				TS_ASSERT(gr->_type == kNormal);
+				TS_ASSERT(gr->_type == kNormal ||  gr->_type == kIfCond); // Allow inclusion of the pop instruction immediately before
 				TS_ASSERT(gr->_startElse);
-				TS_ASSERT(!gr->_endElse);
+				TS_ASSERT(gr->_endElse.empty());
 				break;
 			case 0xA6:
 				TS_ASSERT(gr->_type == kNormal);
 				TS_ASSERT(!gr->_startElse);
-				TS_ASSERT(gr->_endElse && gr->_endElse->_start->_address == 0x91);
+				TS_ASSERT(gr->_endElse.size() == 1 && gr->_endElse[0]->_start->_address == 0x91);
 				break;
 			default:
 				TS_ASSERT(gr->_type == kNormal);
 				TS_ASSERT(!gr->_startElse);
-				TS_ASSERT(!gr->_endElse);
+				TS_ASSERT(gr->_endElse.empty());
 				break;
 			}
 		}


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.




More information about the Scummvm-git-logs mailing list