[Scummvm-cvs-logs] SF.net SVN: scummvm:[40380] tools/trunk/extract_gob_stk.cpp

strangerke at users.sourceforge.net strangerke at users.sourceforge.net
Fri May 8 13:37:39 CEST 2009


Revision: 40380
          http://scummvm.svn.sourceforge.net/scummvm/?rev=40380&view=rev
Author:   strangerke
Date:     2009-05-08 11:37:39 +0000 (Fri, 08 May 2009)

Log Message:
-----------
Implement recent Pre and Post-Gob compressions based on file analysis:
- Geisha : Experimental handling of 0OT twisted behavior
- Gob '2000-2006' era (approx.) : First implementation of the new file structure (STK2.1) used for ITK and STK files. 5 bytes in one of the sections are still unknown, so the implementation *should not be considered* complete, even if the extracted files look OK.

Modified Paths:
--------------
    tools/trunk/extract_gob_stk.cpp

Modified: tools/trunk/extract_gob_stk.cpp
===================================================================
--- tools/trunk/extract_gob_stk.cpp	2009-05-08 11:24:20 UTC (rev 40379)
+++ tools/trunk/extract_gob_stk.cpp	2009-05-08 11:37:39 UTC (rev 40380)
@@ -23,9 +23,10 @@
 #include "util.h"
 
 struct Chunk {
-	char name[14];
+	char name[64];
 	uint32 size, offset;
-	bool packed; 
+	bool packed;
+	bool preGob;
 
 	Chunk *next;
 
@@ -35,10 +36,14 @@
 
 void extractError(FILE *f1, FILE *f2, Chunk *chunks, const char *msg);
 Chunk *readChunkList(FILE *stk);
+Chunk *readChunkListV2(FILE *stk);
 void extractChunks(FILE *stk, Chunk *chunks);
 byte *unpackData(byte *src, uint32 &size);
+byte *unpackPreGobData(byte *src, uint32 &size, uint32 &compSize);
 
 int main(int argc, char **argv) {
+	char signature[7];
+	Chunk *chunks;
 
 	if ((argc < 2) || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
 
@@ -53,10 +58,21 @@
 	if (!(stk = fopen(argv[1], "rb")))
 		error("Couldn't open file \"%s\"", argv[1]);
 
-	Chunk *chunks = readChunkList(stk);
+	if (fread(signature, 1, 6, stk) < 6)
+		error("Unexpected EOF while reading signature in \"%s\"", argv[1]);
+	
+	if (strncmp(signature, "STK2.1", 6)==0)
+	{
+		warning("Signature of new STK format (STK 2.1) detected in file \"%s\"", argv[1]);
+		chunks = readChunkListV2(stk);
+
+	} else {
+		rewind(stk);
+		chunks = readChunkList(stk);
+	}
+	
 	extractChunks(stk, chunks);
 	delete chunks;
-
 	fclose(stk);
 	return 0;
 }
@@ -75,15 +91,25 @@
 	uint16 numDataChunks = readUint16LE(stk);
 	Chunk *chunks = new Chunk;
 	Chunk *curChunk = chunks;
+	char *fakeTotPtr; 
 
 	while (numDataChunks-- > 0) {
-		if (fread(curChunk->name, 13, 1, stk) < 1)
+		if (fread(curChunk->name, 1, 13, stk) < 13)
 			extractError(stk, 0, chunks, "Unexpected EOF");
 
 		curChunk->size = readUint32LE(stk);
 		curChunk->offset = readUint32LE(stk);
 		curChunk->packed = readByte(stk) != 0;
+		curChunk->preGob = false;
 
+// Geisha TOTs are compressed without having the flag set
+		fakeTotPtr = strstr(curChunk->name, "0OT");
+		if (fakeTotPtr != 0) {
+			strncpy(fakeTotPtr, "TOT", 3);
+			curChunk->packed = true;
+			curChunk->preGob = true;
+		}
+
 		if (numDataChunks > 0) {
 			curChunk->next = new Chunk;
 			curChunk = curChunk->next;
@@ -93,12 +119,137 @@
 	return chunks;
 }
 
+Chunk *readChunkListV2(FILE *stk) { // Builds and returns the linked chunk list of an STK 2.1 archive; the caller has already consumed the 6-byte signature
+	uint32 numDataChunks;
+	Chunk *chunks = new Chunk;
+	Chunk *curChunk = chunks;
+
+//	char *fakeTotPtr; 
+	
+	int cpt = 0; // index of the current file's record inside the Misc section
+	char buffer[64]; // scratch space for fields that are only printed or skipped
+	char debugStr[256];
+	uint32 filenamePos;
+	uint32 miscPos;
+	uint32 filePos;
+	uint32 compressFlag;
+	uint32 decompSize;
+	
+	// Header (Signature already read)
+	// ======
+	// Structure of header is :
+	// + 06 bytes : Signature
+	// + 14 bytes : Date time of STK/ITK creation (format DDMMYYYYHH24MISS)
+	// + 08 bytes : Name / acronym of STK/ITK creator
+	// + 04 bytes : Start position of Filenames Section
+
+	if (fread(buffer, 1, 14, stk) < 14) 
+		extractError(stk, 0, chunks, "Unexpected EOF");
+	buffer[14]='\0'; // the date field is not NUL-terminated in the file, so terminate it before printing
+	sprintf(debugStr, "File generated on %s by ", buffer);
+
+	if (fread(buffer, 1, 8, stk) < 8)
+		extractError(stk, 0, chunks, "Unexpected EOF");
+	buffer[8] = '\0';
+	strcat(debugStr, buffer);
+	printf("%s\n",debugStr);
+	filenamePos = readUint32LE(stk);
+
+	// Filenames - Header
+	// ==================
+	// Structure of the header of Filename section is :
+	// + 04 bytes : Number of files stored in STK/ITK
+	// + 04 bytes : Start position of Misc Section
+
+	if (fseek(stk, filenamePos, SEEK_SET)!=0)
+		extractError(stk, 0, chunks, "Unable to locate Filename Section");
+
+	numDataChunks = readUint32LE(stk);
+	miscPos = readUint32LE(stk);
+	
+	if (numDataChunks == 0)
+		extractError(stk, 0, chunks, "Empty ITK/STK !");
+
+	while (numDataChunks-- > 0) {
+		// Misc
+		// ====
+		// This section contains miscellaneous information about the files.
+		// For each file, the info is the following :
+		// + 04 bytes : Start position of the filename
+		// + 14 bytes : Date time of the file last modification (format DDMMYYYYHH24MISS)
+		// + 14 bytes : Date time of the file creation (format DDMMYYYYHH24MISS)
+		// + 08 bytes : Name / acronym of STK/ITK creator
+		// + 04 bytes : File section size
+		// + 04 bytes : Uncompressed file size (redundant with info in File Section)
+		// TODO : Understand the use of the unknown bytes !
+		// + 05 bytes : Unknown
+		// + 04 bytes : Start position of the File Section
+		// + 04 bytes : Compression flag (AFAIK : 0= uncompressed, 1= compressed)
+
+		if (fseek(stk, miscPos+(cpt*61), SEEK_SET)!=0) // 61 = size of one Misc record (4+14+14+8+4+4+5+4+4)
+			extractError(stk, 0, chunks, "Unable to locate Misc Section");
+		filenamePos = readUint32LE(stk); // reused: now the position of this file's name, not of the Filenames section
+		
+		if (fread(buffer, 1, 36, stk) < 36) // skip both 14-byte timestamps and the 8-byte creator name
+			extractError(stk, 0, chunks, "Unexpected EOF in Misc Section");
+		curChunk->size = readUint32LE(stk);
+		decompSize = readUint32LE(stk);
+
+		if (fread(buffer, 1, 5, stk) < 5) // skip the 5 bytes whose meaning is unknown
+			extractError(stk, 0, chunks, "Unexpected EOF in Misc Section");
+
+		filePos = readUint32LE(stk);
+		compressFlag = readUint32LE(stk);
+
+		if (compressFlag == 1)
+			curChunk->packed=true;
+		else {
+			if ((curChunk->size != decompSize) | (compressFlag != 0)) // NOTE(review): bitwise | on two comparison results; same truth value as ||, but both sides are always evaluated
+			{
+				sprintf(debugStr, "Unexpected value in compress flag : %d - Size : %d Uncompressed size : %d", compressFlag, curChunk->size, decompSize); // NOTE(review): %d is used with uint32 arguments - confirm the format specifier
+				extractError(stk, 0, chunks, debugStr);
+			} else
+				curChunk->packed=false;
+		}
+
+		// Filenames
+		// =========
+		// Filenames are stored one after the other, separated by 0x00.
+		// These are now long filenames, unlike in the previous STK version.
+
+		if (fseek(stk, filenamePos, SEEK_SET)!=0)
+			extractError(stk, 0, chunks, "Unable to locate filename");
+		
+		if (fgets(curChunk->name, 64, stk)==0) // NOTE(review): fgets stops at newline/EOF/63 chars, not at 0x00; the name still ends at the first 0x00 in the buffer, but following bytes are read too
+			extractError(stk, 0, chunks, "Unable to read filename");
+
+		// Files
+		// =====
+		// The structure of the file section is the following :
+		// + 04 bytes : Uncompressed size (redundant with the one in Misc info)
+		// + ?? bytes : Compressed data
+
+		curChunk->offset = filePos;
+		curChunk->preGob = false; // the pre-Gob unpacker is only selected by readChunkList (names containing "0OT")
+
+		if (numDataChunks > 0) { // append a node only when another file follows
+			curChunk->next = new Chunk;
+			curChunk = curChunk->next;
+		}
+		cpt++;
+	}
+
+	return chunks;
+}
+
 void extractChunks(FILE *stk, Chunk *chunks) {
 	Chunk *curChunk = chunks;
+	byte *unpackedData;
 
 	while (curChunk != 0) {
 		printf("Extracting \"%s\"\n", curChunk->name);
 
+
 		FILE *chunkFile;
 		if (!(chunkFile = fopen(curChunk->name, "wb")))
 			extractError(stk, 0, chunks, "Couldn't write file");
@@ -114,14 +265,20 @@
 		if (curChunk->packed) {
 			uint32 realSize;
 
-			byte *unpackedData = unpackData(data, realSize);
+			if (curChunk->preGob) {
+				unpackedData = unpackPreGobData(data, realSize, curChunk->size);
+			}
+			else
+			{
+				unpackedData = unpackData(data, realSize);
+			}
 
 			if (fwrite((char *) unpackedData, realSize, 1, chunkFile) < 1)
 				extractError(stk, chunkFile, chunks, "Couldn't write");
 
 			delete[] unpackedData;
 
-		} else
+		} else 
 			if (fwrite((char *) data, curChunk->size, 1, chunkFile) < 1)
 				extractError(stk, chunkFile, chunks, "Couldn't write");
 
@@ -190,3 +347,81 @@
 
 	return unpacked;
 }
+
+// Some LZ77-variant: unpacks compSize bytes of pre-Gob data at src, stores the unpacked byte count in size and returns a new[] buffer (extractChunks releases it with delete[])
+byte *unpackPreGobData(byte *src, uint32 &size, uint32 &compSize) {
+//	uint32 counter;
+	uint16 cmd; // low byte: pending flag bits; high byte: marker telling when a new flag byte is needed
+	byte tmpBuf[4114]; // sliding window; every index used below is reduced modulo 4096
+	int16 off;
+	byte len;
+	uint16 tmpIndex; // next write position in the window
+	uint32 dummy1;
+	int32 newCounter; // compressed bytes left to consume
+
+	newCounter = compSize; // compSize is only read here, although it is passed by non-const reference
+	size = 0;
+
+    dummy1 = READ_LE_UINT32(src); // NOTE(review): value is never used afterwards
+	READ_LE_UINT16(src); // NOTE(review): result is discarded; presumably src is not advanced, since 6 bytes are skipped explicitly below - confirm
+
+//	counter = size = 32768;//READ_LE_UINT32(src);
+
+	for (int i = 0; i < 4078; i++) // pre-fill the window with spaces so early back-references have defined content
+		tmpBuf[i] = 0x20;
+	tmpIndex = 4078;
+
+	src += 6; // skip the 6 header bytes
+	newCounter -= 6; // NOTE(review): if compSize <= 6 the "== 0" tests below are never hit - confirm callers never pass that
+
+	byte *unpacked = new byte[500000];//[size]; NOTE(review): fixed-size output buffer; nothing below checks dest against its end
+	byte *dest = unpacked;
+
+	cmd = 0;
+	while (1) {
+		cmd >>= 1;
+		if ((cmd & 0x0100) == 0) { // marker bit shifted out: all 8 flags consumed, fetch the next flag byte
+			cmd = *src | 0xFF00;
+			src++;
+			newCounter--;
+			if (newCounter == 0)
+				break;		}
+		if ((cmd & 1) != 0) { /* copy */
+			*dest++ = *src; // flag bit 1: literal byte, copied to the output and to the window
+			size++;
+			tmpBuf[tmpIndex] = *src;
+			src++;
+			newCounter--;
+			if (newCounter == 0)
+				break;
+			tmpIndex++;
+			tmpIndex %= 4096;
+//			counter--;
+//			if (counter == 0)
+//				break;
+		} else { /* copy string */
+
+			off = *src++; // low 8 bits of the 12-bit window offset
+			off |= (*src & 0xF0) << 4; // high 4 bits come from the upper nibble of the second byte
+			len = (*src & 0x0F) + 3; // lower nibble encodes the run length, 3..18
+			src++;
+			newCounter -= 2;
+
+			for (int i = 0; i < len; i++) {
+				*dest++ = tmpBuf[(off + i) % 4096];
+				size++;
+//				if (--counter == 0)
+//					return unpacked;
+
+				tmpBuf[tmpIndex] = tmpBuf[(off + i) % 4096]; // the copied byte also enters the window, so a run may overlap its own output
+				tmpIndex++;
+				tmpIndex %= 4096;
+			}
+			if (newCounter <= 0) // two bytes were consumed at once, so the counter may step past zero
+				break;
+
+		}
+	}
+
+	return unpacked;
+}
\ No newline at end of file


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.




More information about the Scummvm-git-logs mailing list