[Scummvm-cvs-logs] SF.net SVN: scummvm:[41638] tools/trunk/compress_gob.cpp

strangerke at users.sourceforge.net strangerke at users.sourceforge.net
Thu Jun 18 20:06:56 CEST 2009


Revision: 41638
          http://scummvm.svn.sourceforge.net/scummvm/?rev=41638&view=rev
Author:   strangerke
Date:     2009-06-18 18:06:56 +0000 (Thu, 18 Jun 2009)

Log Message:
-----------
Avoid compressing duplicate files: identical files now share a single chunk, which (when duplicates occur) gives a better overall compression ratio.

Modified Paths:
--------------
    tools/trunk/compress_gob.cpp

Modified: tools/trunk/compress_gob.cpp
===================================================================
--- tools/trunk/compress_gob.cpp	2009-06-18 13:59:49 UTC (rev 41637)
+++ tools/trunk/compress_gob.cpp	2009-06-18 18:06:56 UTC (rev 41638)
@@ -26,9 +26,9 @@
 
 struct Chunk {
 	char name[64];
-	uint32 size, offset;
-	bool packed;
-
+	uint32 size, realSize, offset;
+	uint8 packed;
+	Chunk *replChunk;
 	Chunk *next;
 
 	Chunk() : next(0) { }
@@ -100,7 +100,13 @@
 Chunk *readChunkConf(FILE *gobConf, uint16 &chunkCount) {
 	Chunk *chunks = new Chunk;
 	Chunk *curChunk = chunks;
-	char buffer [1024];
+	Chunk *parseChunk;
+	FILE *src1, *src2;
+	char buffer[1024];
+	char buf1[4096];
+	char buf2[4096];
+	uint8 checkFl;
+	uint16 readCount;
 
 	chunkCount = 1;
 
@@ -121,6 +127,42 @@
 		else
 			curChunk->packed = false;
 
+		if (! (src1 = fopen(curChunk->name, "rb"))) {
+			error("Unable to read %s", curChunk->name);
+		}
+		fseek(src1, 0, SEEK_END);
+// if file is too small, force 'Store' method
+		if ((curChunk->realSize = ftell(src1)) < 8) 
+			curChunk->packed = 0;
+
+		parseChunk = chunks;
+		while (parseChunk != curChunk) {
+			if ((parseChunk->realSize == curChunk->realSize) & (parseChunk->packed != 2)) {
+				if (strcmp(parseChunk->name, curChunk->name) == 0)
+					error("Duplicate filename found in conf file: %s", parseChunk->name);
+				rewind(src1);
+				src2 = fopen(parseChunk->name, "rb");
+				checkFl = 0;
+				do {
+					readCount = fread(buf1, 1, 4096, src1);
+					fread(buf2, 1, 4096, src2);
+					for (int i = 0; (i < readCount) & (checkFl == 0); i++)
+						if (buf1[i] != buf2[i])
+							checkFl = 1;
+				} while ((readCount == 4096) & (checkFl == 0));
+				fclose(src2);
+				if (checkFl == 0) {
+// If files are identical, use the same compressed chunk instead of re-compressing the same thing
+					curChunk->packed = 2;
+					curChunk->replChunk = parseChunk;
+					printf("Identical files : %s %s (%d bytes)\n", curChunk->name, parseChunk->name, curChunk->realSize);
+					break;
+				}
+			}
+			parseChunk = parseChunk->next;
+		}
+		fclose(src1);
+		
 		fscanf(gobConf, "%s", buffer);
 		if (!feof(gobConf)) {
 			curChunk->next = new Chunk;
@@ -145,33 +187,34 @@
 void writeBody(FILE *stk, uint16 chunkCount, Chunk *chunks) {
 	Chunk *curChunk = chunks;
 	FILE *src;
-	uint32 realSize, tmpSize, filPos;
+	uint32 tmpSize;
 	int count;
 	char buffer[4096];
 
 	while(curChunk) {
 		if (!(src = fopen(curChunk->name, "rb")))
-			error("Couldn't open conf file \"%s\"", curChunk->name);
+			error("Couldn't open file \"%s\"", curChunk->name);
 
-		realSize = fileSize(src);
+		if (curChunk->packed == 2)
+			printf("Identical file %12s\t(compressed size %d bytes)\n", curChunk->name, curChunk->replChunk->size);
 
-		if (curChunk->packed) {
+		curChunk->offset = ftell(stk);
+		if (curChunk->packed == 1) {
 			printf("Compressing %12s\t", curChunk->name);
-			filPos = ftell(stk);
 			curChunk->size = writeBodyPackFile(stk, src);
-			printf("%d -> %d bytes", realSize, curChunk->size);
-			if (curChunk->size >= realSize) {
+			printf("%d -> %d bytes", curChunk->realSize, curChunk->size);
+			if (curChunk->size >= curChunk->realSize) {
 // If compressed size >= realsize, compression is useless
 // => Store instead
 				curChunk->packed = 0;
-				fseek(stk, filPos, SEEK_SET);
+				fseek(stk, curChunk->offset, SEEK_SET);
 				rewind(src);
 				printf("!!!");
 			}
 			printf("\n");
 		} 
 
-		if (!curChunk->packed) {
+		if (curChunk->packed == 0) {
 			tmpSize = 0;
 			printf("Storing %12s\t", curChunk->name);
 			do {
@@ -208,7 +251,6 @@
 	uint16 i;
 	char buffer[1024];
 	Chunk *curChunk = chunks;
-	uint32 filPos;
 
 	rewind(stk);
 
@@ -220,8 +262,6 @@
 //+ 4  bytes : size of the chunk
 //+ 4  bytes : start position of the chunk in the file
 //+ 1  byte  : If 0 : not compressed, if 1 : compressed
-	filPos = 2 + (chunkCount * 22);
-
 	buffer[0] = chunkCount & 0xFF;
 	buffer[1] = chunkCount >> 8;
 	fwrite(buffer, 1, 2, stk);
@@ -234,20 +274,29 @@
 				buffer[i] = '\0';
 		fwrite(buffer, 1, 13, stk);
 
-		buffer[0] = curChunk->size;
-		buffer[1] = curChunk->size >> 8;
-		buffer[2] = curChunk->size >> 16;
-		buffer[3] = curChunk->size >> 24;
-		buffer[4] = filPos;
-		buffer[5] = filPos >> 8;
-		buffer[6] = filPos >> 16;
-		buffer[7] = filPos >> 24;
-
-		buffer[8] = curChunk->packed ? 0x1 : 0x0;
-
+		if (curChunk->packed == 2)
+		{
+			buffer[0] = curChunk->replChunk->size;
+			buffer[1] = curChunk->replChunk->size >> 8;
+			buffer[2] = curChunk->replChunk->size >> 16;
+			buffer[3] = curChunk->replChunk->size >> 24;
+			buffer[4] = curChunk->replChunk->offset;
+			buffer[5] = curChunk->replChunk->offset >> 8;
+			buffer[6] = curChunk->replChunk->offset >> 16;
+			buffer[7] = curChunk->replChunk->offset >> 24;
+			buffer[8] = curChunk->replChunk->packed;
+		} else {
+			buffer[0] = curChunk->size;
+			buffer[1] = curChunk->size >> 8;
+			buffer[2] = curChunk->size >> 16;
+			buffer[3] = curChunk->size >> 24;
+			buffer[4] = curChunk->offset;
+			buffer[5] = curChunk->offset >> 8;
+			buffer[6] = curChunk->offset >> 16;
+			buffer[7] = curChunk->offset >> 24;
+			buffer[8] = curChunk->packed;
+		}
 		fwrite(buffer, 1, 9, stk);
-		filPos += curChunk->size;
-
 		curChunk = curChunk->next;
 	}
 	return;


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.




More information about the Scummvm-git-logs mailing list