[Scummvm-cvs-logs] SF.net SVN: scummvm:[36338]	scummvm/trunk/backends/platform/wince
    knakos at users.sourceforge.net 
    knakos at users.sourceforge.net
       
    Sat Feb 14 20:42:18 CET 2009
    
    
  
Revision: 36338
          http://scummvm.svn.sourceforge.net/scummvm/?rev=36338&view=rev
Author:   knakos
Date:     2009-02-14 19:42:18 +0000 (Sat, 14 Feb 2009)
Log Message:
-----------
apply patch by Fingolfin to optimize scalers + more
Modified Paths:
--------------
    scummvm/trunk/backends/platform/wince/CEScaler.cpp
    scummvm/trunk/backends/platform/wince/CEScaler.h
    scummvm/trunk/backends/platform/wince/wince-sdl.cpp
Modified: scummvm/trunk/backends/platform/wince/CEScaler.cpp
===================================================================
--- scummvm/trunk/backends/platform/wince/CEScaler.cpp	2009-02-14 19:12:01 UTC (rev 36337)
+++ scummvm/trunk/backends/platform/wince/CEScaler.cpp	2009-02-14 19:42:18 UTC (rev 36338)
@@ -25,40 +25,8 @@
 #include "graphics/scaler/intern.h"
 #include "CEScaler.h"
 
-int redblueMasks[] = { 0x7C1F, 0xF81F };
-int greenMasks[] = { 0x03E0, 0x07E0 };
-
-static int maskUsed;
-
-void initCEScaler(void) {
-	if (gBitFormat == 555)
-		maskUsed = 0;
-	else
-		maskUsed = 1;
-}
-
-// FIXME: Fingolfin says: The following interpolation code is a lot slower than it needs
-// to be. The reason: Using the value of a global variable to index two global arrays is
-// extremly difficult if not impossible for the compiler to optimize. At the very least,
-// the two arrays should be 'static const', but even then, memory access is required.
-// To avoid this, one could use the techniques used by our other scalers. See also the
-// interpolate functions in graphics/scaler/intern.h.
-// Even if those can't be used directly for some reasons (e.g. the compiler has problems
-// with templates), then still the *techniques* could and should be used. I would exepct
-// that this way, even the C version of PocketPCPortrait() should get a big speed boost.
-
-static inline uint16 CEinterpolate16_4(uint16 p1, uint16 p2, uint16 p3, uint16 p4)
-{
-        return ((((p1 & redblueMasks[maskUsed]) + (p2 & redblueMasks[maskUsed]) + (p3 & redblueMasks[maskUsed]) + (p4 & redblueMasks[maskUsed])) / 4) & redblueMasks[maskUsed]) |
-               ((((p1 & greenMasks[maskUsed]) + (p2 & greenMasks[maskUsed]) + (p3 & greenMasks[maskUsed]) + (p4 & greenMasks[maskUsed])) / 4) & greenMasks[maskUsed]);
-}
-
-static inline uint16 CEinterpolate16_2(uint16 p1, int w1, uint16 p2, int w2) {
-        return ((((p1 & redblueMasks[maskUsed]) * w1 + (p2 & redblueMasks[maskUsed]) * w2) / (w1 + w2)) & redblueMasks[maskUsed]) |
-               ((((p1 & greenMasks[maskUsed]) * w1 + (p2 & greenMasks[maskUsed]) * w2) / (w1 + w2)) & greenMasks[maskUsed]);
-}
-
-void PocketPCPortrait(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
+template<int bitFormat>
+void PocketPCPortraitTemplate(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
 	uint8 *work;
 	int i;
 
@@ -73,9 +41,9 @@
 			uint16 color3 = *(((const uint16 *)srcPtr) + (i + 2));
 			uint16 color4 = *(((const uint16 *)srcPtr) + (i + 3));
 
-			*(((uint16 *)work) + 0) = CEinterpolate16_2(color1, 3, color2, 1);
-			*(((uint16 *)work) + 1) = CEinterpolate16_2(color2, 1, color3, 1);
-			*(((uint16 *)work) + 2) = CEinterpolate16_2(color3, 1, color4, 3);
+			*(((uint16 *)work) + 0) = interpolate32_3_1<bitFormat>(color1, color2);
+			*(((uint16 *)work) + 1) = interpolate32_1_1<bitFormat>(color2, color3);
+			*(((uint16 *)work) + 2) = interpolate32_3_1<bitFormat>(color4, color3);
 
 			work += 3 * sizeof(uint16);
 		}
@@ -83,61 +51,66 @@
 		dstPtr += dstPitch;
 	}
 }
+MAKE_WRAPPER(PocketPCPortrait)
 
-// FIXME: Fingolfin says: Please document this function. What does it compute? How
-// does it differ from the code in aspect.cpp ? It would be nice to speed up this function
-// here using the ideas and tracks from aspect.cpp and the comment above, as right now, it
-// is rather hard for the compiler to optimize this code properly.
+// Our version of an aspect scaler. The main difference is that it operates
+// out-of-place, omitting the straight blit step the SDL backend performs. Also,
+// tests show unaligned access errors with the stock aspect scaler.
 void PocketPCLandscapeAspect(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
 
-#define RB(x) ((x & redblueMasks[maskUsed])<<8)
-#define G(x)  ((x & greenMasks[maskUsed])<<3)
+	const int redblueMasks[] = { 0x7C1F, 0xF81F };
+	const int greenMasks[] = { 0x03E0, 0x07E0 };
+	const int RBM = redblueMasks[gBitFormat == 565];
+	const int GM = greenMasks[gBitFormat == 565];
 
+	int i,j;
+	unsigned int p1, p2;
+	uint8 *inbuf, *outbuf, *instart, *outstart;
+
+#define RB(x) ((x & RBM)<<8)
+#define G(x)  ((x & GM)<<3)
+
 #define P20(x) (((x)>>2)-((x)>>4))
 #define P40(x) (((x)>>1)-((x)>>3))
 #define P60(x) (((x)>>1)+((x)>>3))
 #define P80(x) (((x)>>1)+((x)>>2)+((x)>>4))
 
-#define MAKEPIXEL(rb,g) ((((rb)>>8) & redblueMasks[maskUsed] | ((g)>>3) & greenMasks[maskUsed]))
+#define MAKEPIXEL(rb,g) ((((rb)>>8) & RBM | ((g)>>3) & GM))
 
-	int i,j;
-	unsigned int p1;
-	unsigned int p2;
-	uint16 * inbuf;
-	uint16 * outbuf;
-	inbuf = (uint16 *)srcPtr;
-	outbuf = (uint16 *)dstPtr;
+	inbuf = (uint8 *)srcPtr;
+	outbuf = (uint8 *)dstPtr;
+	height /= 5;
 
-	uint16 srcPitch16 = (uint16)(srcPitch / sizeof(uint16));
-	uint16 dstPitch16 = (uint16)(dstPitch / sizeof(uint16));
-
-	for (i = 0; i < height/5; i++) {
+	for (i = 0; i < height; i++) {
+		instart = inbuf;
+		outstart = outbuf;
 		for (j=0; j < width; j++) {
-			p1 = *((uint16*)inbuf+j); inbuf += srcPitch16;
-			*((uint16*)outbuf+j) = p1; outbuf += dstPitch16;
 
-			p2 = *((uint16*)inbuf+j); inbuf += srcPitch16;
-			*((uint16*)outbuf+j) = MAKEPIXEL(P20(RB(p1))+P80(RB(p2)),P20(G(p1))+P80(G(p2)));  outbuf += dstPitch16;
+			p1 = *(uint16*)inbuf; inbuf += srcPitch;
+			*(uint16*)outbuf = p1; outbuf += dstPitch;
 
+			p2 = *(uint16*)inbuf; inbuf += srcPitch;
+			*(uint16*)outbuf = MAKEPIXEL(P20(RB(p1))+P80(RB(p2)),P20(G(p1))+P80(G(p2)));  outbuf += dstPitch;
+
 			p1 = p2;
-			p2 = *((uint16*)inbuf+j); inbuf += srcPitch16;
-			*((uint16*)outbuf+j) = MAKEPIXEL(P40(RB(p1))+P60(RB(p2)),P40(G(p1))+P60(G(p2)));  outbuf += dstPitch16;
+			p2 = *(uint16*)inbuf; inbuf += srcPitch;
+			*(uint16*)outbuf = MAKEPIXEL(P40(RB(p1))+P60(RB(p2)),P40(G(p1))+P60(G(p2)));  outbuf += dstPitch;
 
 			p1 = p2;
-			p2 = *((uint16*)inbuf+j); inbuf += srcPitch16;
-			*((uint16*)outbuf+j) = MAKEPIXEL(P60(RB(p1))+P40(RB(p2)),P60(G(p1))+P40(G(p2)));  outbuf += dstPitch16;
+			p2 = *(uint16*)inbuf; inbuf += srcPitch;
+			*(uint16*)outbuf = MAKEPIXEL(P60(RB(p1))+P40(RB(p2)),P60(G(p1))+P40(G(p2)));  outbuf += dstPitch;
 
 			p1 = p2;
-			p2 = *((uint16*)inbuf+j);
-			*((uint16*)outbuf+j) = MAKEPIXEL(P80(RB(p1))+P20(RB(p2)),P80(G(p1))+P20(G(p2)));  outbuf += dstPitch16;
+			p2 = *(uint16*)inbuf;
+			*(uint16*)outbuf = MAKEPIXEL(P80(RB(p1))+P20(RB(p2)),P80(G(p1))+P20(G(p2)));  outbuf += dstPitch;
 
-			*((uint16*)outbuf+j) = p2;
+			*(uint16*)outbuf = p2;
 
-			inbuf = inbuf - srcPitch16*4;
-			outbuf = outbuf - dstPitch16*5;
+			inbuf = inbuf - srcPitch*4 + sizeof(uint16);
+			outbuf = outbuf - dstPitch*5 + sizeof(uint16);
 		}
-		inbuf = inbuf + srcPitch16*5;
-		outbuf = outbuf + dstPitch16*6;
+		inbuf = instart + srcPitch*5;
+		outbuf = outstart + dstPitch*6;
 	}
 }
 
@@ -150,10 +123,8 @@
 }
 #endif
 
-void PocketPCHalf(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
-#ifdef ARM
-	PocketPCHalfARM(srcPtr, srcPitch, dstPtr, dstPitch, width, height, redbluegreenMasks[maskUsed],roundingconstants[maskUsed]);
-#else
+template<int bitFormat>
+void PocketPCHalfTemplate(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
 	uint8 *work;
 	int i;
 	uint16 srcPitch16 = (uint16)(srcPitch / sizeof(uint16));
@@ -168,18 +139,29 @@
 			uint16 color2 = *(((const uint16 *)srcPtr) + (i + 1));
 			uint16 color3 = *(((const uint16 *)srcPtr) + (i + srcPitch16));
 			uint16 color4 = *(((const uint16 *)srcPtr) + (i + srcPitch16 + 1));
-			*(((uint16 *)work) + 0) = CEinterpolate16_4(color1, color2, color3, color4);
+			*(((uint16 *)work) + 0) = interpolate16_1_1_1_1<bitFormat>(color1, color2, color3, color4);
 
 			work += sizeof(uint16);
 		}
 		srcPtr += 2 * srcPitch;
 		dstPtr += dstPitch;
 	}
+}
+
+void PocketPCHalf(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {	// Entry point for the half-size scaler: dispatches on gBitFormat to the ARM routine or the matching template instantiation.
+#ifdef ARM
+	int maskUsed = (gBitFormat == 565);	// table index: 1 when the format is 565, 0 otherwise
+	PocketPCHalfARM(srcPtr, srcPitch, dstPtr, dstPitch, width, height, redbluegreenMasks[maskUsed],roundingconstants[maskUsed]);
+#else
+	if (gBitFormat == 565)
+		PocketPCHalfTemplate<565>(srcPtr, srcPitch, dstPtr, dstPitch, width, height);
+	else
+		PocketPCHalfTemplate<555>(srcPtr, srcPitch, dstPtr, dstPitch, width, height);	// non-565 means 555; both branches previously used <565>, applying the wrong colour masks
 #endif
 }
 
-
-void PocketPCHalfZoom(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
+template<int bitFormat>
+void PocketPCHalfZoomTemplate(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
 	uint8 *work;
 	int i;
 	uint16 srcPitch16 = (uint16)(srcPitch / sizeof(uint16));
@@ -191,10 +173,10 @@
 		i = 0;
 		work = dstPtr;
 
-		for (int i=0; i<width; i+=2) {
+		for (int i = 0; i < width; i += 2) {
 			uint16 color1 = *(((const uint16 *)srcPtr) + i);
 			uint16 color2 = *(((const uint16 *)srcPtr) + (i + 1));
-			*(((uint16 *)work) + 0) = CEinterpolate16_2(color1, 1, color2, 1);
+			*(((uint16 *)work) + 0) = interpolate32_1_1<bitFormat>(color1, color2);
 
 			work += sizeof(uint16);
 		}
@@ -202,8 +184,10 @@
 		dstPtr += dstPitch;
 	}
 }
+MAKE_WRAPPER(PocketPCHalfZoom)
 
-void SmartphoneLandscape(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
+template<int bitFormat>
+void SmartphoneLandscapeTemplate(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
 	uint8 *work;
 	int i;
 	int line = 0;
@@ -212,14 +196,14 @@
 		i = 0;
 		work = dstPtr;
 
-		for (int i=0; i<width; i+=3) {
+		for (int i = 0; i < width; i += 3) {
 			// Filter 2/3
 			uint16 color1 = *(((const uint16 *)srcPtr) + i);
 			uint16 color2 = *(((const uint16 *)srcPtr) + (i + 1));
 			uint16 color3 = *(((const uint16 *)srcPtr) + (i + 2));
 
-			*(((uint16 *)work) + 0) = CEinterpolate16_2(color1, 3, color2, 1);
-			*(((uint16 *)work) + 1) = CEinterpolate16_2(color2, 1, color3, 1);
+			*(((uint16 *)work) + 0) = interpolate32_3_1<bitFormat>(color1, color2);
+			*(((uint16 *)work) + 1) = interpolate32_3_1<bitFormat>(color3, color2);
 
 			work += 2 * sizeof(uint16);
 		}
@@ -233,3 +217,4 @@
 		}
 	}
 }
+MAKE_WRAPPER(SmartphoneLandscape)
Modified: scummvm/trunk/backends/platform/wince/CEScaler.h
===================================================================
--- scummvm/trunk/backends/platform/wince/CEScaler.h	2009-02-14 19:12:01 UTC (rev 36337)
+++ scummvm/trunk/backends/platform/wince/CEScaler.h	2009-02-14 19:42:18 UTC (rev 36338)
@@ -39,6 +39,4 @@
 DECLARE_SCALER(SmartphoneLandscape);
 //#endif
 
-void initCEScaler(void);
-
 #endif
Modified: scummvm/trunk/backends/platform/wince/wince-sdl.cpp
===================================================================
--- scummvm/trunk/backends/platform/wince/wince-sdl.cpp	2009-02-14 19:12:01 UTC (rev 36337)
+++ scummvm/trunk/backends/platform/wince/wince-sdl.cpp	2009-02-14 19:42:18 UTC (rev 36338)
@@ -1399,7 +1399,6 @@
 		InitScalers(555);
 	else
 		InitScalers(565);
-	initCEScaler();
 	_overlayFormat.bytesPerPixel = _hwscreen->format->BytesPerPixel;
 	_overlayFormat.rLoss = _hwscreen->format->Rloss;
 	_overlayFormat.gLoss = _hwscreen->format->Gloss;
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
    
    
More information about the Scummvm-git-logs
mailing list