[Scummvm-cvs-logs] SF.net SVN: scummvm: [21961] scummvm/trunk

Mon Apr 17 01:39:01 CEST 2006

Revision: 21961
Author:   fingolfin
Date:     2006-04-17 01:38:26 -0700 (Mon, 17 Apr 2006)
ViewCVS:  http://svn.sourceforge.net/scummvm/?rev=21961&view=rev

Log Message:
-----------
Removed my Altivec version of the hqx blitters, since (a) it didn't work with newer GCC versions, (b) it improved speed on some systems but was actually slower on others, and (c) even on my old 400 MHz G4, hqx is fast enough without it

Modified Paths:
--------------
    scummvm/trunk/Makefile
    scummvm/trunk/configure
    scummvm/trunk/graphics/scaler/hq2x.cpp
    scummvm/trunk/graphics/scaler/hq2x.h
    scummvm/trunk/graphics/scaler/hq3x.cpp
    scummvm/trunk/graphics/scaler/hq3x.h
Modified: scummvm/trunk/Makefile
===================================================================

--- scummvm/trunk/Makefile	2006-04-17 05:01:29 UTC (rev 21960)
+++ scummvm/trunk/Makefile	2006-04-17 08:38:26 UTC (rev 21961)
@@ -91,8 +91,7 @@
 OSXOPT=/sw
 # Special target to create a static linked binary for Mac OS X.
 # We use -force_cpusubtype_ALL to ensure the binary runs on every
-# PowerPC machine, even though we use Altivec code (we dynamically
-# check whether Altivec is available before trying to run it).
+# PowerPC machine.
 scummvm-static: $(OBJS)
 	$(CXX) $(LDFLAGS) -force_cpusubtype_ALL -o scummvm-static $(OBJS) \
 		`sdl-config --static-libs` \

Modified: scummvm/trunk/configure
===================================================================
--- scummvm/trunk/configure	2006-04-17 05:01:29 UTC (rev 21960)
+++ scummvm/trunk/configure	2006-04-17 08:38:26 UTC (rev 21961)
@@ -62,7 +62,6 @@
 _need_memalign=no
 _build_plugins=no
 _nasm=auto
-_altivec=auto
 _build_hq_scalers=yes
 _build_scalers=yes
 # more defaults
@@ -400,7 +399,6 @@
       --enable-plugins)         _build_plugins=yes ;;
       --enable-mt32emu)         _mt32emu=yes    ;;
       --disable-mt32emu)        _mt32emu=no     ;;
-      --disable-altivec)        _altivec=no     ;;
       --with-fluidsynth-prefix=*)
         arg=`echo $ac_option | cut -d '=' -f 2`
         FLUIDSYNTH_CFLAGS="-I$arg/include"
@@ -1233,30 +1231,6 @@
 test -z "$_mandir" && _mandir="$_prefix/man"
 
 #
-# Check for Altivec, if on MacOS X
-#
-case $_host_os in
-	darwin*)
-		echocheck "Altivec"
-		if test "$_altivec" = auto ; then
-			_altivec=no
-			cat > $TMPC << EOF
-int main(void) { return 0; }
-EOF
-			cc_check $LDFLAGS $CXXFLAGS -maltivec && _altivec=yes
-		fi
-		if test "$_altivec" = yes ; then
-				DEFINES="$DEFINES -DHAS_ALTIVEC"
-				CXXFLAGS="$CXXFLAGS -faltivec"
-				CFLAGS="$CFLAGS -faltivec"
-		fi  
-		echo "$_altivec"
-		rm -f $TMPC $TMPO$EXEEXT
-	;;
-esac
-
-
-#
 # Check which engines ("frontends") are to be built
 #
 echo

Modified: scummvm/trunk/graphics/scaler/hq2x.cpp
===================================================================
--- scummvm/trunk/graphics/scaler/hq2x.cpp	2006-04-17 05:01:29 UTC (rev 21960)
+++ scummvm/trunk/graphics/scaler/hq2x.cpp	2006-04-17 08:38:26 UTC (rev 21961)
@@ -42,35 +42,6 @@
 
 #else
 
-#ifdef HAS_ALTIVEC
-
-#ifdef __amigaos4__
-#include <proto/exec.h>
-#include <altivec.h>
-static bool isAltiVecAvailable() {
-	uint32 vecUnit;
-	IExec->GetCPUInfo(GCIT_VectorUnit, &vecUnit, TAG_DONE);
-	if (vecUnit == VECTORTYPE_NONE)
-		return false;
-	else
-		return true;
-}
-#else
-
-#include <sys/sysctl.h>
-
-static bool isAltiVecAvailable()  {
-	int selectors[2] = { CTL_HW, HW_VECTORUNIT };
-	int hasVectorUnit = 0;
-	size_t length = sizeof(hasVectorUnit);
-	int error = sysctl(selectors, 2, &hasVectorUnit, &length, NULL, 0);
-	if ( 0 == error )
-		return hasVectorUnit != 0;
-	return false;
-}
-#endif
-#endif
-
 #define PIXEL00_0	*(q) = w5;
 #define PIXEL00_10	*(q) = interpolate16_2<bitFormat,3,1>(w5, w1);
 #define PIXEL00_11	*(q) = interpolate16_2<bitFormat,3,1>(w5, w4);
@@ -139,33 +110,7 @@
 #undef bitFormat
 
 
-#ifdef HAS_ALTIVEC
-	#define USE_ALTIVEC	1
-
-	#define bitFormat 565
-	void HQ2x_565_Altivec(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
-		#include "graphics/scaler/hq2x.h"
-	}
-	#undef bitFormat
-
-	#define bitFormat 555
-	void HQ2x_555_Altivec(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
-		#include "graphics/scaler/hq2x.h"
-	}
-	#undef bitFormat
-#endif
-
 void HQ2x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
-#ifdef HAS_ALTIVEC
-	if (isAltiVecAvailable()) {
-		if (gBitFormat == 565)
-			HQ2x_565_Altivec(srcPtr, srcPitch, dstPtr, dstPitch, width, height);
-		else
-			HQ2x_555_Altivec(srcPtr, srcPitch, dstPtr, dstPitch, width, height);
-		return;
-	}
-#endif
-
 	if (gBitFormat == 565)
 		HQ2x_565(srcPtr, srcPitch, dstPtr, dstPitch, width, height);
 	else

Modified: scummvm/trunk/graphics/scaler/hq2x.h
===================================================================
--- scummvm/trunk/graphics/scaler/hq2x.h	2006-04-17 05:01:29 UTC (rev 21960)
+++ scummvm/trunk/graphics/scaler/hq2x.h	2006-04-17 08:38:26 UTC (rev 21961)
@@ -46,24 +46,6 @@
 	//	 | w7 | w8 | w9 |
 	//	 +----+----+----+
 
-#ifdef USE_ALTIVEC
-	// The YUV threshold.
-	static const vector unsigned char vThreshold = (vector unsigned char)((vector unsigned int)0x00300706);
-
-	// Bit pattern mask.
-	static const vector signed int vPatternMask1 = (vector signed int)(0x01, 0x02, 0x04, 0x08);
-	static const vector signed int vPatternMask2 = (vector signed int)(0x10, 0x20, 0x40, 0x80);
-
-	// Permutation masks for the incremental vector loading (see below for more information).
-	static const vector unsigned char vPermuteToV1234 = (vector unsigned char)( 4, 5, 6, 7,  8,9,10,11,  20,21,22,23,  16,17,18,19);
-	static const vector unsigned char vPermuteToV6789 = (vector unsigned char)(24,25,26,27,  8,9,10,11,  12,13,14,15,  28,29,30,31);
-
-	// The YUV vectors.
-	vector signed char vecYUV5555;
-	vector signed char vecYUV1234;
-	vector signed char vecYUV6789;
-#endif
-
 	while (height--) {
 		w1 = *(p - 1 - nextlineSrc);
 		w4 = *(p - 1);
@@ -73,15 +55,6 @@
 		w5 = *(p);
 		w8 = *(p + nextlineSrc);
 
-#ifdef USE_ALTIVEC
-		// Load inital values of vecYUV1234 / vecYUV6789
-		const int arr1234[4] = {0, YUV(1), YUV(2), 0};
-		const int arr6789[4] = {YUV(5), 0, YUV(7), YUV(8)};
-
-		vecYUV1234 = *(const vector signed char *)arr1234;
-		vecYUV6789 = *(const vector signed char *)arr6789;
-#endif
-
 		int tmpWidth = width;
 		while (tmpWidth--) {
 			p++;
@@ -91,74 +64,6 @@
 			w9 = *(p + nextlineSrc);
 
 			int pattern = 0;
-
-#ifdef USE_ALTIVEC
-			/*
-			Consider this peephole into the image buffer:
-			+----+----+----+----+
-			|    |    |    |    |
-			| w00| w01| w02| w03|
-			+----+----+----+----+
-			|    |    |    |    |
-			| w10| w11| w12| w13|
-			+----+----+----+----+
-			|    |    |    |    |
-			| w20| w21| w22| w23|
-			+----+----+----+----+
-
-			In the previous loop iteration, w11 was the center point, and our
-			vectors contain the following data from the previous iteration:
-			vecYUV5555 = { w11, w11, w11, w11 }
-			vecYUV1234 = { w00, w01, w02, w10 }
-			vecYUV6789 = { w12, w20, w21, w22 }
-
-			Now we have the new center point w12, and we would like to have
-			the following values in our vectors:
-			vecYUV5555 = { w12, w12, w12, w12 }
-			vecYUV1234 = { w01, w02, w03, w11 }
-			vecYUV6789 = { w13, w21, w22, w23 }
-
-			To this end we load a single new vector:
-			vTmp = { w11, w03, w13, w23 }
-
-			We then can compute all the new vector values using permutations only:
-			vecYUV5555 = { vecYUV6789[0], vecYUV6789[0], vecYUV6789[0], vecYUV6789[0] }
-			vecYUV1234 = { vecYUV1234[1], vecYUV1234[2],  vTmp[1],  vTmp[0] }
-			vecYUV6789 = {  vTmp[2], vecYUV6789[2], vecYUV6789[3],  vTmp[3] }
-
-			Beautiful, isn't it? :-)
-			*/
-
-			// Load the new values into a temporary vector (see above for an explanation)
-			const int tmpArr[4] = {YUV(4), YUV(3), YUV(6), YUV(9)};
-			vector signed char vTmp = *(const vector signed char *)tmpArr;
-
-			// Next update the data vectors
-			vecYUV5555 = (vector signed char)vec_splat((vector unsigned int)vecYUV6789, 0);
-			vecYUV1234 = vec_perm(vecYUV1234, vTmp, vPermuteToV1234);
-			vecYUV6789 = vec_perm(vecYUV6789, vTmp, vPermuteToV6789);
-
-			// Compute the absolute difference between the center point's YUV and the outer points
-			const vector signed char vDiff1 = vec_abs(vec_sub(vecYUV5555, vecYUV1234));
-			const vector signed char vDiff2 = vec_abs(vec_sub(vecYUV5555, vecYUV6789));
-
-			// Compare the difference to the threshold (byte-wise)
-			const vector bool char vCmp1 = vec_cmpgt((vector unsigned char)vDiff1, vThreshold);
-			const vector bool char vCmp2 = vec_cmpgt((vector unsigned char)vDiff2, vThreshold);
-
-			// Convert all non-zero (long) vector elements to 0xF...F, keep 0 at 0.
-			// Then and in the patter masks. The idea is that for 0 components, we get 0,
-			// while for the other components we get exactly the mask value.
-			const vector signed int vPattern1 = vec_and(vec_cmpgt((vector unsigned int)vCmp1, (vector unsigned int)0), vPatternMask1);
-			const vector signed int vPattern2 = vec_and(vec_cmpgt((vector unsigned int)vCmp2, (vector unsigned int)0), vPatternMask2);
-
-			// Now sum up the components of all vectors. Since our pattern mask values
-			// are all "orthogonal", this is effectively the same as ORing them all
-			// together. In the end, the rightmost word of vSum contains the 'pattern'
-			vector signed int vSum = vec_sums(vPattern1, (vector signed int)0);
-			vSum = vec_sums(vPattern2, vSum);
-			pattern = ((int *)&vSum)[3];
-#else
 			const int yuv5 = YUV(5);
 			if (w5 != w1 && diffYUV(yuv5, YUV(1))) pattern |= 0x0001;
 			if (w5 != w2 && diffYUV(yuv5, YUV(2))) pattern |= 0x0002;
@@ -168,7 +73,6 @@
 			if (w5 != w7 && diffYUV(yuv5, YUV(7))) pattern |= 0x0020;
 			if (w5 != w8 && diffYUV(yuv5, YUV(8))) pattern |= 0x0040;
 			if (w5 != w9 && diffYUV(yuv5, YUV(9))) pattern |= 0x0080;
-#endif
 
 			switch (pattern) {
 			case 0:

Modified: scummvm/trunk/graphics/scaler/hq3x.cpp
===================================================================
--- scummvm/trunk/graphics/scaler/hq3x.cpp	2006-04-17 05:01:29 UTC (rev 21960)
+++ scummvm/trunk/graphics/scaler/hq3x.cpp	2006-04-17 08:38:26 UTC (rev 21961)
@@ -43,34 +43,6 @@
 
 #else
 
-#ifdef HAS_ALTIVEC
-
-#ifdef __amigaos4__
-#include <proto/exec.h>
-static bool isAltiVecAvailable() {
-	uint32 vecUnit;
-	IExec->GetCPUInfo(GCIT_VectorUnit, &vecUnit, TAG_DONE);
-	if (vecUnit == VECTORTYPE_NONE)
-		return false;
-	else
-		return true;
-}
-#else
-
-#include <sys/sysctl.h>
-
-static bool isAltiVecAvailable()  {
-	int selectors[2] = { CTL_HW, HW_VECTORUNIT };
-	int hasVectorUnit = 0;
-	size_t length = sizeof(hasVectorUnit);
-	int error = sysctl(selectors, 2, &hasVectorUnit, &length, NULL, 0);
-	if ( 0 == error )
-		return hasVectorUnit != 0;
-	return false;
-}
-#endif
-#endif
-
 #define PIXEL00_1M  *(q) = interpolate16_2<bitFormat,3,1>(w5, w1);
 #define PIXEL00_1U  *(q) = interpolate16_2<bitFormat,3,1>(w5, w2);
 #define PIXEL00_1L  *(q) = interpolate16_2<bitFormat,3,1>(w5, w4);
@@ -141,33 +113,7 @@
 #undef bitFormat
 
 
-#ifdef HAS_ALTIVEC
-	#define USE_ALTIVEC	1
-
-	#define bitFormat 565
-	void HQ3x_565_Altivec(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
-		#include "graphics/scaler/hq3x.h"
-	}
-	#undef bitFormat
-
-	#define bitFormat 555
-	void HQ3x_555_Altivec(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
-		#include "graphics/scaler/hq3x.h"
-	}
-	#undef bitFormat
-#endif
-
 void HQ3x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
-#ifdef HAS_ALTIVEC
-	if (isAltiVecAvailable()) {
-		if (gBitFormat == 565)
-			HQ3x_565_Altivec(srcPtr, srcPitch, dstPtr, dstPitch, width, height);
-		else
-			HQ3x_555_Altivec(srcPtr, srcPitch, dstPtr, dstPitch, width, height);
-		return;
-	}
-#endif
-
 	if (gBitFormat == 565)
 		HQ3x_565(srcPtr, srcPitch, dstPtr, dstPitch, width, height);
 	else

Modified: scummvm/trunk/graphics/scaler/hq3x.h
===================================================================
--- scummvm/trunk/graphics/scaler/hq3x.h	2006-04-17 05:01:29 UTC (rev 21960)
+++ scummvm/trunk/graphics/scaler/hq3x.h	2006-04-17 08:38:26 UTC (rev 21961)
@@ -47,24 +47,6 @@
 	//	 | w7 | w8 | w9 |
 	//	 +----+----+----+
 
-#ifdef USE_ALTIVEC
-	// The YUV threshold.
-	static const vector unsigned char vThreshold = (vector unsigned char)((vector unsigned int)0x00300706);
-
-	// Bit pattern mask.
-	static const vector signed int vPatternMask1 = (vector signed int)(0x01,0x02,0x04,0x08);
-	static const vector signed int vPatternMask2 = (vector signed int)(0x10,0x20,0x40,0x80);
-
-	// Permutation masks for the incremental vector loading (see below for more information).
-	static const vector unsigned char vPermuteToV1234 = (vector unsigned char)( 4, 5, 6, 7,  8,9,10,11,  20,21,22,23,  16,17,18,19);
-	static const vector unsigned char vPermuteToV6789 = (vector unsigned char)(24,25,26,27,  8,9,10,11,  12,13,14,15,  28,29,30,31);
-
-	// The YUV vectors.
-	vector signed char vecYUV5555;
-	vector signed char vecYUV1234;
-	vector signed char vecYUV6789;
-#endif
-
 	while (height--) {
 		w1 = *(p - 1 - nextlineSrc);
 		w4 = *(p - 1);
@@ -74,15 +56,6 @@
 		w5 = *(p);
 		w8 = *(p + nextlineSrc);
 
-#ifdef USE_ALTIVEC
-		// Load inital values of vecYUV1234 / vecYUV6789
-		const int arr1234[4] = {0, YUV(1), YUV(2), 0};
-		const int arr6789[4] = {YUV(5), 0, YUV(7), YUV(8)};
-
-		vecYUV1234 = *(const vector signed char *)arr1234;
-		vecYUV6789 = *(const vector signed char *)arr6789;
-#endif
-
 		int tmpWidth = width;
 		while (tmpWidth--) {
 			p++;
@@ -92,74 +65,6 @@
 			w9 = *(p + nextlineSrc);
 
 			int pattern = 0;
-
-#ifdef USE_ALTIVEC
-			/*
-			Consider this peephole into the image buffer:
-			+----+----+----+----+
-			|    |    |    |    |
-			| w00| w01| w02| w03|
-			+----+----+----+----+
-			|    |    |    |    |
-			| w10| w11| w12| w13|
-			+----+----+----+----+
-			|    |    |    |    |
-			| w20| w21| w22| w23|
-			+----+----+----+----+
-
-			In the previous loop iteration, w11 was the center point, and our
-			vectors contain the following data from the previous iteration:
-			vecYUV5555 = { w11, w11, w11, w11 }
-			vecYUV1234 = { w00, w01, w02, w10 }
-			vecYUV6789 = { w12, w20, w21, w22 }
-
-			Now we have the new center point w12, and we would like to have
-			the following values in our vectors:
-			vecYUV5555 = { w12, w12, w12, w12 }
-			vecYUV1234 = { w01, w02, w03, w11 }
-			vecYUV6789 = { w13, w21, w22, w23 }
-
-			To this end we load a single new vector:
-			vTmp = { w11, w03, w13, w23 }
-
-			We then can compute all the new vector values using permutations only:
-			vecYUV5555 = { vecYUV6789[0], vecYUV6789[0], vecYUV6789[0], vecYUV6789[0] }
-			vecYUV1234 = { vecYUV1234[1], vecYUV1234[2],  vTmp[1],  vTmp[0] }
-			vecYUV6789 = {  vTmp[2], vecYUV6789[2], vecYUV6789[3],  vTmp[3] }
-
-			Beautiful, isn't it? :-)
-			*/
-
-			// Load the new values into a temporary vector (see above for an explanation)
-			const int tmpArr[4] = {YUV(4), YUV(3), YUV(6), YUV(9)};
-			vector signed char vTmp = *(const vector signed char *)tmpArr;
-
-			// Next update the data vectors
-			vecYUV5555 = (vector signed char)vec_splat((vector unsigned int)vecYUV6789, 0);
-			vecYUV1234 = vec_perm(vecYUV1234, vTmp, vPermuteToV1234);
-			vecYUV6789 = vec_perm(vecYUV6789, vTmp, vPermuteToV6789);
-
-			// Compute the absolute difference between the center point's YUV and the outer points
-			const vector signed char vDiff1 = vec_abs(vec_sub(vecYUV5555, vecYUV1234));
-			const vector signed char vDiff2 = vec_abs(vec_sub(vecYUV5555, vecYUV6789));
-
-			// Compare the difference to the threshold (byte-wise)
-			const vector bool char vCmp1 = vec_cmpgt((vector unsigned char)vDiff1, vThreshold);
-			const vector bool char vCmp2 = vec_cmpgt((vector unsigned char)vDiff2, vThreshold);
-
-			// Convert all non-zero (long) vector elements to 0xF...F, keep 0 at 0.
-			// Then and in the patter masks. The idea is that for 0 components, we get 0,
-			// while for the other components we get exactly the mask value.
-			const vector signed int vPattern1 = vec_and(vec_cmpgt((vector unsigned int)vCmp1, (vector unsigned int)0), vPatternMask1);
-			const vector signed int vPattern2 = vec_and(vec_cmpgt((vector unsigned int)vCmp2, (vector unsigned int)0), vPatternMask2);
-
-			// Now sum up the components of all vectors. Since our pattern mask values
-			// are all "orthogonal", this is effectively the same as ORing them all
-			// together. In the end, the rightmost word of vSum contains the 'pattern'
-			vector signed int vSum = vec_sums(vPattern1, (vector signed int)0);
-			vSum = vec_sums(vPattern2, vSum);
-			pattern = ((int *)&vSum)[3];
-#else
 			const int yuv5 = YUV(5);
 			if (w5 != w1 && diffYUV(yuv5, YUV(1))) pattern |= 0x0001;
 			if (w5 != w2 && diffYUV(yuv5, YUV(2))) pattern |= 0x0002;
@@ -169,7 +74,6 @@
 			if (w5 != w7 && diffYUV(yuv5, YUV(7))) pattern |= 0x0020;
 			if (w5 != w8 && diffYUV(yuv5, YUV(8))) pattern |= 0x0040;
 			if (w5 != w9 && diffYUV(yuv5, YUV(9))) pattern |= 0x0080;
-#endif
 
 			switch (pattern) {
 			case 0:


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.