[Scummvm-cvs-logs] SF.net SVN: scummvm: [24804] scummvm/trunk/backends/platform/ds/arm9/source/blitters.cpp

Tramboi at users.sourceforge.net Tramboi at users.sourceforge.net
Sun Dec 3 17:09:35 CET 2006


Revision: 24804
          http://scummvm.svn.sourceforge.net/scummvm/?rev=24804&view=rev
Author:   Tramboi
Date:     2006-12-03 08:09:26 -0800 (Sun, 03 Dec 2006)

Log Message:
-----------
NDS : Micro-optimizations for the software blitter

Modified Paths:
--------------
    scummvm/trunk/backends/platform/ds/arm9/source/blitters.cpp

Modified: scummvm/trunk/backends/platform/ds/arm9/source/blitters.cpp
===================================================================
--- scummvm/trunk/backends/platform/ds/arm9/source/blitters.cpp	2006-12-03 15:42:41 UTC (rev 24803)
+++ scummvm/trunk/backends/platform/ds/arm9/source/blitters.cpp	2006-12-03 16:09:26 UTC (rev 24804)
@@ -143,28 +143,45 @@
     u32 bs2 = s2 & 0x1F;
     u32 bs3 = s3 & 0x1F;
     u32 bs4 = s4 & 0x1F;
-    
-    u32 gs0_4 = (s0 >> 3) & 0x7C;
+
+#if 0    
+    u32 gs0 = (s0 >> 5) & 0x1F;
     u32 gs1 = (s1 >> 5) & 0x1F;
     u32 gs2 = (s2 >> 5) & 0x1F;
     u32 gs3 = (s3 >> 5) & 0x1F;
-    u32 gs4_4 = (s4 >> 3) & 0x7C;
+    u32 gs4 = (s4 >> 5) & 0x1F;
     
-    u32 rs0_4 = (s0 >> 8) & 0x7C;
+    u32 rs0 = (s0 >> 10) & 0x1F;
     u32 rs1 = (s1 >> 10) & 0x1F;
     u32 rs2 = (s2 >> 10) & 0x1F;
     u32 rs3 = (s3 >> 10) & 0x1F;
-    u32 rs4_4 = (s4 >> 8) & 0x7C;
+    u32 rs4 = (s4 >> 10) & 0x1F;
+#else
+    // The compiler absolutely wants to use 0x1F as an immediate, which makes it unable to fold the shift during the and
+    u32 mask = 0x1F;
+    u32 gs0, gs1, gs2, gs3, gs4;
+    asm("and %0, %2, %1, lsr #5" : "=r"(gs0) : "r"(s0), "r"(mask) : );
+    asm("and %0, %2, %1, lsr #5" : "=r"(gs1) : "r"(s1), "r"(mask) : );
+    asm("and %0, %2, %1, lsr #5" : "=r"(gs2) : "r"(s2), "r"(mask) : );
+    asm("and %0, %2, %1, lsr #5" : "=r"(gs3) : "r"(s3), "r"(mask) : );
+    asm("and %0, %2, %1, lsr #5" : "=r"(gs4) : "r"(s4), "r"(mask) : );
+    u32 rs0, rs1, rs2, rs3, rs4;
+    asm("and %0, %2, %1, lsr #10" : "=r"(rs0) : "r"(s0), "r"(mask) : );
+    asm("and %0, %2, %1, lsr #10" : "=r"(rs1) : "r"(s1), "r"(mask) : );
+    asm("and %0, %2, %1, lsr #10" : "=r"(rs2) : "r"(s2), "r"(mask) : );
+    asm("and %0, %2, %1, lsr #10" : "=r"(rs3) : "r"(s3), "r"(mask) : );
+    asm("and %0, %2, %1, lsr #10" : "=r"(rs4) : "r"(s4), "r"(mask) : );
+#endif
     
-    u32 rd0 = rs0_4 +   rs1;
+    u32 rd0 = 4*rs0 +   rs1;
     u32 rd1 = 2*rs1 + rs1 + 2*rs2;
     u32 rd2 = 2*rs2 + 2*rs3 + rs3;
-    u32 rd3 =   rs3 + rs4_4;
+    u32 rd3 =   rs3 + 4*rs4;
     
-    u32 gd0 = gs0_4 +   gs1;
+    u32 gd0 = 4*gs0 +   gs1;
     u32 gd1 = 2*gs1 + gs1 + 2*gs2;
     u32 gd2 = 2*gs2 + 2*gs3 + gs3;
-    u32 gd3 =   gs3 + gs4_4;
+    u32 gd3 =   gs3 + 4*gs4;
     
     u32 bd0 = 4*bs0 +   bs1;
     u32 bd1 = 2*bs1 + bs1 + 2*bs2;


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.




More information about the Scummvm-git-logs mailing list