[Scummvm-cvs-logs] SF.net SVN: scummvm: [27789] scummvm/trunk/sound

robinwatts at users.sourceforge.net robinwatts at users.sourceforge.net
Sat Jun 30 14:54:07 CEST 2007


Revision: 27789
          http://scummvm.svn.sourceforge.net/scummvm/?rev=27789&view=rev
Author:   robinwatts
Date:     2007-06-30 05:54:07 -0700 (Sat, 30 Jun 2007)

Log Message:
-----------
Update the ARM version of the rate conversion code, in line with Fingolfin's
optimisations of the C version last night. Saves about 5 cycles per sample.

Modified Paths:
--------------
    scummvm/trunk/sound/rate_arm.cpp
    scummvm/trunk/sound/rate_arm_asm.s

Modified: scummvm/trunk/sound/rate_arm.cpp
===================================================================
--- scummvm/trunk/sound/rate_arm.cpp	2007-06-30 12:43:53 UTC (rev 27788)
+++ scummvm/trunk/sound/rate_arm.cpp	2007-06-30 12:54:07 UTC (rev 27789)
@@ -50,13 +50,18 @@
 #include "sound/mixer.h"
 #include "common/util.h"
 
+//#define DEBUG_RATECONV
+
 namespace Audio {
 
 /**
  * The precision of the fractional computations used by the rate converter.
  * Normally you should never have to modify this value.
+ * This stuff is defined in common/frac.h, but we redefine it here as the
+ * ARM routine we call doesn't respect those definitions.
  */
 #define FRAC_BITS 16
+#define FRAC_ONE  (1<<FRAC_BITS)
 
 /**
  * The size of the intermediate input cache. Bigger values may increase
@@ -152,6 +157,10 @@
 extern "C" int SimpleRate_readFudge(Audio::AudioStream &input,
                                     int16 *a, int b)
 {
+#ifdef DEBUG_RATECONV
+  fprintf(stderr, "Reading ptr=%x n%d\n", a, b);
+  fflush(stderr);
+#endif
   return input.readBuffer(a, b);
 }
 
@@ -197,7 +206,7 @@
 	int inLen;
 
 	/** position of how far output is ahead of input */
-	/** Holds what would have been opos-ipos */
+	/** Holds what would have been opos-ipos<<16 + opos_frac */
 	long opos;
 
 	/** integer position increment in the output stream */
@@ -206,14 +215,9 @@
 	/** current sample(s) in the input stream (left/right channel) */
 	st_sample_t icur[2];
 	/** last sample(s) in the input stream (left/right channel) */
-	st_sample_t ilast[2];
+	/** Note, these are deliberately ints, not st_sample_t's */
+	int32 ilast[2];
 
-	/** fractional position in the output stream */
-	long opos_frac;
-
-	/** fractional position increment in the output stream */
-	long opos_inc_frac;
-
 	st_sample_t inBuf[INTERMEDIATE_BUFFER_SIZE];
 } LinearRateDetails;
 
@@ -270,16 +274,14 @@
 		error("rate effect can only handle rates < 65536");
 	}
 
-	lr.opos_frac = 0;
-	lr.opos = 1;
+	lr.opos = FRAC_ONE;
 
 	/* increment */
 	incr = (inrate << FRAC_BITS) / outrate;
 
-	lr.opos_inc_frac = incr & ((1UL << FRAC_BITS) - 1);
-	lr.opos_inc = incr >> FRAC_BITS;
+	lr.opos_inc = incr;
 
-	lr.ilast[0] = lr.ilast[1] = 0;
+	lr.ilast[0] = lr.ilast[1] = 32768;
 	lr.icur[0] = lr.icur[1] = 0;
 
 	lr.inLen = 0;

Modified: scummvm/trunk/sound/rate_arm_asm.s
===================================================================
--- scummvm/trunk/sound/rate_arm_asm.s	2007-06-30 12:43:53 UTC (rev 27788)
+++ scummvm/trunk/sound/rate_arm_asm.s	2007-06-30 12:54:07 UTC (rev 27789)
@@ -396,6 +396,7 @@
         LDMIA   r2,{r0,r1,r8}           @ r0 = inPtr
                                         @ r1 = inLen
                                         @ r8 = opos
+        MOV     r10,#0
         CMP     r11,#0                  @ if (osamp <= 0)
         BLE     LinearRate_M_end        @   bale
         ORR     r12,r12,r12,LSL #8      @ r12= vol_l as 16 bits
@@ -408,29 +409,23 @@
         SUBS    r1, r1, #1              @ r1 = inLen -= 1
         BLT     LinearRate_M_read
 LinearRate_M_read_return:
-        LDR     r10,[r2, #16]           @ r10= icur[0,1]
+        LDRH    r4, [r2, #16]           @ r4 = icur[0]
         LDRSH   r5, [r0],#2             @ r5 = tmp1 = *inPtr++
-        SUBS    r8, r8, #1              @ r8 = opos--
-        STR     r10,[r2,#20]            @      ilast[0,1] = icur[0,1]
+        SUBS    r8, r8, #65536          @ r8 = opos--
+        STRH    r4, [r2,#22]            @      ilast[0] = icur[0]
         STRH    r5, [r2,#16]            @      icur[0] = tmp1
         BGE     LinearRate_M_loop
 
         @ part2 - form output samples
 LinearRate_M_part2:
         @ We are guaranteed that opos < 0 here
-        LDRSH   r6, [r2,#20]            @ r6 = ilast[0]
+        LDR     r6, [r2,#20]            @ r6 = ilast[0]<<16 + 32768
         LDRSH   r5, [r2,#16]            @ r5 = icur[0]
-        LDRH    r4, [r2,#24]            @ r4 = opos_frac
-        LDR     r10,[r2,#28]            @ r10= opos_frac_inc
-        MOV     r6, r6, LSL #16         @ r6 = ilast[0]<<16
+	MOV	r4, r8, LSL #16
+	MOV	r4, r4, LSR #16
         SUB     r5, r5, r6, ASR #16     @ r5 = icur[0] - ilast[0]
-        ADD     r6, r6, #1<<15          @ r6 = ilast[0]+1<<(FRAC_BITS-1)
         MLA     r6, r4, r5, r6  @ r6 = (icur[0]-ilast[0])*opos_frac+ilast[0]
 
-        ADD     r4, r4, r10             @ r4 = tmp = opos_frac+opos_inc_frac
-        STRH    r4,[r2,#24]             @ opos_frac &= 65535
-        ADD     r8, r8, r4, LSR #16     @ opos += (tmp>>FRAC_BITS)
-
         LDRSH   r4, [r3]                @ r4 = obuf[0]
         LDRSH   r5, [r3,#2]             @ r5 = obuf[1]
         MOV     r6, r6, ASR #16         @ r6 = tmp0 = tmp1 >>= 16
@@ -438,10 +433,9 @@
         MUL     r6, r14,r6              @ r6 = tmp1*vol_r
 
         ADDS    r7, r7, r4, LSL #16     @ r7 = obuf[0]<<16 + tmp0*vol_l
-        MOV     r4, #0
-        RSCVS   r7, r4, #1<<31          @ Clamp r7
+        RSCVS   r7, r10, #1<<31         @ Clamp r7
         ADDS    r6, r6, r5, LSL #16     @ r6 = obuf[1]<<16 + tmp1*vol_r
-        RSCVS   r6, r4, #1<<31          @ Clamp r6
+        RSCVS   r6, r10, #1<<31         @ Clamp r6
 
         MOV     r7, r7, LSR #16         @ Shift back to halfword
         MOV     r6, r6, LSR #16         @ Shift back to halfword
@@ -449,7 +443,7 @@
         LDR     r5, [r2,#12]            @ r5 = opos_inc
         STRH    r7, [r3],#2             @ Store output value
         STRH    r6, [r3],#2             @ Store output value
-        SUBS    r11, r11,#1             @ opos--
+        SUBS    r11, r11,#1             @ osamp--
         BLE     LinearRate_M_end        @ end if needed
 
         ADDS    r8, r8, r5              @ r8 = opos += opos_inc
@@ -460,7 +454,7 @@
         STMIA   r2,{r0,r1,r8}
         LDMFD   r13!,{r4-r11,PC}
 LinearRate_M_read:
-        ADD     r0, r2, #32             @ r0 = inPtr = inBuf
+        ADD     r0, r2, #28             @ r0 = inPtr = inBuf
         STMFD   r13!,{r0,r2-r3,r12,r14}
 
         MOV     r1, r0                  @ r1 = inBuf
@@ -508,43 +502,37 @@
         LDR     r10,[r2, #16]           @ r10= icur[0,1]
         LDRSH   r5, [r0],#2             @ r5 = tmp0 = *inPtr++
         LDRSH   r6, [r0],#2             @ r5 = tmp1 = *inPtr++
-        SUBS    r8, r8, #1              @ r8 = opos--
-        STR     r10,[r2,#20]            @      ilast[0,1] = icur[0,1]
+        SUBS    r8, r8, #65536          @ r8 = opos--
+        STRH    r10,[r2,#22]            @      ilast[0] = icur[0]
+	MOV	r10,r10,LSR #16
+        STRH    r10,[r2,#26]            @      ilast[1] = icur[1]
         STRH    r5, [r2,#16]            @      icur[0] = tmp0
-        STRH    r6, [r2,#16]            @      icur[1] = tmp1
+        STRH    r6, [r2,#18]            @      icur[1] = tmp1
         BGE     LinearRate_S_loop
 
         @ part2 - form output samples
 LinearRate_S_part2:
         @ We are guaranteed that opos < 0 here
-        LDRSH   r6, [r2,#20]            @ r6 = ilast[0]
+        LDR     r6, [r2,#20]            @ r6 = ilast[0]<<16 + 32768
         LDRSH   r5, [r2,#16]            @ r5 = icur[0]
-        LDRH    r4, [r2,#24]            @ r4 = opos_frac
-        MOV     r6, r6, LSL #16         @ r6 = ilast[0]<<16
+        MOV	r4, r8, LSL #16
+        MOV	r4, r4, LSR #16
         SUB     r5, r5, r6, ASR #16     @ r5 = icur[0] - ilast[0]
-        ADD     r6, r6, #1<<15          @ r6 = ilast[0]+1<<(FRAC_BITS-1)
         MLA     r6, r4, r5, r6  @ r6 = (icur[0]-ilast[0])*opos_frac+ilast[0]
 
-        LDRSH   r7, [r2,#22]            @ r6 = ilast[1]
+        LDR     r7, [r2,#24]            @ r7 = ilast[1]<<16 + 32768
         LDRSH   r5, [r2,#18]            @ r5 = icur[1]
-        LDR     r10,[r2,#28]            @ r10= opos_frac_inc
-        MOV     r7, r7, LSL #16         @ r7 = ilast[1]<<16
+        LDRSH   r10,[r3]                @ r10= obuf[0]
+        MOV     r6, r6, ASR #16         @ r6 = tmp1 >>= 16
         SUB     r5, r5, r7, ASR #16     @ r5 = icur[1] - ilast[1]
-        ADD     r7, r7, #1<<15          @ r6 = ilast[1]+1<<(FRAC_BITS-1)
-        MLA     r7, r4, r5, r7  @ r6 = (icur[1]-ilast[1])*opos_frac+ilast[1]
+        MLA     r7, r4, r5, r7  @ r7 = (icur[1]-ilast[1])*opos_frac+ilast[1]
 
-        ADD     r4, r4, r10             @ r4 = tmp = opos_frac+opos_inc_frac
-        STRH    r4,[r2,#24]             @ opos_frac &= 65535
-        ADD     r8, r8, r4, LSR #16     @ opos += (tmp>>FRAC_BITS)
-
-        LDRSH   r4, [r3]                @ r4 = obuf[0]
         LDRSH   r5, [r3,#2]             @ r5 = obuf[1]
         MOV     r7, r7, ASR #16         @ r7 = tmp0 >>= 16
-        MOV     r6, r6, ASR #16         @ r6 = tmp1 >>= 16
         MUL     r7, r12,r7              @ r7 = tmp0*vol_l
         MUL     r6, r14,r6              @ r6 = tmp1*vol_r
 
-        ADDS    r7, r7, r4, LSL #16     @ r7 = obuf[0]<<16 + tmp0*vol_l
+        ADDS    r7, r7, r10, LSL #16    @ r7 = obuf[0]<<16 + tmp0*vol_l
         MOV     r4, #0
         RSCVS   r7, r4, #1<<31          @ Clamp r7
         ADDS    r6, r6, r5, LSL #16     @ r6 = obuf[1]<<16 + tmp1*vol_r
@@ -556,7 +544,7 @@
         LDR     r5, [r2,#12]            @ r5 = opos_inc
         STRH    r7, [r3],#2             @ Store output value
         STRH    r6, [r3],#2             @ Store output value
-        SUBS    r11, r11,#1             @ opos--
+        SUBS    r11, r11,#1             @ osamp--
         BLE     LinearRate_S_end        @ and loop
 
         ADDS    r8, r8, r5              @ r8 = opos += opos_inc
@@ -567,7 +555,7 @@
         STMIA   r2,{r0,r1,r8}
         LDMFD   r13!,{r4-r11,PC}
 LinearRate_S_read:
-        ADD     r0, r2, #32             @ r0 = inPtr = inBuf
+        ADD     r0, r2, #28             @ r0 = inPtr = inBuf
         STMFD   r13!,{r0,r2-r3,r12,r14}
 
         MOV     r1, r0                  @ r1 = inBuf
@@ -615,43 +603,37 @@
         LDR     r10,[r2, #16]           @ r10= icur[0,1]
         LDRSH   r5, [r0],#2             @ r5 = tmp0 = *inPtr++
         LDRSH   r6, [r0],#2             @ r5 = tmp1 = *inPtr++
-        SUBS    r8, r8, #1              @ r8 = opos--
-        STR     r10,[r2,#20]            @ ilast[0,1] = icur[0,1]
+        SUBS    r8, r8, #65536          @ r8 = opos--
+        STRH    r10,[r2,#22]            @ ilast[0] = icur[0]
+	MOV	r10,r10,LSR #16
+        STRH    r10,[r2,#22]            @ ilast[1] = icur[1]
         STRH    r5, [r2,#16]            @ icur[0] = tmp0
-        STRH    r6, [r2,#16]            @ icur[1] = tmp1
+        STRH    r6, [r2,#18]            @ icur[1] = tmp1
         BGE     LinearRate_R_loop
 
         @ part2 - form output samples
 LinearRate_R_part2:
         @ We are guaranteed that opos < 0 here
-        LDRSH   r6, [r2,#20]            @ r6 = ilast[0]
+        LDR     r6, [r2,#20]            @ r6 = ilast[0]
         LDRSH   r5, [r2,#16]            @ r5 = icur[0]
-        LDRH    r4, [r2,#24]            @ r4 = opos_frac
-        MOV     r6, r6, LSL #16         @ r6 = ilast[0]<<16
+        MOV	r4, r8, LSL #16
+        MOV	r4, r4, LSR #16
         SUB     r5, r5, r6, ASR #16     @ r5 = icur[0] - ilast[0]
-        ADD     r6, r6, #1<<15          @ r6 = ilast[0]+1<<(FRAC_BITS-1)
         MLA     r6, r4, r5, r6  @ r6 = (icur[0]-ilast[0])*opos_frac+ilast[0]
 
-        LDRSH   r7, [r2,#22]            @ r6 = ilast[1]
+        LDR     r7, [r2,#24]            @ r7 = ilast[1]
         LDRSH   r5, [r2,#18]            @ r5 = icur[1]
-        LDR     r10,[r2,#28]            @ r10= opos_frac_inc
-        MOV     r7, r7, LSL #16         @ r7 = ilast[1]<<16
+        LDR     r10,[r3]                @ r10= obuf[0]
+        MOV     r6, r6, ASR #16         @ r6 = tmp1 >>= 16
         SUB     r5, r5, r7, ASR #16     @ r5 = icur[1] - ilast[1]
-        ADD     r7, r7, #1<<15          @ r6 = ilast[1]+1<<(FRAC_BITS-1)
-        MLA     r7, r4, r5, r7  @ r6 = (icur[1]-ilast[1])*opos_frac+ilast[1]
+        MLA     r7, r4, r5, r7  @ r7 = (icur[1]-ilast[1])*opos_frac+ilast[1]
 
-        ADD     r4, r4, r10             @ r4 = tmp = opos_frac+opos_inc_frac
-        STRH    r4,[r2,#24]             @ opos_frac &= 65535
-        ADD     r8, r8, r4, LSR #16     @ opos += (tmp>>FRAC_BITS)
-
-        LDRSH   r4, [r3]                @ r4 = obuf[0]
         LDRSH   r5, [r3,#2]             @ r5 = obuf[1]
         MOV     r7, r7, ASR #16         @ r7 = tmp0 >>= 16
-        MOV     r6, r6, ASR #16         @ r6 = tmp1 >>= 16
         MUL     r7, r12,r7              @ r7 = tmp0*vol_l
         MUL     r6, r14,r6              @ r6 = tmp1*vol_r
 
-        ADDS    r7, r7, r4, LSL #16     @ r7 = obuf[0]<<16 + tmp0*vol_l
+        ADDS    r7, r7, r10, LSL #16    @ r7 = obuf[0]<<16 + tmp0*vol_l
         MOV     r4, #0
         RSCVS   r7, r4, #1<<31          @ Clamp r7
         ADDS    r6, r6, r5, LSL #16     @ r6 = obuf[1]<<16 + tmp1*vol_r
@@ -663,7 +645,7 @@
         LDR     r5, [r2,#12]            @ r5 = opos_inc
         STRH    r6, [r3],#2             @ Store output value
         STRH    r7, [r3],#2             @ Store output value
-        SUBS    r11, r11,#1             @ opos--
+        SUBS    r11, r11,#1             @ osamp--
         BLE     LinearRate_R_end        @ and loop
 
         ADDS    r8, r8, r5              @ r8 = opos += opos_inc
@@ -674,7 +656,7 @@
         STMIA   r2,{r0,r1,r8}
         LDMFD   r13!,{r4-r11,PC}
 LinearRate_R_read:
-        ADD     r0, r2, #32             @ r0 = inPtr = inBuf
+        ADD     r0, r2, #28             @ r0 = inPtr = inBuf
         STMFD   r13!,{r0,r2-r3,r12,r14}
 
         MOV     r1, r0                  @ r1 = inBuf


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.




More information about the Scummvm-git-logs mailing list