[Scummvm-git-logs] scummvm branch-2-9 -> 22c8eb466e0a678a737fa79f40152f57eefecf63
mikrosk
noreply at scummvm.org
Fri Apr 25 21:31:09 UTC 2025
This automated email contains information about 1 new commit which has been
pushed to the 'scummvm' repo located at https://api.github.com/repos/scummvm/scummvm .
Summary:
22c8eb466e Revert "BACKENDS: ATARI: Remove move16 code"
Commit: 22c8eb466e0a678a737fa79f40152f57eefecf63
https://github.com/scummvm/scummvm/commit/22c8eb466e0a678a737fa79f40152f57eefecf63
Author: Miro Kropacek (miro.kropacek at gmail.com)
Date: 2025-04-25T23:30:54+02:00
Commit Message:
Revert "BACKENDS: ATARI: Remove move16 code"
This reverts commit f18147a08a812bc06d6d21117e72140604cc3397.
Embarrassingly, my benchmark code was broken. move16 very much does
help, both in VRAM<->TT RAM and TT RAM<->TT RAM copying.
Changed paths:
backends/platform/atari/build-release.sh
graphics/blit/blit-atari.cpp
diff --git a/backends/platform/atari/build-release.sh b/backends/platform/atari/build-release.sh
index 31b577147c2..b161a2756f3 100755
--- a/backends/platform/atari/build-release.sh
+++ b/backends/platform/atari/build-release.sh
@@ -10,7 +10,7 @@ PLATFORM=m68k-atari-mintelf
FASTCALL=false
export ASFLAGS="-m68020-60"
-export CXXFLAGS="-m68020-60 -DUSE_SUPERVIDEL -DUSE_SV_BLITTER"
+export CXXFLAGS="-m68020-60 -DUSE_MOVE16 -DUSE_SUPERVIDEL -DUSE_SV_BLITTER"
export LDFLAGS="-m68020-60"
export PKG_CONFIG_LIBDIR="$(${PLATFORM}-gcc -print-sysroot)/usr/lib/m68020-60/pkgconfig"
diff --git a/graphics/blit/blit-atari.cpp b/graphics/blit/blit-atari.cpp
index 02ba8a6fe88..cf8257c5aa7 100644
--- a/graphics/blit/blit-atari.cpp
+++ b/graphics/blit/blit-atari.cpp
@@ -66,6 +66,14 @@ static void syncSuperBlitter() {
}
#endif
+#ifdef USE_MOVE16
+static inline bool hasMove16() { // reports whether the _CPU cookie is present with a value of at least 40
+ long val; // filled in by Getcookie(); only read when the lookup returns C_FOUND (short-circuit &&)
+ static bool hasMove16 = Getcookie(C__CPU, &val) == C_FOUND && val >= 40; // function-local static: the cookie is looked up on the first call only
+ return hasMove16; // cached answer; NOTE(review): presumably ">= 40" means 68040 or newer, i.e. CPUs that implement move16 -- confirm
+}
+#endif
+
void lockSuperBlitter() {
#ifdef USE_SV_BLITTER
assert(!isSuperBlitterLocked);
@@ -182,12 +190,152 @@ void copyBlit(byte *dst, const byte *src,
} else
#endif
if (dstPitch == srcPitch && dstPitch == (w * bytesPerPixel)) {
- memcpy(dst, src, dstPitch * h);
+#ifdef USE_MOVE16
+ if (hasMove16() && ((uintptr)src & (ALIGN - 1)) == 0 && ((uintptr)dst & (ALIGN - 1)) == 0) {
+ __asm__ volatile(
+ " move.l %2,%%d0\n"
+ " lsr.l #4,%%d0\n"
+ " beq.b 3f\n"
+
+ " moveq #0x0f,%%d1\n"
+ " and.l %%d0,%%d1\n"
+ " neg.l %%d1\n"
+ " lsr.l #4,%%d0\n"
+ " jmp (2f,%%pc,%%d1.l*4)\n"
+ "1:\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ "2:\n"
+ " dbra %%d0,1b\n"
+ // handle also the unlikely case when 'dstPitch'
+ // is not divisible by 16 but 'src' and 'dst' are
+ "3:\n"
+ " moveq #0x0f,%%d0\n"
+ " and.l %2,%%d0\n"
+ " neg.l %%d0\n"
+ " jmp (4f,%%pc,%%d0.l*2)\n"
+ // only 15x move.b as 16 would be handled above
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ "4:\n"
+ : // outputs
+ : "a"(src), "a"(dst), "g"(dstPitch * h) // inputs
+ : "d0", "d1", "cc" AND_MEMORY
+ );
+ } else {
+#else
+ {
+#endif
+ memcpy(dst, src, dstPitch * h);
+ }
} else {
- for (uint i = 0; i < h; ++i) {
- memcpy(dst, src, w * bytesPerPixel);
- dst += dstPitch;
- src += srcPitch;
+#ifdef USE_MOVE16
+ if (hasMove16() && ((uintptr)src & (ALIGN - 1)) == 0 && ((uintptr)dst & (ALIGN - 1)) == 0
+ && (srcPitch & (ALIGN - 1)) == 0 && (dstPitch & (ALIGN - 1)) == 0) {
+ __asm__ volatile(
+ " move.l %2,%%d0\n"
+
+ " moveq #0x0f,%%d1\n"
+ " and.l %%d0,%%d1\n"
+ " neg.l %%d1\n"
+ " lea (4f,%%pc,%%d1.l*2),%%a0\n"
+ " move.l %%a0,%%a1\n"
+
+ " lsr.l #4,%%d0\n"
+ " beq.b 3f\n"
+
+ " moveq #0x0f,%%d1\n"
+ " and.l %%d0,%%d1\n"
+ " neg.l %%d1\n"
+ " lea (2f,%%pc,%%d1.l*4),%%a0\n"
+ " lsr.l #4,%%d0\n"
+ " move.l %%d0,%%d1\n"
+ "0:\n"
+ " move.l %%d1,%%d0\n"
+ " jmp (%%a0)\n"
+ "1:\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ "2:\n"
+ " dbra %%d0,1b\n"
+ // handle (w * bytesPerPixel) % 16
+ "3:\n"
+ " jmp (%%a1)\n"
+ // only 15x move.b as 16 would be handled above
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ "4:\n"
+ " add.l %4,%1\n"
+ " add.l %5,%0\n"
+ " dbra %3,0b\n"
+ : // outputs
+ : "a"(src), "a"(dst), "g"(w * bytesPerPixel), "d"(h - 1),
+ "g"(dstPitch - w * bytesPerPixel), "g"(srcPitch - w * bytesPerPixel) // inputs
+ : "d0", "d1", "a0", "a1", "cc" AND_MEMORY
+ );
+ } else {
+#else
+ {
+#endif
+ for (uint i = 0; i < h; ++i) {
+ memcpy(dst, src, w * bytesPerPixel);
+ dst += dstPitch;
+ src += srcPitch;
+ }
}
}
}
More information about the Scummvm-git-logs
mailing list