[Scummvm-git-logs] scummvm master -> 06af761337d3908ac15a4db6554264f063992fd9
mikrosk
noreply at scummvm.org
Mon Mar 20 21:17:29 UTC 2023
This automated email contains information about 1 new commit which has been
pushed to the 'scummvm' repo located at https://github.com/scummvm/scummvm .
Summary:
06af761337 GRAPHICS: ATARI: Align surface on a 16-byte boundary
Commit: 06af761337d3908ac15a4db6554264f063992fd9
https://github.com/scummvm/scummvm/commit/06af761337d3908ac15a4db6554264f063992fd9
Author: Miro Kropacek (miro.kropacek at gmail.com)
Date: 2023-03-20T22:17:25+01:00
Commit Message:
GRAPHICS: ATARI: Align surface on a 16-byte boundary
Also implement a CPU-based optimization for the 68040 / 68060.
Changed paths:
backends/graphics/atari/atari-graphics.cpp
graphics/blit-atari.cpp
diff --git a/backends/graphics/atari/atari-graphics.cpp b/backends/graphics/atari/atari-graphics.cpp
index 462d7385553..f325a6f4fe4 100644
--- a/backends/graphics/atari/atari-graphics.cpp
+++ b/backends/graphics/atari/atari-graphics.cpp
@@ -1001,7 +1001,7 @@ void AtariGraphicsManager::Cursor::setSurface(const void *buf, int w, int h, int
if (surface.w != w || surface.h != h || surface.format != format)
surface.create(w, h, format);
- surface.copyRectToSurface(buf, surface.pitch, 0, 0, w, h);
+ surface.copyRectToSurface(buf, w * format.bytesPerPixel, 0, 0, w, h);
hotspotX = _hotspotX;
hotspotY = _hotspotY;
diff --git a/graphics/blit-atari.cpp b/graphics/blit-atari.cpp
index aa80e235290..ca54c7306ce 100644
--- a/graphics/blit-atari.cpp
+++ b/graphics/blit-atari.cpp
@@ -22,15 +22,24 @@
#include "graphics/blit.h"
#include "graphics/surface.h"
-#include <cstdlib> // calloc
-#include <cstring> // memcpy
+#include <cstdlib> // malloc
+#include <cstring> // memcpy, memset
#include <mint/cookie.h>
#include <mint/falcon.h>
#include "backends/graphics/atari/atari-graphics-superblitter.h"
+#include "common/textconsole.h" // error
+
+static inline bool hasMove16() {
+ long val;
+ static bool hasMove16 = Getcookie(C__CPU, &val) == C_FOUND && val >= 40;
+ return hasMove16;
+}
namespace Graphics {
+constexpr size_t ALIGN = 16; // 16 bytes
+
// hijack surface overrides here as well as these are tightly related
// to the blitting routine below
void Surface::create(int16 width, int16 height, const PixelFormat &f) {
@@ -40,24 +49,41 @@ void Surface::create(int16 width, int16 height, const PixelFormat &f) {
w = width;
h = height;
format = f;
- pitch = w * format.bytesPerPixel;
+ // align pitch to a 16-byte boundary for a possible C2P conversion
+ pitch = (w * format.bytesPerPixel + ALIGN - 1) & (-ALIGN);
if (width && height) {
- if (VgetMonitor() == MON_VGA && Getcookie(C_SupV, NULL) == C_FOUND)
- pixels = (void*)ct60_vmalloc(width * height * format.bytesPerPixel);
- else
- pixels = calloc(width * height, format.bytesPerPixel);
- assert(pixels);
+ if (VgetMonitor() == MON_VGA && Getcookie(C_SupV, NULL) == C_FOUND) {
+ pixels = (void *)ct60_vmalloc(height * pitch);
+
+ if (!pixels)
+ error("Not enough SVRAM to allocate a surface");
+
+ assert((uintptr)pixels >= 0xA0000000);
+ } else {
+ // align buffer to a 16-byte boundary for move16 or C2P conversion
+ void *pixelsUnaligned = ::malloc(sizeof(uintptr) + (height * pitch) + ALIGN - 1);
+
+ if (!pixelsUnaligned)
+ error("Not enough memory to allocate a surface");
+
+ pixels = (void *)(((uintptr)pixelsUnaligned + sizeof(uintptr) + ALIGN - 1) & (-ALIGN));
+
+ // store the unaligned pointer for later free()
+ *((uintptr *)pixels - 1) = (uintptr)pixelsUnaligned;
+ }
+
+ memset(pixels, 0, height * pitch);
}
}
void Surface::free() {
if (((uintptr)pixels & 0xFF000000) >= 0xA0000000)
ct60_vmfree(pixels);
- else
- ::free(pixels);
+ else if (pixels)
+ ::free((void *)*((uintptr *)pixels - 1));
- pixels = 0;
+ pixels = nullptr;
w = h = pitch = 0;
format = PixelFormat();
}
@@ -87,12 +113,136 @@ void copyBlit(byte *dst, const byte *src,
// wait until we finish otherwise we may overwrite pixels written manually afterwards
while (*SV_BLITTER_CONTROL & 1);
} else if (dstPitch == srcPitch && ((w * bytesPerPixel) == dstPitch)) {
- memcpy(dst, src, dstPitch * h);
+ if (hasMove16() && ((uintptr)src & (ALIGN - 1)) == 0 && ((uintptr)dst & (ALIGN - 1)) == 0) {
+ __asm__ volatile(
+ " move.l %2,d0\n"
+ " lsr.l #4,d0\n"
+ " beq.b 3f\n"
+
+ " moveq #0x0f,d1\n"
+ " and.l d0,d1\n"
+ " neg.l d1\n"
+ " lsr.l #4,d0\n"
+ " jmp (2f,pc,d1.l*4)\n"
+ "1:\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ "2:\n"
+ " dbra d0,1b\n"
+ // handle also the unlikely case when 'dstPitch'
+ // is not divisible by 16 but 'src' and 'dst' are
+ "3:\n"
+ " moveq #0x0f,d0\n"
+ " and.l %2,d0\n"
+ " neg.l d0\n"
+ " jmp (4f,pc,d0.l*2)\n"
+ // only 15x move.b as 16 would be handled above
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ "4:\n"
+ : // outputs
+ : "a"(src), "a"(dst), "g"(dstPitch * h) // inputs
+ : "d0", "d1", "cc" AND_MEMORY
+ );
+ } else {
+ memcpy(dst, src, dstPitch * h);
+ }
} else {
- for (uint i = 0; i < h; ++i) {
- memcpy(dst, src, w * bytesPerPixel);
- dst += dstPitch;
- src += srcPitch;
+ if (hasMove16() && ((uintptr)src & (ALIGN - 1)) == 0 && ((uintptr)dst & (ALIGN - 1)) == 0) {
+ __asm__ volatile(
+ "0:\n"
+ " move.l %2,d0\n"
+ " lsr.l #4,d0\n"
+ " beq.b 3f\n"
+
+ " moveq #0x0f,d1\n"
+ " and.l d0,d1\n"
+ " neg.l d1\n"
+ " lsr.l #4,d0\n"
+ " jmp (2f,pc,d1.l*4)\n"
+ "1:\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ " move16 (%0)+,(%1)+\n"
+ "2:\n"
+ " dbra d0,1b\n"
+ // handle (w * bytesPerPixel) % 16
+ "3:\n"
+ " moveq #0x0f,d0\n"
+ " and.l %2,d0\n"
+ " neg.l d0\n"
+ " jmp (4f,pc,d0.l*2)\n"
+ // only 15x move.b as 16 would be handled above
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ " move.b (%0)+,(%1)+\n"
+ "4:\n"
+ " add.l %4,%1\n"
+ " add.l %5,%0\n"
+ " dbra %3,0b\n"
+ : // outputs
+ : "a"(src), "a"(dst), "g"(w * bytesPerPixel), "d"(h - 1),
+ "g"(dstPitch - w * bytesPerPixel), "g"(srcPitch - w * bytesPerPixel) // inputs
+ : "d0", "d1", "d2", "cc" AND_MEMORY
+ );
+ } else {
+ for (uint i = 0; i < h; ++i) {
+ memcpy(dst, src, w * bytesPerPixel);
+ dst += dstPitch;
+ src += srcPitch;
+ }
}
}
}
More information about the Scummvm-git-logs
mailing list