[Scummvm-git-logs] scummvm master -> d23fab69d4088834f270ce853e72c5a65459dd05
bluegr
noreply at scummvm.org
Sun Aug 17 00:10:00 UTC 2025
This automated email contains information about 2 new commits which have been
pushed to the 'scummvm' repo located at https://api.github.com/repos/scummvm/scummvm .
Summary:
b18f9a103b BLADERUNNER: use Graphics::crossBlit and crossMaskBlit for VQADecoder
d23fab69d4 GRAPHICS: add ARM NEON fast blit for XRGB1555->RGB565
Commit: b18f9a103be2c6eb31a0a78d2456ebae1a9346d6
https://github.com/scummvm/scummvm/commit/b18f9a103be2c6eb31a0a78d2456ebae1a9346d6
Author: William Bonnaventure (william.bonnaventure at gmail.com)
Date: 2025-08-17T03:09:56+03:00
Commit Message:
BLADERUNNER: use Graphics::crossBlit and crossMaskBlit for VQADecoder
Changed paths:
engines/bladerunner/vqa_decoder.cpp
diff --git a/engines/bladerunner/vqa_decoder.cpp b/engines/bladerunner/vqa_decoder.cpp
index 9919b4b020a..2647a25f978 100644
--- a/engines/bladerunner/vqa_decoder.cpp
+++ b/engines/bladerunner/vqa_decoder.cpp
@@ -32,6 +32,8 @@
#include "audio/decoders/raw.h"
+#include "graphics/blit.h"
+
#include "common/array.h"
#include "common/util.h"
#include "common/memstream.h"
@@ -1101,34 +1103,63 @@ void VQADecoder::VQAVideoTrack::VPTRWriteBlock(Graphics::Surface *surface, unsig
uint32 intermDiv = 0;
uint32 dst_x = 0;
uint32 dst_y = 0;
- uint16 vqaColor = 0;
- uint8 a, r, g, b;
+
+ // Alpha component is inversed, set srcFormat to XRGB1555 to ignore it
+ // Instead we manually transform alpha values into a mask
+ Graphics::PixelFormat srcFormat = Graphics::PixelFormat(2, 5, 5, 5, 0, 10, 5, 0, 0);
+ const uint16 *src_p = (const uint16 *)block_src;
+ uint8 *mask = nullptr;
+
+#ifdef SCUMM_BIG_ENDIAN
+ // Swap bytes to big endian as the source is little endian
+ uint16 *swapSrc = (uint16 *)malloc(2 * _blockW * _blockH);
+ if (!swapSrc) {
+ warning("Not enough memory for VPTRWriteBlock");
+ return;
+ }
+
+ for (uint x = 0; x < _blockW * _blockH; ++x) {
+ swapSrc[x] = SWAP_BYTES_16(src_p[x]);
+ }
+
+ src_p = swapSrc;
+#endif
+
+ if (alpha) {
+ mask = (uint8 *)malloc(_blockW * _blockH);
+ if (!mask) {
+ warning("Not enough memory for VPTRWriteBlock");
+ return;
+ }
+ // Create mask using alpha values
+ for (uint x = 0; x < _blockW * _blockH; ++x) {
+ // Extract alpha value
+ // We XOR it with 1 to invert and get an actual alpha value
+ mask[x] = (byte)(READ_UINT16(src_p + x) >> 15) ^ 0x01;
+ }
+ }
for (uint i = count; i != 0; --i) {
- // aux variable to avoid duplicate division and a modulo operation
intermDiv = (dstBlock + count - i) / blocks_per_line; // start of current blocks line
dst_x = ((dstBlock + count - i) - intermDiv * blocks_per_line) * _blockW + _offsetX;
dst_y = intermDiv * _blockH + _offsetY;
- const uint8 *src_p = block_src;
-
- for (uint y = _blockH; y != 0; --y) {
- for (uint x = _blockW; x != 0; --x) {
- vqaColor = READ_LE_UINT16(src_p);
- src_p += 2;
-
- getGameDataColor(vqaColor, a, r, g, b);
-
- if (!(alpha && a)) {
- // CLIP() is too slow and it is not needed.
- // void* dstPtr = surface->getBasePtr(CLIP(dst_x + x, (uint32)0, (uint32)(surface->w - 1)), CLIP(dst_y + y, (uint32)0, (uint32)(surface->h - 1)));
- void* dstPtr = surface->getBasePtr(dst_x + _blockW - x, dst_y + _blockH - y);
- // Ignore the alpha in the output as it is inversed in the input
- drawPixel(*surface, dstPtr, surface->format.RGBToColor(r, g, b));
- }
- }
+ uint8* dstPtr = (uint8 *)surface->getBasePtr(dst_x, dst_y);
+ if (alpha) {
+ // Use mask to blit
+ Graphics::crossMaskBlit(dstPtr, (const byte *)src_p, (const byte *)mask, surface->pitch, _blockW * 2, _blockW, _blockW, _blockH, surface->format, srcFormat);
+ } else {
+ Graphics::crossBlit(dstPtr, (const byte *)src_p, surface->pitch, _blockW * 2, _blockW, _blockH, surface->format, srcFormat);
}
}
+
+#ifdef SCUMM_BIG_ENDIAN
+ if (swapSrc)
+ free(swapSrc);
+#endif
+
+ if (mask)
+ free(mask);
}
bool VQADecoder::VQAVideoTrack::decodeFrame(Graphics::Surface *surface) {
Commit: d23fab69d4088834f270ce853e72c5a65459dd05
https://github.com/scummvm/scummvm/commit/d23fab69d4088834f270ce853e72c5a65459dd05
Author: William Bonnaventure (william.bonnaventure at gmail.com)
Date: 2025-08-17T03:09:56+03:00
Commit Message:
GRAPHICS: add ARM NEON fast blit for XRGB1555->RGB565
Changed paths:
graphics/blit.h
graphics/blit/blit-fast.cpp
graphics/blit/blit-neon.cpp
graphics/blit/blit.cpp
diff --git a/graphics/blit.h b/graphics/blit.h
index a023dc1cd29..d6704848c52 100644
--- a/graphics/blit.h
+++ b/graphics/blit.h
@@ -212,6 +212,11 @@ bool crossMaskBlitMap(byte *dst, const byte *src, const byte *mask,
typedef void (*FastBlitFunc)(byte *, const byte *, const uint, const uint, const uint, const uint);
+#ifdef SCUMMVM_NEON
+// Fast blit functions for ARM NEON
+void fastBlitNEON_XRGB1555_RGB565(byte *, const byte *, const uint, const uint, const uint, const uint);
+#endif
+
/**
* Look up optimised routines for converting between pixel formats.
*
diff --git a/graphics/blit/blit-fast.cpp b/graphics/blit/blit-fast.cpp
index f391bccf6b0..8f2200792c7 100644
--- a/graphics/blit/blit-fast.cpp
+++ b/graphics/blit/blit-fast.cpp
@@ -22,6 +22,7 @@
#include "graphics/blit.h"
#include "graphics/pixelformat.h"
#include "common/endian.h"
+#include "common/system.h"
namespace Graphics {
@@ -85,7 +86,15 @@ static const FastBlitLookup fastBlitFuncs_4to4[] = {
// 32-bit byteswap and rotate left
{ swapBlit<true, 24>, Graphics::PixelFormat(4, 8, 8, 8, 8, 24, 16, 8, 0), Graphics::PixelFormat(4, 8, 8, 8, 8, 8, 16, 24, 0) }, // RGBA8888 -> BGRA8888
{ swapBlit<true, 24>, Graphics::PixelFormat(4, 8, 8, 8, 8, 8, 16, 24, 0), Graphics::PixelFormat(4, 8, 8, 8, 8, 24, 16, 8, 0) } // BGRA8888 -> RGBA8888
+
+};
+
+#ifdef SCUMMVM_NEON
+static const FastBlitLookup fastBlitFuncs_NEON[] = {
+ // 16-bit with NEON
+ { fastBlitNEON_XRGB1555_RGB565, Graphics::PixelFormat(2, 5, 5, 5, 0, 10, 5, 0, 0), Graphics::PixelFormat(2, 5, 6, 5, 0, 11, 5, 0, 0) }, // XRGB1555 -> RGB565
};
+#endif
FastBlitFunc getFastBlitFunc(const PixelFormat &dstFmt, const PixelFormat &srcFmt) {
const uint dstBpp = dstFmt.bytesPerPixel;
@@ -96,18 +105,33 @@ FastBlitFunc getFastBlitFunc(const PixelFormat &dstFmt, const PixelFormat &srcFm
if (srcBpp == 4 && dstBpp == 4) {
table = fastBlitFuncs_4to4;
length = ARRAYSIZE(fastBlitFuncs_4to4);
- } else {
- return nullptr;
+
+ for (size_t i = 0; i < length; i++) {
+ if (srcFmt != table[i].srcFmt)
+ continue;
+ if (dstFmt != table[i].dstFmt)
+ continue;
+
+ return table[i].func;
+ }
}
- for (size_t i = 0; i < length; i++) {
- if (srcFmt != table[i].srcFmt)
- continue;
- if (dstFmt != table[i].dstFmt)
- continue;
+#ifdef SCUMMVM_NEON
+ if (srcBpp == 2 && dstBpp == 2 && g_system->hasFeature(OSystem::kFeatureCpuNEON)) {
+ table = fastBlitFuncs_NEON;
+ length = ARRAYSIZE(fastBlitFuncs_NEON);
+
+ for (size_t i = 0; i < length; i++) {
+ if (srcFmt != table[i].srcFmt)
+ continue;
+ if (dstFmt != table[i].dstFmt)
+ continue;
- return table[i].func;
+ return table[i].func;
+ }
}
+#endif
+
return nullptr;
}
diff --git a/graphics/blit/blit-neon.cpp b/graphics/blit/blit-neon.cpp
index eac78aea87c..6b3ecc79715 100644
--- a/graphics/blit/blit-neon.cpp
+++ b/graphics/blit/blit-neon.cpp
@@ -324,6 +324,58 @@ void BlendBlit::blitNEON(Args &args, const TSpriteBlendMode &blendMode, const Al
blitT<BlendBlitImpl_NEON>(args, blendMode, alphaType);
}
+void fastBlitNEON_XRGB1555_RGB565(byte *dst, const byte *src, // Converts a w x h block of 16-bit XRGB1555 pixels at src into RGB565 pixels at dst
+ const uint dstPitch, const uint srcPitch, // row pitches in bytes (row padding is derived below as pitch - w * 2)
+ const uint w, const uint h) { // block width and height, in pixels
+ const uint srcDelta = (srcPitch - w * 2); // bytes to skip after each source row (2 bytes per pixel)
+ const uint dstDelta = (dstPitch - w * 2); // bytes to skip after each destination row
+
+ const uint16 *src_ptr = (const uint16 *)src;
+ uint16 *dst_ptr = (uint16 *)dst;
+ uint16x4_t pixels; // four 16-bit lanes: four pixels are processed per NEON operation
+
+ for (uint y = h; y > 0; --y) {
+ uint x = w; // pixels still to convert in the current row
+ for (; x >= 4; x -= 4) { // vector path: handles the row in groups of 4 pixels
+ src_ptr = (const uint16 *)src; // the byte pointers are the running cursors; refresh the 16-bit views from them
+ dst_ptr = (uint16 *)dst;
+
+ // Load the next 4 source pixels into the NEON register
+ pixels = vld1_u16(src_ptr);
+
+ // Convert from XRGB1555 to RGB565
+ // Per lane: ((pixels & 0x7FE0) << 1) | ((pixels & 0x0200) >> 4) | (pixels & 0x001F); the unused top bit (bit 15) is masked off
+ pixels = vorr_u16(
+ vorr_u16(
+ vshl_n_u16(vand_u16(pixels, vmov_n_u16(0x7FE0)), 1), // bits 5-14 -> bits 6-15: red and green move up by one bit
+ vshr_n_u16(vand_u16(pixels, vmov_n_u16(0x0200)), 4) // source bit 9 (top green bit) -> bit 5, filling the new lowest green bit
+ ),
+ vand_u16(pixels, vmov_n_u16(0x001F)) // bits 0-4 (blue) are kept unchanged
+ );
+
+ // Store the 4 converted pixels to the destination
+ vst1_u16(dst_ptr, pixels);
+
+ src += 4 * 2; // advance by 4 pixels of 2 bytes each
+ dst += 4 * 2;
+ }
+
+ for (; x > 0; --x) { // scalar tail: at most 3 pixels remain in this row
+ // Fewer than 4 pixels are left: convert them one at a time with the same bit formula
+ src_ptr = (const uint16 *)src;
+ dst_ptr = (uint16 *)dst;
+
+ *dst_ptr = ((((*src_ptr) & 0x7FE0) << 1) | (((*src_ptr) & 0x0200) >> 4) | ((*src_ptr) & 0x001F));
+
+ src += 2; // advance by one 2-byte pixel
+ dst += 2;
+ }
+
+ src += srcDelta; // jump over row padding to the start of the next source row
+ dst += dstDelta; // same for the destination
+ }
+}
+
} // end of namespace Graphics
#if !defined(__aarch64__) && !defined(__ARM_NEON)
diff --git a/graphics/blit/blit.cpp b/graphics/blit/blit.cpp
index 88510e4edbc..6fc17b609d8 100644
--- a/graphics/blit/blit.cpp
+++ b/graphics/blit/blit.cpp
@@ -322,7 +322,7 @@ bool crossBlit(byte *dst, const byte *src,
}
// Attempt to use a faster method if possible
- FastBlitFunc blitFunc = getFastBlitFunc(dstFmt, dstFmt);
+ FastBlitFunc blitFunc = getFastBlitFunc(dstFmt, srcFmt);
if (blitFunc) {
blitFunc(dst, src, dstPitch, srcPitch, w, h);
return true;
More information about the Scummvm-git-logs
mailing list