[Scummvm-git-logs] scummvm master -> d23fab69d4088834f270ce853e72c5a65459dd05

Sun Aug 17 00:10:00 UTC 2025

This automated email contains information about 2 new commits which have been
pushed to the 'scummvm' repo located at https://api.github.com/repos/scummvm/scummvm .

Summary:
b18f9a103b BLADERUNNER: use Graphics::crossBlit and crossMaskBlit for VQADecoder
d23fab69d4 GRAPHICS: add ARM NEON fast blit for XRGB1555->RGB565


Commit: b18f9a103be2c6eb31a0a78d2456ebae1a9346d6
    https://github.com/scummvm/scummvm/commit/b18f9a103be2c6eb31a0a78d2456ebae1a9346d6
Author: William Bonnaventure (william.bonnaventure at gmail.com)
Date: 2025-08-17T03:09:56+03:00

Commit Message:
BLADERUNNER: use Graphics::crossBlit and crossMaskBlit for VQADecoder

Changed paths:
    engines/bladerunner/vqa_decoder.cpp

diff --git a/engines/bladerunner/vqa_decoder.cpp b/engines/bladerunner/vqa_decoder.cpp
index 9919b4b020a..2647a25f978 100644
--- a/engines/bladerunner/vqa_decoder.cpp
+++ b/engines/bladerunner/vqa_decoder.cpp
@@ -32,6 +32,8 @@
 
 #include "audio/decoders/raw.h"
 
+#include "graphics/blit.h"
+
 #include "common/array.h"
 #include "common/util.h"
 #include "common/memstream.h"
@@ -1101,34 +1103,63 @@ void VQADecoder::VQAVideoTrack::VPTRWriteBlock(Graphics::Surface *surface, unsig
 	uint32 intermDiv = 0;
 	uint32 dst_x = 0;
 	uint32 dst_y = 0;
-	uint16 vqaColor = 0;
-	uint8 a, r, g, b;
+
+	// Alpha component is inversed, set srcFormat to XRGB1555 to ignore it
+	// Instead we manually transform alpha values into a mask
+	Graphics::PixelFormat srcFormat = Graphics::PixelFormat(2, 5, 5, 5, 0, 10, 5, 0, 0);
+	const uint16 *src_p = (const uint16 *)block_src;
+	uint8 *mask = nullptr;
+
+#ifdef SCUMM_BIG_ENDIAN
+	// Swap bytes to big endian as the source is little endian
+	uint16 *swapSrc = (uint16 *)malloc(2 * _blockW * _blockH);
+	if (!swapSrc) {
+		warning("Not enough memory for VPTRWriteBlock");
+		return;
+	}
+
+	for (uint x = 0; x < _blockW * _blockH; ++x) {
+		swapSrc[x] = SWAP_BYTES_16(src_p[x]);
+	}
+
+	src_p = swapSrc;
+#endif
+
+	if (alpha) {
+		mask = (uint8 *)malloc(_blockW * _blockH);
+		if (!mask) {
+			warning("Not enough memory for VPTRWriteBlock");
+			return;
+		}
+		// Create mask using alpha values
+		for (uint x = 0; x < _blockW * _blockH; ++x) {
+			// Extract alpha value
+			// We XOR it with 1 to invert and get an actual alpha value
+			mask[x] = (byte)(READ_UINT16(src_p + x) >> 15) ^ 0x01;
+		}
+	}
 
 	for (uint i = count; i != 0; --i) {
-		// aux variable to avoid duplicate division and a modulo operation
 		intermDiv = (dstBlock + count - i) / blocks_per_line; // start of current blocks line
 		dst_x = ((dstBlock + count - i) - intermDiv * blocks_per_line) * _blockW + _offsetX;
 		dst_y = intermDiv * _blockH + _offsetY;
 
-		const uint8 *src_p = block_src;
-
-		for (uint y = _blockH; y != 0; --y) {
-			for (uint x = _blockW; x != 0; --x) {
-				vqaColor = READ_LE_UINT16(src_p);
-				src_p += 2;
-
-				getGameDataColor(vqaColor, a, r, g, b);
-
-				if (!(alpha && a)) {
-					// CLIP() is too slow and it is not needed.
-					// void* dstPtr = surface->getBasePtr(CLIP(dst_x + x, (uint32)0, (uint32)(surface->w - 1)), CLIP(dst_y + y, (uint32)0, (uint32)(surface->h - 1)));
-					void* dstPtr = surface->getBasePtr(dst_x + _blockW - x, dst_y + _blockH - y);
-					// Ignore the alpha in the output as it is inversed in the input
-					drawPixel(*surface, dstPtr, surface->format.RGBToColor(r, g, b));
-				}
-			}
+		uint8* dstPtr = (uint8 *)surface->getBasePtr(dst_x, dst_y);
+		if (alpha) {
+			// Use mask to blit
+			Graphics::crossMaskBlit(dstPtr, (const byte *)src_p, (const byte *)mask, surface->pitch, _blockW * 2, _blockW, _blockW, _blockH, surface->format, srcFormat);
+		} else {
+			Graphics::crossBlit(dstPtr, (const byte *)src_p, surface->pitch, _blockW * 2, _blockW, _blockH, surface->format, srcFormat);
 		}
 	}
+
+#ifdef SCUMM_BIG_ENDIAN
+	if (swapSrc)
+		free(swapSrc);
+#endif
+
+	if (mask)
+		free(mask);
 }
 
 bool VQADecoder::VQAVideoTrack::decodeFrame(Graphics::Surface *surface) {


Commit: d23fab69d4088834f270ce853e72c5a65459dd05
    https://github.com/scummvm/scummvm/commit/d23fab69d4088834f270ce853e72c5a65459dd05
Author: William Bonnaventure (william.bonnaventure at gmail.com)
Date: 2025-08-17T03:09:56+03:00

Commit Message:
GRAPHICS: add ARM NEON fast blit for XRGB1555->RGB565

Changed paths:
    graphics/blit.h
    graphics/blit/blit-fast.cpp
    graphics/blit/blit-neon.cpp
    graphics/blit/blit.cpp


diff --git a/graphics/blit.h b/graphics/blit.h
index a023dc1cd29..d6704848c52 100644
--- a/graphics/blit.h
+++ b/graphics/blit.h
@@ -212,6 +212,11 @@ bool crossMaskBlitMap(byte *dst, const byte *src, const byte *mask,
 
 typedef void (*FastBlitFunc)(byte *, const byte *, const uint, const uint, const uint, const uint);
 
+#ifdef SCUMMVM_NEON
+// Fast blit functions for ARM NEON
+void fastBlitNEON_XRGB1555_RGB565(byte *, const byte *, const uint, const uint, const uint, const uint);
+#endif
+
 /**
  * Look up optimised routines for converting between pixel formats.
  *
diff --git a/graphics/blit/blit-fast.cpp b/graphics/blit/blit-fast.cpp
index f391bccf6b0..8f2200792c7 100644
--- a/graphics/blit/blit-fast.cpp
+++ b/graphics/blit/blit-fast.cpp
@@ -22,6 +22,7 @@
 #include "graphics/blit.h"
 #include "graphics/pixelformat.h"
 #include "common/endian.h"
+#include "common/system.h"
 
 namespace Graphics {
 
@@ -85,7 +86,15 @@ static const FastBlitLookup fastBlitFuncs_4to4[] = {
 	// 32-bit byteswap and rotate left
 	{ swapBlit<true,  24>, Graphics::PixelFormat(4, 8, 8, 8, 8, 24, 16,  8,  0), Graphics::PixelFormat(4, 8, 8, 8, 8,  8, 16, 24,  0) }, // RGBA8888 -> BGRA8888
 	{ swapBlit<true,  24>, Graphics::PixelFormat(4, 8, 8, 8, 8,  8, 16, 24,  0), Graphics::PixelFormat(4, 8, 8, 8, 8, 24, 16,  8,  0) }  // BGRA8888 -> RGBA8888
+
+};
+
+#ifdef SCUMMVM_NEON
+static const FastBlitLookup fastBlitFuncs_NEON[] = {
+	// 16-bit with NEON
+	{ fastBlitNEON_XRGB1555_RGB565, Graphics::PixelFormat(2, 5, 5, 5, 0, 10, 5, 0, 0), Graphics::PixelFormat(2, 5, 6, 5, 0, 11, 5, 0, 0) }, // XRGB1555 -> RGB565
 };
+#endif
 
 FastBlitFunc getFastBlitFunc(const PixelFormat &dstFmt, const PixelFormat &srcFmt) {
 	const uint dstBpp = dstFmt.bytesPerPixel;
@@ -96,18 +105,33 @@ FastBlitFunc getFastBlitFunc(const PixelFormat &dstFmt, const PixelFormat &srcFm
 	if (srcBpp == 4 && dstBpp == 4) {
 		table = fastBlitFuncs_4to4;
 		length = ARRAYSIZE(fastBlitFuncs_4to4);
-	} else {
-		return nullptr;
+
+		for (size_t i = 0; i < length; i++) {
+			if (srcFmt != table[i].srcFmt)
+				continue;
+			if (dstFmt != table[i].dstFmt)
+				continue;
+
+			return table[i].func;
+		}
 	}
 
-	for (size_t i = 0; i < length; i++) {
-		if (srcFmt != table[i].srcFmt)
-			continue;
-		if (dstFmt != table[i].dstFmt)
-			continue;
+#ifdef SCUMMVM_NEON
+	if (srcBpp == 2 && dstBpp == 2 && g_system->hasFeature(OSystem::kFeatureCpuNEON)) {
+		table = fastBlitFuncs_NEON;
+		length = ARRAYSIZE(fastBlitFuncs_NEON);
+
+		for (size_t i = 0; i < length; i++) {
+			if (srcFmt != table[i].srcFmt)
+				continue;
+			if (dstFmt != table[i].dstFmt)
+				continue;
 
-		return table[i].func;
+			return table[i].func;
+		}
 	}
+#endif
+
 	return nullptr;
 }
 
diff --git a/graphics/blit/blit-neon.cpp b/graphics/blit/blit-neon.cpp
index eac78aea87c..6b3ecc79715 100644
--- a/graphics/blit/blit-neon.cpp
+++ b/graphics/blit/blit-neon.cpp
@@ -324,6 +324,58 @@ void BlendBlit::blitNEON(Args &args, const TSpriteBlendMode &blendMode, const Al
 	blitT<BlendBlitImpl_NEON>(args, blendMode, alphaType);
 }
 
+void fastBlitNEON_XRGB1555_RGB565(byte *dst, const byte *src,
+                  const uint dstPitch, const uint srcPitch,
+                  const uint w, const uint h) {
+	const uint srcDelta = (srcPitch - w * 2);
+	const uint dstDelta = (dstPitch - w * 2);
+
+	const uint16 *src_ptr = (const uint16 *)src;
+	uint16 *dst_ptr = (uint16 *)dst;
+	uint16x4_t pixels;
+
+	for (uint y = h; y > 0; --y) {
+		uint x = w;
+		for (; x >= 4; x -= 4) {
+			src_ptr = (const uint16 *)src;
+			dst_ptr = (uint16 *)dst;
+
+			// Load pixels to NEON
+			pixels = vld1_u16(src_ptr);
+
+			// Convert from XRGB1555 to RGB565
+			// Here we do : ((pixels & 0x7FE0) << 1) | ((pixels & 0x0200) >> 4) | (pixels & 0x001F)
+			pixels = vorr_u16(
+				vorr_u16(
+					vshl_n_u16(vand_u16(pixels, vmov_n_u16(0x7FE0)), 1),
+					vshr_n_u16(vand_u16(pixels, vmov_n_u16(0x0200)), 4)
+				),
+				vand_u16(pixels, vmov_n_u16(0x001F))
+			);
+
+			// Store pixels to destination
+			vst1_u16(dst_ptr, pixels);
+
+			src += 4 * 2;
+			dst += 4 * 2;
+		}
+
+		for (; x > 0; --x) {
+			// We have remaining pixels, convert them the classic way
+			src_ptr = (const uint16 *)src;
+			dst_ptr = (uint16 *)dst;
+
+			*dst_ptr = ((((*src_ptr) & 0x7FE0) << 1) | (((*src_ptr) & 0x0200) >> 4) | ((*src_ptr) & 0x001F));
+
+			src += 2;
+			dst += 2;
+		}
+
+		src += srcDelta;
+		dst += dstDelta;
+	}
+}
+
 } // end of namespace Graphics
 
 #if !defined(__aarch64__) && !defined(__ARM_NEON)
diff --git a/graphics/blit/blit.cpp b/graphics/blit/blit.cpp
index 88510e4edbc..6fc17b609d8 100644
--- a/graphics/blit/blit.cpp
+++ b/graphics/blit/blit.cpp
@@ -322,7 +322,7 @@ bool crossBlit(byte *dst, const byte *src,
 	}
 
 	// Attempt to use a faster method if possible
-	FastBlitFunc blitFunc = getFastBlitFunc(dstFmt, dstFmt);
+	FastBlitFunc blitFunc = getFastBlitFunc(dstFmt, srcFmt);
 	if (blitFunc) {
 		blitFunc(dst, src, dstPitch, srcPitch, w, h);
 		return true;