[Scummvm-git-logs] scummvm master -> 3bf4795c20621d2ea8e9940207d1f7676507768e

sev- noreply at scummvm.org
Mon Aug 28 19:22:35 UTC 2023


This automated email contains information about 3 new commits which have been
pushed to the 'scummvm' repo located at https://github.com/scummvm/scummvm .

Summary:
ace85f1cac NEWS: Mention new CPU blending routines in ManagedSurface and in AGS.
8f5a157b84 NEWS: Fix indentation
3bf4795c20 AGS: Added missing license headers


Commit: ace85f1cac38d44d52f606175e3d00251cf88c7c
    https://github.com/scummvm/scummvm/commit/ace85f1cac38d44d52f606175e3d00251cf88c7c
Author: Eugene Sandulenko (sev at scummvm.org)
Date: 2023-08-28T21:21:06+02:00

Commit Message:
NEWS: Mention new CPU blending routines in ManagedSurface and in AGS.

Changed paths:
    NEWS.md


diff --git a/NEWS.md b/NEWS.md
index 4dfd2cc1df6..3638cdfbc16 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -22,10 +22,12 @@ For a more comprehensive changelog of the latest experimental code, see:
   - Added support for PC98 font ROM.
   - Added optional dependency for libsonivox, implementing EAS (Embedded Audio
     Synthesis) sound driver.
-  - Added YUV422 and YUV444 support to Theora decoder.  
+  - Added YUV422 and YUV444 support to Theora decoder.
+  - Implemented specialized CPU routines for graphics blitting for ManagedSurface.
 
  AGS:
    - Synced with upstream AGS 3.6.0.51.
+   - Sped up graphics manipulation on modern hardware with CPU-specific code.
 
  Asylum:
    - Added support for Chinese Sanitarium.
@@ -42,7 +44,7 @@ For a more comprehensive changelog of the latest experimental code, see:
    - Added support for Kyra1 Amiga French release and DOS Czech fan translation.
    - Improved game controls menu accuracy for all platforms.
    - Added support for Chinese EOB2.
-   - Added support for the PC-98 version of EOB2. 
+   - Added support for the PC-98 version of EOB2.
 
  SCUMM:
    - Added support for unpacked RuSCUMM patches for Russian localizations.


Commit: 8f5a157b84959ab49d25770bad6d0926c32d4c37
    https://github.com/scummvm/scummvm/commit/8f5a157b84959ab49d25770bad6d0926c32d4c37
Author: Eugene Sandulenko (sev at scummvm.org)
Date: 2023-08-28T21:22:04+02:00

Commit Message:
NEWS: Fix indentation

Changed paths:
    NEWS.md


diff --git a/NEWS.md b/NEWS.md
index 3638cdfbc16..5040574b8c6 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -17,13 +17,13 @@ For a more comprehensive changelog of the latest experimental code, see:
    - Added Atari port.
 
  General:
-  - Added optional dependency for libvpx.
-  - Added optional dependency for libmikmod.
-  - Added support for PC98 font ROM.
-  - Added optional dependency for libsonivox, implementing EAS (Embedded Audio
-    Synthesis) sound driver.
-  - Added YUV422 and YUV444 support to Theora decoder.
-  - Implemented specialized CPU routines for graphics blitting for ManagedSurface.
+   - Added optional dependency for libvpx.
+   - Added optional dependency for libmikmod.
+   - Added support for PC98 font ROM.
+   - Added optional dependency for libsonivox, implementing EAS (Embedded Audio
+     Synthesis) sound driver.
+   - Added YUV422 and YUV444 support to Theora decoder.
+   - Implemented specialized CPU routines for graphics blitting for ManagedSurface.
 
  AGS:
    - Synced with upstream AGS 3.6.0.51.


Commit: 3bf4795c20621d2ea8e9940207d1f7676507768e
    https://github.com/scummvm/scummvm/commit/3bf4795c20621d2ea8e9940207d1f7676507768e
Author: Eugene Sandulenko (sev at scummvm.org)
Date: 2023-08-28T21:22:18+02:00

Commit Message:
AGS: Added missing license headers

Changed paths:
    engines/ags/lib/allegro/surface_avx2.cpp
    engines/ags/lib/allegro/surface_neon.cpp
    engines/ags/lib/allegro/surface_sse2.cpp


diff --git a/engines/ags/lib/allegro/surface_avx2.cpp b/engines/ags/lib/allegro/surface_avx2.cpp
index 1a19517a47a..a3b78394a38 100644
--- a/engines/ags/lib/allegro/surface_avx2.cpp
+++ b/engines/ags/lib/allegro/surface_avx2.cpp
@@ -1,3 +1,24 @@
+/* ScummVM - Graphic Adventure Engine
+ *
+ * ScummVM is the legal property of its developers, whose names
+ * are too numerous to list here. Please refer to the COPYRIGHT
+ * file distributed with this source distribution.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
 #include <immintrin.h>
 #include "ags/lib/allegro/gfx.h"
 #include "ags/lib/allegro/color.h"
@@ -216,7 +237,7 @@ inline __m256i blendTintSpriteSIMD(__m256i srcCols, __m256i destCols, __m256i al
 		val = _mm256_sub_ps(val, _mm256_sub_ps(_mm256_set1_ps(1.0f), _mm256_mul_ps(_mm256_cvtepi32_ps(alphas), _mm256_set1_ps(1.0f / 250.0f))));
 		val = _mm256_max_ps(val, _mm256_setzero_ps());
 	}
-		
+
 	// then it stiches the HSV back together
 	// the hue and saturation come from the source (tint) color, and the value comes from
 	// the destinaion (real source) color
@@ -253,7 +274,7 @@ inline __m256i blendTintSpriteSIMD(__m256i srcCols, __m256i destCols, __m256i al
 	return final;
 }
 
-inline __m256i mul32_as16(__m256i a, __m256i b) {	
+inline __m256i mul32_as16(__m256i a, __m256i b) {
 	__m256i a16 = _mm256_packs_epi32(a, _mm256_setzero_si256());
 	__m256i b16 = _mm256_packs_epi32(b, _mm256_setzero_si256());
 	__m256i res = _mm256_mullo_epi16(a16, b16);
@@ -479,7 +500,7 @@ static void drawInner4BppWithConv(BITMAP::DrawInnerArgs &args) {
 	if (args.yStart + yCtrHeight > args.destArea.h) {
 		yCtrHeight = args.destArea.h - args.yStart;
 	}
-	
+
 	byte *destP = (byte *)args.destArea.getBasePtr(0, destY);
 	const byte *srcP = (const byte *)args.src.getBasePtr(
 	                       args.horizFlip ? args.srcArea.right - 8 : args.srcArea.left,
@@ -580,7 +601,7 @@ static void drawInner4BppWithConv(BITMAP::DrawInnerArgs &args) {
 		}
 		byte *destVal = (byte *)&destP[destX * DestBytesPerPixel];
 		uint32 srcCol = args.dstBitmap.getColor(srcColPtr, SrcBytesPerPixel);
-		
+
 		// Check if this is a transparent color we should skip
 		if (args.skipTrans && ((srcCol & args.alphaMask) == args.transColor))
 			continue;
@@ -650,7 +671,7 @@ static void drawInner2Bpp(BITMAP::DrawInnerArgs &args) {
 	if (args.yStart + yCtrHeight > args.destArea.h) {
 		yCtrHeight = args.destArea.h - args.yStart;
 	}
-	
+
 	byte *destP = (byte *)args.destArea.getBasePtr(0, destY);
 	const byte *srcP = (const byte *)args.src.getBasePtr(
 	                       args.horizFlip ? args.srcArea.right - 16 : args.srcArea.left,
@@ -756,7 +777,7 @@ static void drawInner2Bpp(BITMAP::DrawInnerArgs &args) {
 		}
 		byte *destVal = (byte *)&destP[destX * 2];
 		uint32 srcCol = (uint32)(*(const uint16 *)srcColPtr);
-		
+
 		// Check if this is a transparent color we should skip
 		if (args.skipTrans && srcCol == args.transColor)
 			continue;
@@ -794,7 +815,7 @@ static void drawInner1Bpp(BITMAP::DrawInnerArgs &args) {
 	__m256i scaleAdds2 = _mm256_set_epi32((uint32)args.scaleX*15, (uint32)args.scaleX*14, (uint32)args.scaleX*13, (uint32)args.scaleX*12, (uint32)args.scaleX*11, (uint32)args.scaleX*10, (uint32)args.scaleX*9, (uint32)args.scaleX*8);
 	__m256i scaleAdds3 = _mm256_set_epi32((uint32)args.scaleX*23, (uint32)args.scaleX*22, (uint32)args.scaleX*21, (uint32)args.scaleX*20, (uint32)args.scaleX*19, (uint32)args.scaleX*18, (uint32)args.scaleX*17, (uint32)args.scaleX*16);
 	__m256i scaleAdds4 = _mm256_set_epi32((uint32)args.scaleX*31, (uint32)args.scaleX*30, (uint32)args.scaleX*29, (uint32)args.scaleX*28, (uint32)args.scaleX*27, (uint32)args.scaleX*26, (uint32)args.scaleX*25, (uint32)args.scaleX*24);
-	
+
 	// Clip the bounds ahead of time (so we don't waste time checking if we are in bounds when
 	// we are in the inner loop)
 	int xCtrStart = 0, xCtrWidth = args.dstRect.width();
@@ -818,7 +839,7 @@ static void drawInner1Bpp(BITMAP::DrawInnerArgs &args) {
 	if (args.yStart + yCtrHeight > args.destArea.h) {
 		yCtrHeight = args.destArea.h - args.yStart;
 	}
-	
+
 	byte *destP = (byte *)args.destArea.getBasePtr(0, destY);
 	const byte *srcP = (const byte *)args.src.getBasePtr(
 	                       args.horizFlip ? args.srcArea.right - 32 : args.srcArea.left,
@@ -935,7 +956,7 @@ void BITMAP::drawAVX2(DrawInnerArgs &args) {
 		case 2: DrawInnerImpl::drawInner2Bpp<Scale>(args); break;
 		case 4: DrawInnerImpl::drawInner4BppWithConv<4, 4, Scale>(args); break;
 		}
-	} else if (format.bytesPerPixel == 4 && args.src.format.bytesPerPixel == 2) { 
+	} else if (format.bytesPerPixel == 4 && args.src.format.bytesPerPixel == 2) {
 		DrawInnerImpl::drawInner4BppWithConv<4, 2, Scale>(args);
 	} else if (format.bytesPerPixel == 2 && args.src.format.bytesPerPixel == 4) {
 		DrawInnerImpl::drawInner4BppWithConv<2, 4, Scale>(args);
diff --git a/engines/ags/lib/allegro/surface_neon.cpp b/engines/ags/lib/allegro/surface_neon.cpp
index 15856ec0c5a..a1f13802dd0 100644
--- a/engines/ags/lib/allegro/surface_neon.cpp
+++ b/engines/ags/lib/allegro/surface_neon.cpp
@@ -1,3 +1,24 @@
+/* ScummVM - Graphic Adventure Engine
+ *
+ * ScummVM is the legal property of its developers, whose names
+ * are too numerous to list here. Please refer to the COPYRIGHT
+ * file distributed with this source distribution.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
 #include <arm_neon.h>
 #include "ags/ags.h"
 #include "ags/globals.h"
@@ -244,7 +265,7 @@ inline uint32x4_t blendTintSpriteSIMD(uint32x4_t srcCols, uint32x4_t destCols, u
 		val = vsubq_f32(val, vsubq_f32(vmovq_n_f32(1.0), vmulq_n_f32(vcvtq_f32_u32(alphas), 1.0 / 250.0)));
 		val = vmaxq_f32(val, vmovq_n_f32(0.0));
 	}
-		
+
 	// then it stiches the HSV back together
 	// the hue and saturation come from the source (tint) color, and the value comes from
 	// the destinaion (real source) color
@@ -486,7 +507,7 @@ static void drawInner4BppWithConv(BITMAP::DrawInnerArgs &args) {
 	if (args.yStart + yCtrHeight > args.destArea.h) {
 		yCtrHeight = args.destArea.h - args.yStart;
 	}
-	
+
 	byte *destP = (byte *)args.destArea.getBasePtr(0, destY);
 	const byte *srcP = (const byte *)args.src.getBasePtr(
 	                       args.horizFlip ? args.srcArea.right - 4 : args.srcArea.left,
@@ -579,7 +600,7 @@ static void drawInner4BppWithConv(BITMAP::DrawInnerArgs &args) {
 		}
 		byte *destVal = (byte *)&destP[destX * DestBytesPerPixel];
 		uint32 srcCol = args.dstBitmap.getColor(srcColPtr, SrcBytesPerPixel);
-		
+
 		// Check if this is a transparent color we should skip
 		if (args.skipTrans && ((srcCol & args.alphaMask) == args.transColor))
 			continue;
@@ -649,7 +670,7 @@ static void drawInner2Bpp(BITMAP::DrawInnerArgs &args) {
 	if (args.yStart + yCtrHeight > args.destArea.h) {
 		yCtrHeight = args.destArea.h - args.yStart;
 	}
-	
+
 	byte *destP = (byte *)args.destArea.getBasePtr(0, destY);
 	const byte *srcP = (const byte *)args.src.getBasePtr(
 	                       args.horizFlip ? args.srcArea.right - 8 : args.srcArea.left,
@@ -747,7 +768,7 @@ static void drawInner2Bpp(BITMAP::DrawInnerArgs &args) {
 		}
 		byte *destVal = (byte *)&destP[destX * 2];
 		uint32 srcCol = (uint32)(*(const uint16 *)srcColPtr);
-		
+
 		// Check if this is a transparent color we should skip
 		if (args.skipTrans && srcCol == args.transColor)
 			continue;
@@ -785,7 +806,7 @@ static void drawInner1Bpp(BITMAP::DrawInnerArgs &args) {
 	uint32x4_t scaleAdds2 = {(uint32)args.scaleX*4, (uint32)args.scaleX*5, (uint32)args.scaleX*6, (uint32)args.scaleX*7};
 	uint32x4_t scaleAdds3 = {(uint32)args.scaleX*8, (uint32)args.scaleX*9, (uint32)args.scaleX*10, (uint32)args.scaleX*11};
 	uint32x4_t scaleAdds4 = {(uint32)args.scaleX*12, (uint32)args.scaleX*13, (uint32)args.scaleX*14, (uint32)args.scaleX*15};
-	
+
 	// Clip the bounds ahead of time (so we don't waste time checking if we are in bounds when
 	// we are in the inner loop)
 	int xCtrStart = 0, xCtrWidth = args.dstRect.width();
@@ -809,7 +830,7 @@ static void drawInner1Bpp(BITMAP::DrawInnerArgs &args) {
 	if (args.yStart + yCtrHeight > args.destArea.h) {
 		yCtrHeight = args.destArea.h - args.yStart;
 	}
-	
+
 	byte *destP = (byte *)args.destArea.getBasePtr(0, destY);
 	const byte *srcP = (const byte *)args.src.getBasePtr(
 	                       args.horizFlip ? args.srcArea.right - 16 : args.srcArea.left,
@@ -905,7 +926,7 @@ void BITMAP::drawNEON(DrawInnerArgs &args) {
 		case 2: DrawInnerImpl::drawInner2Bpp<Scale>(args); break;
 		case 4: DrawInnerImpl::drawInner4BppWithConv<4, 4, Scale>(args); break;
 		}
-	} else if (format.bytesPerPixel == 4 && args.src.format.bytesPerPixel == 2) { 
+	} else if (format.bytesPerPixel == 4 && args.src.format.bytesPerPixel == 2) {
 		DrawInnerImpl::drawInner4BppWithConv<4, 2, Scale>(args);
 	} else if (format.bytesPerPixel == 2 && args.src.format.bytesPerPixel == 4) {
 		DrawInnerImpl::drawInner4BppWithConv<2, 4, Scale>(args);
diff --git a/engines/ags/lib/allegro/surface_sse2.cpp b/engines/ags/lib/allegro/surface_sse2.cpp
index 185206bb401..80b161ddaa3 100644
--- a/engines/ags/lib/allegro/surface_sse2.cpp
+++ b/engines/ags/lib/allegro/surface_sse2.cpp
@@ -1,3 +1,24 @@
+/* ScummVM - Graphic Adventure Engine
+ *
+ * ScummVM is the legal property of its developers, whose names
+ * are too numerous to list here. Please refer to the COPYRIGHT
+ * file distributed with this source distribution.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
 #include <immintrin.h>
 #include "ags/ags.h"
 #include "ags/globals.h"
@@ -222,7 +243,7 @@ inline __m128i blendTintSpriteSIMD(__m128i srcCols, __m128i destCols, __m128i al
 		val = _mm_sub_ps(val, _mm_sub_ps(_mm_set1_ps(1.0f), _mm_mul_ps(_mm_cvtepi32_ps(alphas), _mm_set1_ps(1.0f / 250.0f))));
 		val = _mm_max_ps(val, _mm_setzero_ps());
 	}
-		
+
 	// then it stiches the HSV back together
 	// the hue and saturation come from the source (tint) color, and the value comes from
 	// the destinaion (real source) color
@@ -259,7 +280,7 @@ inline __m128i blendTintSpriteSIMD(__m128i srcCols, __m128i destCols, __m128i al
 	return final;
 }
 
-inline __m128i mul32_as16(__m128i a, __m128i b) {	
+inline __m128i mul32_as16(__m128i a, __m128i b) {
 	__m128i a16 = _mm_packs_epi32(a, _mm_setzero_si128());
 	__m128i b16 = _mm_packs_epi32(b, _mm_setzero_si128());
 	__m128i res = _mm_mullo_epi16(a16, b16);
@@ -496,7 +517,7 @@ static void drawInner4BppWithConv(BITMAP::DrawInnerArgs &args) {
 	if (args.yStart + yCtrHeight > args.destArea.h) {
 		yCtrHeight = args.destArea.h - args.yStart;
 	}
-	
+
 	byte *destP = (byte *)args.destArea.getBasePtr(0, destY);
 	const byte *srcP = (const byte *)args.src.getBasePtr(
 	                       args.horizFlip ? args.srcArea.right - 4 : args.srcArea.left,
@@ -593,7 +614,7 @@ static void drawInner4BppWithConv(BITMAP::DrawInnerArgs &args) {
 		}
 		byte *destVal = (byte *)&destP[destX * DestBytesPerPixel];
 		uint32 srcCol = args.dstBitmap.getColor(srcColPtr, SrcBytesPerPixel);
-		
+
 		// Check if this is a transparent color we should skip
 		if (args.skipTrans && ((srcCol & args.alphaMask) == args.transColor))
 			continue;
@@ -663,7 +684,7 @@ static void drawInner2Bpp(BITMAP::DrawInnerArgs &args) {
 	if (args.yStart + yCtrHeight > args.destArea.h) {
 		yCtrHeight = args.destArea.h - args.yStart;
 	}
-	
+
 	byte *destP = (byte *)args.destArea.getBasePtr(0, destY);
 	const byte *srcP = (const byte *)args.src.getBasePtr(
 	                       args.horizFlip ? args.srcArea.right - 8 : args.srcArea.left,
@@ -761,7 +782,7 @@ static void drawInner2Bpp(BITMAP::DrawInnerArgs &args) {
 		}
 		byte *destVal = (byte *)&destP[destX * 2];
 		uint32 srcCol = (uint32)(*(const uint16 *)srcColPtr);
-		
+
 		// Check if this is a transparent color we should skip
 		if (args.skipTrans && srcCol == args.transColor)
 			continue;
@@ -799,7 +820,7 @@ static void drawInner1Bpp(BITMAP::DrawInnerArgs &args) {
 	__m128i scaleAdds2 = _mm_set_epi32((uint32)args.scaleX*7, (uint32)args.scaleX*6, (uint32)args.scaleX*5, (uint32)args.scaleX*4);
 	__m128i scaleAdds3 = _mm_set_epi32((uint32)args.scaleX*11, (uint32)args.scaleX*10, (uint32)args.scaleX*9, (uint32)args.scaleX*8);
 	__m128i scaleAdds4 = _mm_set_epi32((uint32)args.scaleX*15, (uint32)args.scaleX*14, (uint32)args.scaleX*13, (uint32)args.scaleX*12);
-	
+
 	// Clip the bounds ahead of time (so we don't waste time checking if we are in bounds when
 	// we are in the inner loop)
 	int xCtrStart = 0, xCtrWidth = args.dstRect.width();
@@ -823,7 +844,7 @@ static void drawInner1Bpp(BITMAP::DrawInnerArgs &args) {
 	if (args.yStart + yCtrHeight > args.destArea.h) {
 		yCtrHeight = args.destArea.h - args.yStart;
 	}
-	
+
 	byte *destP = (byte *)args.destArea.getBasePtr(0, destY);
 	const byte *srcP = (const byte *)args.src.getBasePtr(
 	                       args.horizFlip ? args.srcArea.right - 16 : args.srcArea.left,
@@ -924,7 +945,7 @@ void BITMAP::drawSSE2(DrawInnerArgs &args) {
 		case 2: DrawInnerImpl::drawInner2Bpp<Scale>(args); break;
 		case 4: DrawInnerImpl::drawInner4BppWithConv<4, 4, Scale>(args); break;
 		}
-	} else if (format.bytesPerPixel == 4 && args.src.format.bytesPerPixel == 2) { 
+	} else if (format.bytesPerPixel == 4 && args.src.format.bytesPerPixel == 2) {
 		DrawInnerImpl::drawInner4BppWithConv<4, 2, Scale>(args);
 	} else if (format.bytesPerPixel == 2 && args.src.format.bytesPerPixel == 4) {
 		DrawInnerImpl::drawInner4BppWithConv<2, 4, Scale>(args);




More information about the Scummvm-git-logs mailing list