[Scummvm-git-logs] scummvm master -> fe5c2068aa357c2b42c796958e83416777d46a47
fracturehill
noreply at scummvm.org
Mon May 6 19:52:33 UTC 2024
This automated email contains information about 1 new commit which has been
pushed to the 'scummvm' repo located at https://github.com/scummvm/scummvm .
Summary:
fe5c2068aa AGS: Fix 2bpp tinted blitting with AVX2 optimizations
Commit: fe5c2068aa357c2b42c796958e83416777d46a47
https://github.com/scummvm/scummvm/commit/fe5c2068aa357c2b42c796958e83416777d46a47
Author: Kaloyan Chehlarski (strahy at outlook.com)
Date: 2024-05-06T21:52:03+02:00
Commit Message:
AGS: Fix 2bpp tinted blitting with AVX2 optimizations
Tinted blitting was completely broken on the 2bpp AVX2
path, due to the code ignoring the boundary in the
middle of the wide registers. This change fixes the vertical
striping produced by the original code, and removes some
unnecessary operations that were being performed for
every group of pixels.
Changed paths:
engines/ags/lib/allegro/surface_avx2.cpp
diff --git a/engines/ags/lib/allegro/surface_avx2.cpp b/engines/ags/lib/allegro/surface_avx2.cpp
index 0fdd7992809..716766120b5 100644
--- a/engines/ags/lib/allegro/surface_avx2.cpp
+++ b/engines/ags/lib/allegro/surface_avx2.cpp
@@ -380,15 +380,15 @@ static inline __m256i blendPixelSIMD2Bpp(__m256i srcCols, __m256i destCols, __m2
return _mm256_or_si256(ch1, ch2);
case kTintBlenderMode:
case kTintLightBlenderMode:
- __m256i srcColsLo = simd2BppTo4Bpp(_mm256_and_si256(srcCols, _mm256_set_epi32(0, 0, 0, 0, -1, -1, -1, -1)));
- __m256i srcColsHi = simd2BppTo4Bpp(_mm256_srli_si256(srcCols, 16));
- __m256i destColsLo = simd2BppTo4Bpp(_mm256_and_si256(destCols, _mm256_set_epi32(0, 0, 0, 0, -1, -1, -1, -1)));
- __m256i destColsHi = simd2BppTo4Bpp(_mm256_srli_si256(destCols, 16));
- __m256i alphasLo = _mm256_unpacklo_epi16(_mm256_and_si256(alphas, _mm256_set_epi32(0, 0, 0, 0, -1, -1, -1, -1)), _mm256_setzero_si256());
- __m256i alphasHi = _mm256_unpacklo_epi16(_mm256_srli_si256(alphas, 16), _mm256_setzero_si256());
+ __m256i srcColsLo = simd2BppTo4Bpp(srcCols);
+ __m256i srcColsHi = simd2BppTo4Bpp(_mm256_permute2x128_si256(srcCols, srcCols, _MM_SHUFFLE(2, 0, 0, 1)));
+ __m256i destColsLo = simd2BppTo4Bpp(destCols);
+ __m256i destColsHi = simd2BppTo4Bpp(_mm256_permute2x128_si256(destCols, destCols, _MM_SHUFFLE(2, 0, 0, 1)));
+ __m256i alphasLo = _mm256_unpacklo_epi16(alphas, _mm256_setzero_si256());
+ __m256i alphasHi = _mm256_unpacklo_epi16(_mm256_permute2x128_si256(alphas, alphas, _MM_SHUFFLE(2, 3, 0, 1)), _mm256_setzero_si256());
__m256i lo = simd4BppTo2Bpp(blendTintSpriteSIMD(srcColsLo, destColsLo, alphasLo, _G(_blender_mode) == kTintLightBlenderMode));
__m256i hi = simd4BppTo2Bpp(blendTintSpriteSIMD(srcColsHi, destColsHi, alphasHi, _G(_blender_mode) == kTintLightBlenderMode));
- return _mm256_or_si256(lo, _mm256_slli_si256(hi, 16));
+ return _mm256_or_si256(lo, _mm256_permute2x128_si256(hi, hi, _MM_SHUFFLE(0, 0, 2, 0)));
}
return _mm256_setzero_si256();
}
More information about the Scummvm-git-logs
mailing list