[Scummvm-git-logs] scummvm master -> b0de3e786c602d1dcb9f3c1db6b9a4911944f973
fracturehill
noreply at scummvm.org
Sat Oct 21 21:26:19 UTC 2023
This automated email contains information about 5 new commits which have been
pushed to the 'scummvm' repo located at https://github.com/scummvm/scummvm .
Summary:
23e58fc250 SLUDGE: Fix another crash when drawing text
b115d8d019 GRAPHICS: Fix SSE2 path MultiplyBlend
c82dd21fa7 GRAPHICS: JANITORIAL: Convert spaces to tabs
0d0aa24df3 GRAPHICS: Allow AVX2 optimized blit path to build
b0de3e786c GRAPHICS: Fix AVX2 path for MultiplyBlend
Commit: 23e58fc25041a3ff6cd302f7bc76b195613e75fe
https://github.com/scummvm/scummvm/commit/23e58fc25041a3ff6cd302f7bc76b195613e75fe
Author: Kaloyan Chehlarski (strahy at outlook.com)
Date: 2023-10-22T00:25:56+03:00
Commit Message:
SLUDGE: Fix another crash when drawing text
This is a more complete fix for the crash that happens
when attempting to draw a sprite with a dimension of
size 0 (e.g. the space character in Out of Order's font). This
fix targets the other two functions that can draw text,
pasteSpriteToBackDrop() and burnSpriteToBackDrop().
Changed paths:
engines/sludge/sprites.cpp
diff --git a/engines/sludge/sprites.cpp b/engines/sludge/sprites.cpp
index 3e0579af7cd..a073d629e1d 100644
--- a/engines/sludge/sprites.cpp
+++ b/engines/sludge/sprites.cpp
@@ -265,6 +265,11 @@ bool GraphicsManager::loadSpriteBank(int fileNum, SpriteBank &loadhere, bool isF
// pasteSpriteToBackDrop uses the colour specified by the setPasteColour (or setPasteColor)
void GraphicsManager::pasteSpriteToBackDrop(int x1, int y1, Sprite &single, const SpritePalette &fontPal) {
+ if (!single.surface.w || !single.surface.h) {
+ // Skip surfaces with a 0 width/height (e.g. the space character on Out of Order) to avoid crashes in the blitting code.
+ return;
+ }
+
// kill zBuffer
if (_zBuffer->originalNum >= 0 && _zBuffer->sprites) {
int num = _zBuffer->originalNum;
@@ -282,6 +287,11 @@ void GraphicsManager::pasteSpriteToBackDrop(int x1, int y1, Sprite &single, cons
// burnSpriteToBackDrop adds text in the colour specified by setBurnColour
// using the differing brightness levels of the font to achieve an anti-aliasing effect.
void GraphicsManager::burnSpriteToBackDrop(int x1, int y1, Sprite &single, const SpritePalette &fontPal) {
+ if (!single.surface.w || !single.surface.h) {
+ // Skip surfaces with a 0 width/height (e.g. the space character on Out of Order) to avoid crashes in the blitting code.
+ return;
+ }
+
// kill zBuffer
if (_zBuffer->originalNum >= 0 && _zBuffer->sprites) {
int num = _zBuffer->originalNum;
Commit: b115d8d019e9328f5307b8b8da01861120b8179a
https://github.com/scummvm/scummvm/commit/b115d8d019e9328f5307b8b8da01861120b8179a
Author: Kaloyan Chehlarski (strahy at outlook.com)
Date: 2023-10-22T00:25:56+03:00
Commit Message:
GRAPHICS: Fix SSE2 path MultiplyBlend
The multiply blender in the SSE2 optimized path for
TransparentSurface blitting no longer produces wrong
colors and incorrect alpha blending.
Changed paths:
graphics/blit/blit-sse2.cpp
diff --git a/graphics/blit/blit-sse2.cpp b/graphics/blit/blit-sse2.cpp
index 62fac9c11d6..fb1362ba0b4 100644
--- a/graphics/blit/blit-sse2.cpp
+++ b/graphics/blit/blit-sse2.cpp
@@ -98,36 +98,41 @@ struct AlphaBlend {
template<bool doscale, bool rgbmod, bool alphamod>
struct MultiplyBlend {
static inline __m128i simd(__m128i src, __m128i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
- __m128i ina;
- if (alphamod)
+ __m128i ina, alphaMask;
+ if (alphamod) {
ina = _mm_srli_epi32(_mm_mullo_epi16(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask)), _mm_set1_epi32(ca)), 8);
- else
+ alphaMask = _mm_cmpeq_epi32(ina, _mm_setzero_si128());
+ } else {
ina = _mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask));
- __m128i alphaMask = _mm_cmpeq_epi32(ina, _mm_setzero_si128());
-
- if (rgbmod) {
- __m128i srcb = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
- __m128i srcg = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
- __m128i srcr = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
- __m128i dstb = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
- __m128i dstg = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
- __m128i dstr = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
-
- srcb = _mm_and_si128(_mm_srli_epi32(sse2_mul32(dstb, _mm_srli_epi32(sse2_mul32(_mm_mullo_epi16(srcb, _mm_set1_epi32(cb)), ina), 16)), BlendBlit::kBModShift - 8), _mm_set1_epi32(BlendBlit::kBModMask));
- srcg = _mm_and_si128(_mm_srli_epi32(sse2_mul32(dstg, _mm_srli_epi32(sse2_mul32(_mm_mullo_epi16(srcg, _mm_set1_epi32(cg)), ina), 16)), BlendBlit::kGModShift - 8), _mm_set1_epi32(BlendBlit::kGModMask));
- srcr = _mm_and_si128(_mm_srli_epi32(sse2_mul32(dstr, _mm_srli_epi32(sse2_mul32(_mm_mullo_epi16(srcr, _mm_set1_epi32(cr)), ina), 16)), BlendBlit::kRModShift - 8), _mm_set1_epi32(BlendBlit::kRModMask));
+ alphaMask = _mm_set1_epi32(BlendBlit::kAModMask);
+ }
+
+ if (rgbmod) {
+ __m128i srcB = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
+ __m128i srcG = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
+ __m128i srcR = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
+ __m128i dstB = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
+ __m128i dstG = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
+ __m128i dstR = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
+
+ srcB = _mm_and_si128(_mm_slli_epi32(_mm_mullo_epi16(dstB, _mm_srli_epi32(sse2_mul32(_mm_mullo_epi16(srcB, _mm_set1_epi32(cb)), ina), 16)), BlendBlit::kBModShift - 8), _mm_set1_epi32(BlendBlit::kBModMask));
+ srcG = _mm_and_si128(_mm_slli_epi32(_mm_mullo_epi16(dstG, _mm_srli_epi32(sse2_mul32(_mm_mullo_epi16(srcG, _mm_set1_epi32(cg)), ina), 16)), BlendBlit::kGModShift - 8), _mm_set1_epi32(BlendBlit::kGModMask));
+ srcR = _mm_and_si128(_mm_slli_epi32(_mm_mullo_epi16(dstR, _mm_srli_epi32(sse2_mul32(_mm_mullo_epi16(srcR, _mm_set1_epi32(cr)), ina), 16)), BlendBlit::kRModShift - 8), _mm_set1_epi32(BlendBlit::kRModMask));
+
+ src = _mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask));
+ src = _mm_or_si128(src, _mm_or_si128(srcB, _mm_or_si128(srcG, srcR)));
+ } else {
+ constexpr uint32 rbMask = BlendBlit::kRModMask | BlendBlit::kBModMask;
+ __m128i srcG = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
+ __m128i srcRB = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(rbMask)), BlendBlit::kBModShift);
+ __m128i dstG = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
+ __m128i dstRB = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(rbMask)), BlendBlit::kBModShift);
+ srcG = _mm_and_si128(_mm_slli_epi32(_mm_mullo_epi16(dstG, _mm_srli_epi32(_mm_mullo_epi16(srcG, ina), 8)), 8), _mm_set1_epi32(BlendBlit::kGModMask));
+ srcRB = _mm_and_si128(_mm_mullo_epi16(dstRB, _mm_srli_epi32(_mm_and_si128(sse2_mul32(srcRB, ina), _mm_set1_epi32(rbMask)), 8)), _mm_set1_epi32(rbMask));
+
src = _mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask));
- src = _mm_or_si128(src, _mm_or_si128(srcb, _mm_or_si128(srcg, srcr)));
- } else {
- __m128i srcg = _mm_and_si128(src, _mm_set1_epi32(BlendBlit::kGModMask));
- __m128i srcrb = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
- __m128i dstg = _mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kGModMask));
- __m128i dstrb = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
- srcg = _mm_and_si128(_mm_srli_epi32(sse2_mul32(dstg, _mm_srli_epi32(sse2_mul32(srcg, ina), 8)), 8), _mm_set1_epi32(BlendBlit::kGModMask));
- srcrb = _mm_and_si128(sse2_mul32(dstrb, _mm_srli_epi32(sse2_mul32(srcrb, ina), 8)), _mm_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask));
- src = _mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask));
- src = _mm_or_si128(src, _mm_or_si128(srcrb, srcg));
+ src = _mm_or_si128(src, _mm_or_si128(srcRB, srcG));
}
dst = _mm_and_si128(alphaMask, dst);
Commit: c82dd21fa701a6659b640dc7259328e42f65ad41
https://github.com/scummvm/scummvm/commit/c82dd21fa701a6659b640dc7259328e42f65ad41
Author: Kaloyan Chehlarski (strahy at outlook.com)
Date: 2023-10-22T00:25:56+03:00
Commit Message:
GRAPHICS: JANITORIAL: Convert spaces to tabs
Changed paths:
graphics/blit/blit-alpha.cpp
graphics/blit/blit-avx2.cpp
graphics/blit/blit-neon.cpp
graphics/blit/blit-scale.cpp
graphics/blit/blit-sse2.cpp
diff --git a/graphics/blit/blit-alpha.cpp b/graphics/blit/blit-alpha.cpp
index 75ecde97159..bcf7b4cba4a 100644
--- a/graphics/blit/blit-alpha.cpp
+++ b/graphics/blit/blit-alpha.cpp
@@ -29,176 +29,176 @@ namespace {
template<typename Size, bool overwriteAlpha>
inline void applyColorKeyLogic(byte *dst, const byte *src, const uint w, const uint h,
- const uint srcDelta, const uint dstDelta,
- const Graphics::PixelFormat &format,
- const uint8 rKey, const uint8 gKey, const uint8 bKey,
- const uint8 rNew, const uint8 gNew, const uint8 bNew) {
-
- const uint32 keyPix = format.ARGBToColor(0, rKey, gKey, bKey);
- const uint32 newPix = format.ARGBToColor(0, rNew, gNew, bNew);
- const uint32 rgbMask = format.ARGBToColor(0, 255, 255, 255);
- const uint32 alphaMask = format.ARGBToColor(255, 0, 0, 0);
-
- for (uint y = 0; y < h; ++y) {
- for (uint x = 0; x < w; ++x) {
- uint32 pix = *(const Size *)src;
-
- if ((pix & rgbMask) == keyPix) {
- *(Size *)dst = newPix;
- } else if (overwriteAlpha) {
- *(Size *)dst = pix | alphaMask;
- }
-
- src += sizeof(Size);
- dst += sizeof(Size);
- }
-
- src += srcDelta;
- dst += dstDelta;
- }
+ const uint srcDelta, const uint dstDelta,
+ const Graphics::PixelFormat &format,
+ const uint8 rKey, const uint8 gKey, const uint8 bKey,
+ const uint8 rNew, const uint8 gNew, const uint8 bNew) {
+
+ const uint32 keyPix = format.ARGBToColor(0, rKey, gKey, bKey);
+ const uint32 newPix = format.ARGBToColor(0, rNew, gNew, bNew);
+ const uint32 rgbMask = format.ARGBToColor(0, 255, 255, 255);
+ const uint32 alphaMask = format.ARGBToColor(255, 0, 0, 0);
+
+ for (uint y = 0; y < h; ++y) {
+ for (uint x = 0; x < w; ++x) {
+ uint32 pix = *(const Size *)src;
+
+ if ((pix & rgbMask) == keyPix) {
+ *(Size *)dst = newPix;
+ } else if (overwriteAlpha) {
+ *(Size *)dst = pix | alphaMask;
+ }
+
+ src += sizeof(Size);
+ dst += sizeof(Size);
+ }
+
+ src += srcDelta;
+ dst += dstDelta;
+ }
}
template<typename Size, bool skipTransparent>
inline void setAlphaLogic(byte *dst, const byte *src, const uint w, const uint h,
- const uint srcDelta, const uint dstDelta,
- const Graphics::PixelFormat &format, const uint8 alpha) {
+ const uint srcDelta, const uint dstDelta,
+ const Graphics::PixelFormat &format, const uint8 alpha) {
- const uint32 newAlpha = format.ARGBToColor(alpha, 0, 0, 0);
- const uint32 rgbMask = format.ARGBToColor(0, 255, 255, 255);
- const uint32 alphaMask = format.ARGBToColor(255, 0, 0, 0);
+ const uint32 newAlpha = format.ARGBToColor(alpha, 0, 0, 0);
+ const uint32 rgbMask = format.ARGBToColor(0, 255, 255, 255);
+ const uint32 alphaMask = format.ARGBToColor(255, 0, 0, 0);
- for (uint y = 0; y < h; ++y) {
- for (uint x = 0; x < w; ++x) {
- uint32 pix = *(const Size *)src;
+ for (uint y = 0; y < h; ++y) {
+ for (uint x = 0; x < w; ++x) {
+ uint32 pix = *(const Size *)src;
- if (!skipTransparent || (pix & alphaMask))
- *(Size *)dst = (pix & rgbMask) | newAlpha;
- else
- *(Size *)dst = pix;
+ if (!skipTransparent || (pix & alphaMask))
+ *(Size *)dst = (pix & rgbMask) | newAlpha;
+ else
+ *(Size *)dst = pix;
- src += sizeof(Size);
- dst += sizeof(Size);
- }
+ src += sizeof(Size);
+ dst += sizeof(Size);
+ }
- src += srcDelta;
- dst += dstDelta;
- }
+ src += srcDelta;
+ dst += dstDelta;
+ }
}
} // End of anonymous namespace
// Function to merge a transparent color key with the alpha channel
bool applyColorKey(byte *dst, const byte *src,
- const uint dstPitch, const uint srcPitch,
- const uint w, const uint h,
- const Graphics::PixelFormat &format, const bool overwriteAlpha,
- const uint8 rKey, const uint8 gKey, const uint8 bKey,
- const uint8 rNew, const uint8 gNew, const uint8 bNew) {
-
- // Faster, but larger, to provide optimized handling for each case.
- const uint srcDelta = (srcPitch - w * format.bytesPerPixel);
- const uint dstDelta = (dstPitch - w * format.bytesPerPixel);
-
- if (format.aBits() == 0) {
- return false;
- }
-
- if (overwriteAlpha) {
- if (format.bytesPerPixel == 1) {
- applyColorKeyLogic<uint8, true>(dst, src, w, h, srcDelta, dstDelta, format, rKey, gKey, bKey, rNew, gNew, bNew);
- } else if (format.bytesPerPixel == 2) {
- applyColorKeyLogic<uint16, true>(dst, src, w, h, srcDelta, dstDelta, format, rKey, gKey, bKey, rNew, gNew, bNew);
- } else if (format.bytesPerPixel == 4) {
- applyColorKeyLogic<uint32, true>(dst, src, w, h, srcDelta, dstDelta, format, rKey, gKey, bKey, rNew, gNew, bNew);
- } else {
- return false;
- }
- } else {
- if (format.bytesPerPixel == 1) {
- applyColorKeyLogic<uint8, false>(dst, src, w, h, srcDelta, dstDelta, format, rKey, gKey, bKey, rNew, gNew, bNew);
- } else if (format.bytesPerPixel == 2) {
- applyColorKeyLogic<uint16, false>(dst, src, w, h, srcDelta, dstDelta, format, rKey, gKey, bKey, rNew, gNew, bNew);
- } else if (format.bytesPerPixel == 4) {
- applyColorKeyLogic<uint32, false>(dst, src, w, h, srcDelta, dstDelta, format, rKey, gKey, bKey, rNew, gNew, bNew);
- } else {
- return false;
- }
- }
-
- return true;
+ const uint dstPitch, const uint srcPitch,
+ const uint w, const uint h,
+ const Graphics::PixelFormat &format, const bool overwriteAlpha,
+ const uint8 rKey, const uint8 gKey, const uint8 bKey,
+ const uint8 rNew, const uint8 gNew, const uint8 bNew) {
+
+ // Faster, but larger, to provide optimized handling for each case.
+ const uint srcDelta = (srcPitch - w * format.bytesPerPixel);
+ const uint dstDelta = (dstPitch - w * format.bytesPerPixel);
+
+ if (format.aBits() == 0) {
+ return false;
+ }
+
+ if (overwriteAlpha) {
+ if (format.bytesPerPixel == 1) {
+ applyColorKeyLogic<uint8, true>(dst, src, w, h, srcDelta, dstDelta, format, rKey, gKey, bKey, rNew, gNew, bNew);
+ } else if (format.bytesPerPixel == 2) {
+ applyColorKeyLogic<uint16, true>(dst, src, w, h, srcDelta, dstDelta, format, rKey, gKey, bKey, rNew, gNew, bNew);
+ } else if (format.bytesPerPixel == 4) {
+ applyColorKeyLogic<uint32, true>(dst, src, w, h, srcDelta, dstDelta, format, rKey, gKey, bKey, rNew, gNew, bNew);
+ } else {
+ return false;
+ }
+ } else {
+ if (format.bytesPerPixel == 1) {
+ applyColorKeyLogic<uint8, false>(dst, src, w, h, srcDelta, dstDelta, format, rKey, gKey, bKey, rNew, gNew, bNew);
+ } else if (format.bytesPerPixel == 2) {
+ applyColorKeyLogic<uint16, false>(dst, src, w, h, srcDelta, dstDelta, format, rKey, gKey, bKey, rNew, gNew, bNew);
+ } else if (format.bytesPerPixel == 4) {
+ applyColorKeyLogic<uint32, false>(dst, src, w, h, srcDelta, dstDelta, format, rKey, gKey, bKey, rNew, gNew, bNew);
+ } else {
+ return false;
+ }
+ }
+
+ return true;
}
// Function to set the alpha channel for all pixels to the specified value
bool setAlpha(byte *dst, const byte *src,
- const uint dstPitch, const uint srcPitch,
- const uint w, const uint h,
- const Graphics::PixelFormat &format,
- const bool skipTransparent, const uint8 alpha) {
-
- // Faster, but larger, to provide optimized handling for each case.
- const uint srcDelta = (srcPitch - w * format.bytesPerPixel);
- const uint dstDelta = (dstPitch - w * format.bytesPerPixel);
-
- if (format.aBits() == 0) {
- return false;
- }
-
- if (skipTransparent) {
- if (format.bytesPerPixel == 1) {
- setAlphaLogic<uint8, true>(dst, src, w, h, srcDelta, dstDelta, format, alpha);
- } else if (format.bytesPerPixel == 2) {
- setAlphaLogic<uint16, true>(dst, src, w, h, srcDelta, dstDelta, format, alpha);
- } else if (format.bytesPerPixel == 4) {
- setAlphaLogic<uint32, true>(dst, src, w, h, srcDelta, dstDelta, format, alpha);
- } else {
- return false;
- }
- } else {
- if (format.bytesPerPixel == 1) {
- setAlphaLogic<uint8, false>(dst, src, w, h, srcDelta, dstDelta, format, alpha);
- } else if (format.bytesPerPixel == 2) {
- setAlphaLogic<uint16, false>(dst, src, w, h, srcDelta, dstDelta, format, alpha);
- } else if (format.bytesPerPixel == 4) {
- setAlphaLogic<uint32, false>(dst, src, w, h, srcDelta, dstDelta, format, alpha);
- } else {
- return false;
- }
- }
-
- return true;
+ const uint dstPitch, const uint srcPitch,
+ const uint w, const uint h,
+ const Graphics::PixelFormat &format,
+ const bool skipTransparent, const uint8 alpha) {
+
+ // Faster, but larger, to provide optimized handling for each case.
+ const uint srcDelta = (srcPitch - w * format.bytesPerPixel);
+ const uint dstDelta = (dstPitch - w * format.bytesPerPixel);
+
+ if (format.aBits() == 0) {
+ return false;
+ }
+
+ if (skipTransparent) {
+ if (format.bytesPerPixel == 1) {
+ setAlphaLogic<uint8, true>(dst, src, w, h, srcDelta, dstDelta, format, alpha);
+ } else if (format.bytesPerPixel == 2) {
+ setAlphaLogic<uint16, true>(dst, src, w, h, srcDelta, dstDelta, format, alpha);
+ } else if (format.bytesPerPixel == 4) {
+ setAlphaLogic<uint32, true>(dst, src, w, h, srcDelta, dstDelta, format, alpha);
+ } else {
+ return false;
+ }
+ } else {
+ if (format.bytesPerPixel == 1) {
+ setAlphaLogic<uint8, false>(dst, src, w, h, srcDelta, dstDelta, format, alpha);
+ } else if (format.bytesPerPixel == 2) {
+ setAlphaLogic<uint16, false>(dst, src, w, h, srcDelta, dstDelta, format, alpha);
+ } else if (format.bytesPerPixel == 4) {
+ setAlphaLogic<uint32, false>(dst, src, w, h, srcDelta, dstDelta, format, alpha);
+ } else {
+ return false;
+ }
+ }
+
+ return true;
}
BlendBlit::Args::Args(byte *dst, const byte *src,
- const uint _dstPitch, const uint _srcPitch,
- const int posX, const int posY,
- const uint _width, const uint _height,
- const int _scaleX, const int _scaleY,
- const int scaleXsrcOff, const int scaleYsrcOff,
- const uint32 colorMod, const uint _flipping) :
- xp(0), yp(0), dstPitch(_dstPitch),
- width(_width), height(_height), color(colorMod),
- scaleX(_scaleX), scaleY(_scaleY), flipping(_flipping),
- scaleXoff(scaleXsrcOff), scaleYoff(scaleYsrcOff) {
- bool doScale = scaleX != SCALE_THRESHOLD || scaleY != SCALE_THRESHOLD;
-
- rgbmod = ((colorMod & kRGBModMask) != kRGBModMask);
- alphamod = ((colorMod & kAModMask) != kAModMask);
- inStep = 4;
- inoStep = _srcPitch;
- if (flipping & FLIP_H) {
- inStep = -inStep;
- xp = width - 1;
- if (doScale) xp = xp * scaleX / SCALE_THRESHOLD;
- }
-
- if (flipping & FLIP_V) {
- inoStep = -inoStep;
- yp = height - 1;
- if (doScale) yp = yp * scaleY / SCALE_THRESHOLD;
- }
-
- ino = src + yp * _srcPitch + xp * 4;
- outo = dst + posY * _dstPitch + posX * 4;
+ const uint _dstPitch, const uint _srcPitch,
+ const int posX, const int posY,
+ const uint _width, const uint _height,
+ const int _scaleX, const int _scaleY,
+ const int scaleXsrcOff, const int scaleYsrcOff,
+ const uint32 colorMod, const uint _flipping) :
+ xp(0), yp(0), dstPitch(_dstPitch),
+ width(_width), height(_height), color(colorMod),
+ scaleX(_scaleX), scaleY(_scaleY), flipping(_flipping),
+ scaleXoff(scaleXsrcOff), scaleYoff(scaleYsrcOff) {
+ bool doScale = scaleX != SCALE_THRESHOLD || scaleY != SCALE_THRESHOLD;
+
+ rgbmod = ((colorMod & kRGBModMask) != kRGBModMask);
+ alphamod = ((colorMod & kAModMask) != kAModMask);
+ inStep = 4;
+ inoStep = _srcPitch;
+ if (flipping & FLIP_H) {
+ inStep = -inStep;
+ xp = width - 1;
+ if (doScale) xp = xp * scaleX / SCALE_THRESHOLD;
+ }
+
+ if (flipping & FLIP_V) {
+ inoStep = -inoStep;
+ yp = height - 1;
+ if (doScale) yp = yp * scaleY / SCALE_THRESHOLD;
+ }
+
+ ino = src + yp * _srcPitch + xp * 4;
+ outo = dst + posY * _dstPitch + posX * 4;
}
// Initialize this to nullptr at the start
@@ -209,33 +209,33 @@ BlendBlit::BlitFunc BlendBlit::blitFunc = nullptr;
// BlendBlit::blitFunc. This way, we can detect at runtime whether or not
// the cpu has certain SIMD feature enabled or not.
void BlendBlit::blit(byte *dst, const byte *src,
- const uint dstPitch, const uint srcPitch,
- const int posX, const int posY,
- const uint width, const uint height,
- const int scaleX, const int scaleY,
- const int scaleXsrcOff, const int scaleYsrcOff,
- const uint32 colorMod, const uint flipping,
- const TSpriteBlendMode blendMode,
- const AlphaType alphaType) {
- if (width == 0 || height == 0) return;
-
- // If no function has been selected yet, detect and select
- if (!blitFunc) {
- // Get the correct blit function
- blitFunc = blitGeneric;
+ const uint dstPitch, const uint srcPitch,
+ const int posX, const int posY,
+ const uint width, const uint height,
+ const int scaleX, const int scaleY,
+ const int scaleXsrcOff, const int scaleYsrcOff,
+ const uint32 colorMod, const uint flipping,
+ const TSpriteBlendMode blendMode,
+ const AlphaType alphaType) {
+ if (width == 0 || height == 0) return;
+
+ // If no function has been selected yet, detect and select
+ if (!blitFunc) {
+ // Get the correct blit function
+ blitFunc = blitGeneric;
#ifdef SCUMMVM_NEON
- if (g_system->hasFeature(OSystem::kFeatureCpuNEON)) blitFunc = blitNEON;
+ if (g_system->hasFeature(OSystem::kFeatureCpuNEON)) blitFunc = blitNEON;
#endif
#ifdef SCUMMVM_SSE2
- if (g_system->hasFeature(OSystem::kFeatureCpuSSE2)) blitFunc = blitSSE2;
+ if (g_system->hasFeature(OSystem::kFeatureCpuSSE2)) blitFunc = blitSSE2;
#endif
#ifdef SCUMMVM_AVX2
- if (g_system->hasFeature(OSystem::kFeatureCpuAVX2)) blitFunc = blitAVX2;
+ if (g_system->hasFeature(OSystem::kFeatureCpuAVX2)) blitFunc = blitAVX2;
#endif
- }
-
- Args args(dst, src, dstPitch, srcPitch, posX, posY, width, height, scaleX, scaleY, scaleXsrcOff, scaleYsrcOff, colorMod, flipping);
- blitFunc(args, blendMode, alphaType);
+ }
+
+ Args args(dst, src, dstPitch, srcPitch, posX, posY, width, height, scaleX, scaleY, scaleXsrcOff, scaleYsrcOff, colorMod, flipping);
+ blitFunc(args, blendMode, alphaType);
}
class BlendBlitImpl {
@@ -245,116 +245,116 @@ public:
*/
template<bool doscale, bool rgbmod, bool alphamod>
static void doBlitMultiplyBlendLogicGeneric(BlendBlit::Args &args) {
- const byte *in;
- byte *out;
-
- int scaleXCtr, scaleYCtr = args.scaleYoff;
- const byte *inBase;
-
- const byte rawcr = (args.color >> BlendBlit::kRModShift) & 0xFF;
- const byte rawcg = (args.color >> BlendBlit::kGModShift) & 0xFF;
- const byte rawcb = (args.color >> BlendBlit::kBModShift) & 0xFF;
- const byte ca = alphamod ? ((args.color >> BlendBlit::kAModShift) & 0xFF) : 255;
- const uint32 cr = rgbmod ? (rawcr == 255 ? 256 : rawcr) : 256;
- const uint32 cg = rgbmod ? (rawcg == 255 ? 256 : rawcg) : 256;
- const uint32 cb = rgbmod ? (rawcb == 255 ? 256 : rawcb) : 256;
-
- for (uint32 i = 0; i < args.height; i++) {
- if (doscale) {
- inBase = args.ino + scaleYCtr / BlendBlit::SCALE_THRESHOLD * args.inoStep;
- scaleXCtr = args.scaleXoff;
- } else {
- in = args.ino;
- }
- out = args.outo;
- for (uint32 j = 0; j < args.width; j++) {
- if (doscale) {
- in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
- }
-
- uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
-
- if (ina != 0) {
- out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] * ((in[BlendBlit::kBIndex] * cb * ina) >> 16) >> 8;
- out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] * ((in[BlendBlit::kGIndex] * cg * ina) >> 16) >> 8;
- out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] * ((in[BlendBlit::kRIndex] * cr * ina) >> 16) >> 8;
- }
-
- if (doscale)
- scaleXCtr += args.scaleX;
- else
- in += args.inStep;
- out += 4;
- }
- if (doscale)
- scaleYCtr += args.scaleY;
- else
- args.ino += args.inoStep;
- args.outo += args.dstPitch;
- }
+ const byte *in;
+ byte *out;
+
+ int scaleXCtr, scaleYCtr = args.scaleYoff;
+ const byte *inBase;
+
+ const byte rawcr = (args.color >> BlendBlit::kRModShift) & 0xFF;
+ const byte rawcg = (args.color >> BlendBlit::kGModShift) & 0xFF;
+ const byte rawcb = (args.color >> BlendBlit::kBModShift) & 0xFF;
+ const byte ca = alphamod ? ((args.color >> BlendBlit::kAModShift) & 0xFF) : 255;
+ const uint32 cr = rgbmod ? (rawcr == 255 ? 256 : rawcr) : 256;
+ const uint32 cg = rgbmod ? (rawcg == 255 ? 256 : rawcg) : 256;
+ const uint32 cb = rgbmod ? (rawcb == 255 ? 256 : rawcb) : 256;
+
+ for (uint32 i = 0; i < args.height; i++) {
+ if (doscale) {
+ inBase = args.ino + scaleYCtr / BlendBlit::SCALE_THRESHOLD * args.inoStep;
+ scaleXCtr = args.scaleXoff;
+ } else {
+ in = args.ino;
+ }
+ out = args.outo;
+ for (uint32 j = 0; j < args.width; j++) {
+ if (doscale) {
+ in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
+ }
+
+ uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
+
+ if (ina != 0) {
+ out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] * ((in[BlendBlit::kBIndex] * cb * ina) >> 16) >> 8;
+ out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] * ((in[BlendBlit::kGIndex] * cg * ina) >> 16) >> 8;
+ out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] * ((in[BlendBlit::kRIndex] * cr * ina) >> 16) >> 8;
+ }
+
+ if (doscale)
+ scaleXCtr += args.scaleX;
+ else
+ in += args.inStep;
+ out += 4;
+ }
+ if (doscale)
+ scaleYCtr += args.scaleY;
+ else
+ args.ino += args.inoStep;
+ args.outo += args.dstPitch;
+ }
}
template<bool doscale, bool rgbmod, bool alphamod>
static void doBlitAlphaBlendLogicGeneric(BlendBlit::Args &args) {
- const byte *in;
- byte *out;
-
- int scaleXCtr, scaleYCtr = args.scaleYoff;
- const byte *inBase;
-
- const byte ca = alphamod ? ((args.color >> BlendBlit::kAModShift) & 0xFF) : 255;
- const byte cr = rgbmod ? ((args.color >> BlendBlit::kRModShift) & 0xFF) : 255;
- const byte cg = rgbmod ? ((args.color >> BlendBlit::kGModShift) & 0xFF) : 255;
- const byte cb = rgbmod ? ((args.color >> BlendBlit::kBModShift) & 0xFF) : 255;
-
- for (uint32 i = 0; i < args.height; i++) {
- if (doscale) {
- inBase = args.ino + scaleYCtr / BlendBlit::SCALE_THRESHOLD * args.inoStep;
- scaleXCtr = args.scaleXoff;
- } else {
- in = args.ino;
- }
- out = args.outo;
- for (uint32 j = 0; j < args.width; j++) {
- if (doscale) {
- in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
- }
-
- uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
-
- if (ina != 0) {
- if (rgbmod) {
- const uint outb = (out[BlendBlit::kBIndex] * (255 - ina) >> 8);
- const uint outg = (out[BlendBlit::kGIndex] * (255 - ina) >> 8);
- const uint outr = (out[BlendBlit::kRIndex] * (255 - ina) >> 8);
-
- out[BlendBlit::kAIndex] = 255;
- out[BlendBlit::kBIndex] = outb + (in[BlendBlit::kBIndex] * ina * cb >> 16);
- out[BlendBlit::kGIndex] = outg + (in[BlendBlit::kGIndex] * ina * cg >> 16);
- out[BlendBlit::kRIndex] = outr + (in[BlendBlit::kRIndex] * ina * cr >> 16);
- } else {
- out[BlendBlit::kAIndex] = 255;
- out[BlendBlit::kBIndex] = (out[BlendBlit::kBIndex] * (255 - ina) + in[BlendBlit::kBIndex] * ina) >> 8;
- out[BlendBlit::kGIndex] = (out[BlendBlit::kGIndex] * (255 - ina) + in[BlendBlit::kGIndex] * ina) >> 8;
- out[BlendBlit::kRIndex] = (out[BlendBlit::kRIndex] * (255 - ina) + in[BlendBlit::kRIndex] * ina) >> 8;
-
- }
- }
-
- if (doscale)
- scaleXCtr += args.scaleX;
- else
- in += args.inStep;
- out += 4;
- }
-
- if (doscale)
- scaleYCtr += args.scaleY;
- else
- args.ino += args.inoStep;
- args.outo += args.dstPitch;
- }
+ const byte *in;
+ byte *out;
+
+ int scaleXCtr, scaleYCtr = args.scaleYoff;
+ const byte *inBase;
+
+ const byte ca = alphamod ? ((args.color >> BlendBlit::kAModShift) & 0xFF) : 255;
+ const byte cr = rgbmod ? ((args.color >> BlendBlit::kRModShift) & 0xFF) : 255;
+ const byte cg = rgbmod ? ((args.color >> BlendBlit::kGModShift) & 0xFF) : 255;
+ const byte cb = rgbmod ? ((args.color >> BlendBlit::kBModShift) & 0xFF) : 255;
+
+ for (uint32 i = 0; i < args.height; i++) {
+ if (doscale) {
+ inBase = args.ino + scaleYCtr / BlendBlit::SCALE_THRESHOLD * args.inoStep;
+ scaleXCtr = args.scaleXoff;
+ } else {
+ in = args.ino;
+ }
+ out = args.outo;
+ for (uint32 j = 0; j < args.width; j++) {
+ if (doscale) {
+ in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
+ }
+
+ uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
+
+ if (ina != 0) {
+ if (rgbmod) {
+ const uint outb = (out[BlendBlit::kBIndex] * (255 - ina) >> 8);
+ const uint outg = (out[BlendBlit::kGIndex] * (255 - ina) >> 8);
+ const uint outr = (out[BlendBlit::kRIndex] * (255 - ina) >> 8);
+
+ out[BlendBlit::kAIndex] = 255;
+ out[BlendBlit::kBIndex] = outb + (in[BlendBlit::kBIndex] * ina * cb >> 16);
+ out[BlendBlit::kGIndex] = outg + (in[BlendBlit::kGIndex] * ina * cg >> 16);
+ out[BlendBlit::kRIndex] = outr + (in[BlendBlit::kRIndex] * ina * cr >> 16);
+ } else {
+ out[BlendBlit::kAIndex] = 255;
+ out[BlendBlit::kBIndex] = (out[BlendBlit::kBIndex] * (255 - ina) + in[BlendBlit::kBIndex] * ina) >> 8;
+ out[BlendBlit::kGIndex] = (out[BlendBlit::kGIndex] * (255 - ina) + in[BlendBlit::kGIndex] * ina) >> 8;
+ out[BlendBlit::kRIndex] = (out[BlendBlit::kRIndex] * (255 - ina) + in[BlendBlit::kRIndex] * ina) >> 8;
+
+ }
+ }
+
+ if (doscale)
+ scaleXCtr += args.scaleX;
+ else
+ in += args.inStep;
+ out += 4;
+ }
+
+ if (doscale)
+ scaleYCtr += args.scaleY;
+ else
+ args.ino += args.inoStep;
+ args.outo += args.dstPitch;
+ }
}
/**
@@ -362,49 +362,49 @@ static void doBlitAlphaBlendLogicGeneric(BlendBlit::Args &args) {
*/
template<bool doscale, bool rgbmod>
static void doBlitSubtractiveBlendLogicGeneric(BlendBlit::Args &args) {
- const byte *in;
- byte *out;
-
- int scaleXCtr, scaleYCtr = args.scaleYoff;
- const byte *inBase;
-
- const byte rawcr = (args.color >> BlendBlit::kRModShift) & 0xFF;
- const byte rawcg = (args.color >> BlendBlit::kGModShift) & 0xFF;
- const byte rawcb = (args.color >> BlendBlit::kBModShift) & 0xFF;
- const uint32 cr = rgbmod ? (rawcr == 255 ? 256 : rawcr) : 256;
- const uint32 cg = rgbmod ? (rawcg == 255 ? 256 : rawcg) : 256;
- const uint32 cb = rgbmod ? (rawcb == 255 ? 256 : rawcb) : 256;
-
- for (uint32 i = 0; i < args.height; i++) {
- if (doscale) {
- inBase = args.ino + scaleYCtr / BlendBlit::SCALE_THRESHOLD * args.inoStep;
- scaleXCtr = args.scaleXoff;
- } else {
- in = args.ino;
- }
- out = args.outo;
- for (uint32 j = 0; j < args.width; j++) {
- if (doscale) {
- in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
- }
-
- out[BlendBlit::kAIndex] = 255;
- out[BlendBlit::kBIndex] = MAX<int32>(out[BlendBlit::kBIndex] - ((in[BlendBlit::kBIndex] * cb * (out[BlendBlit::kBIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
- out[BlendBlit::kGIndex] = MAX<int32>(out[BlendBlit::kGIndex] - ((in[BlendBlit::kGIndex] * cg * (out[BlendBlit::kGIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
- out[BlendBlit::kRIndex] = MAX<int32>(out[BlendBlit::kRIndex] - ((in[BlendBlit::kRIndex] * cr * (out[BlendBlit::kRIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
-
- if (doscale)
- scaleXCtr += args.scaleX;
- else
- in += args.inStep;
- out += 4;
- }
- if (doscale)
- scaleYCtr += args.scaleY;
- else
- args.ino += args.inoStep;
- args.outo += args.dstPitch;
- }
+ const byte *in;
+ byte *out;
+
+ int scaleXCtr, scaleYCtr = args.scaleYoff;
+ const byte *inBase;
+
+ const byte rawcr = (args.color >> BlendBlit::kRModShift) & 0xFF;
+ const byte rawcg = (args.color >> BlendBlit::kGModShift) & 0xFF;
+ const byte rawcb = (args.color >> BlendBlit::kBModShift) & 0xFF;
+ const uint32 cr = rgbmod ? (rawcr == 255 ? 256 : rawcr) : 256;
+ const uint32 cg = rgbmod ? (rawcg == 255 ? 256 : rawcg) : 256;
+ const uint32 cb = rgbmod ? (rawcb == 255 ? 256 : rawcb) : 256;
+
+ for (uint32 i = 0; i < args.height; i++) {
+ if (doscale) {
+ inBase = args.ino + scaleYCtr / BlendBlit::SCALE_THRESHOLD * args.inoStep;
+ scaleXCtr = args.scaleXoff;
+ } else {
+ in = args.ino;
+ }
+ out = args.outo;
+ for (uint32 j = 0; j < args.width; j++) {
+ if (doscale) {
+ in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
+ }
+
+ out[BlendBlit::kAIndex] = 255;
+ out[BlendBlit::kBIndex] = MAX<int32>(out[BlendBlit::kBIndex] - ((in[BlendBlit::kBIndex] * cb * (out[BlendBlit::kBIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
+ out[BlendBlit::kGIndex] = MAX<int32>(out[BlendBlit::kGIndex] - ((in[BlendBlit::kGIndex] * cg * (out[BlendBlit::kGIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
+ out[BlendBlit::kRIndex] = MAX<int32>(out[BlendBlit::kRIndex] - ((in[BlendBlit::kRIndex] * cr * (out[BlendBlit::kRIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
+
+ if (doscale)
+ scaleXCtr += args.scaleX;
+ else
+ in += args.inStep;
+ out += 4;
+ }
+ if (doscale)
+ scaleYCtr += args.scaleY;
+ else
+ args.ino += args.inoStep;
+ args.outo += args.dstPitch;
+ }
}
/**
@@ -412,257 +412,257 @@ static void doBlitSubtractiveBlendLogicGeneric(BlendBlit::Args &args) {
*/
template<bool doscale, bool rgbmod, bool alphamod>
static void doBlitAdditiveBlendLogicGeneric(BlendBlit::Args &args) {
- const byte *in;
- byte *out;
-
- int scaleXCtr, scaleYCtr = args.scaleYoff;
- const byte *inBase;
-
- const byte rawcr = (args.color >> BlendBlit::kRModShift) & 0xFF;
- const byte rawcg = (args.color >> BlendBlit::kGModShift) & 0xFF;
- const byte rawcb = (args.color >> BlendBlit::kBModShift) & 0xFF;
- const byte ca = alphamod ? ((args.color >> BlendBlit::kAModShift) & 0xFF) : 255;
- const uint32 cr = rgbmod ? (rawcr == 255 ? 256 : rawcr) : 256;
- const uint32 cg = rgbmod ? (rawcg == 255 ? 256 : rawcg) : 256;
- const uint32 cb = rgbmod ? (rawcb == 255 ? 256 : rawcb) : 256;
-
- for (uint32 i = 0; i < args.height; i++) {
- if (doscale) {
- inBase = args.ino + scaleYCtr / BlendBlit::SCALE_THRESHOLD * args.inoStep;
- scaleXCtr = args.scaleXoff;
- } else {
- in = args.ino;
- }
- out = args.outo;
- for (uint32 j = 0; j < args.width; j++) {
- if (doscale) {
- in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
- }
-
- uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
-
- if (ina != 0) {
- out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] + ((in[BlendBlit::kBIndex] * cb * ina) >> 16);
- out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] + ((in[BlendBlit::kGIndex] * cg * ina) >> 16);
- out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] + ((in[BlendBlit::kRIndex] * cr * ina) >> 16);
- }
-
- if (doscale)
- scaleXCtr += args.scaleX;
- else
- in += args.inStep;
- out += 4;
- }
-
- if (doscale)
- scaleYCtr += args.scaleY;
- else
- args.ino += args.inoStep;
- args.outo += args.dstPitch;
- }
+ const byte *in;
+ byte *out;
+
+ int scaleXCtr, scaleYCtr = args.scaleYoff;
+ const byte *inBase;
+
+ const byte rawcr = (args.color >> BlendBlit::kRModShift) & 0xFF;
+ const byte rawcg = (args.color >> BlendBlit::kGModShift) & 0xFF;
+ const byte rawcb = (args.color >> BlendBlit::kBModShift) & 0xFF;
+ const byte ca = alphamod ? ((args.color >> BlendBlit::kAModShift) & 0xFF) : 255;
+ const uint32 cr = rgbmod ? (rawcr == 255 ? 256 : rawcr) : 256;
+ const uint32 cg = rgbmod ? (rawcg == 255 ? 256 : rawcg) : 256;
+ const uint32 cb = rgbmod ? (rawcb == 255 ? 256 : rawcb) : 256;
+
+ for (uint32 i = 0; i < args.height; i++) {
+ if (doscale) {
+ inBase = args.ino + scaleYCtr / BlendBlit::SCALE_THRESHOLD * args.inoStep;
+ scaleXCtr = args.scaleXoff;
+ } else {
+ in = args.ino;
+ }
+ out = args.outo;
+ for (uint32 j = 0; j < args.width; j++) {
+ if (doscale) {
+ in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
+ }
+
+ uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
+
+ if (ina != 0) {
+ out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] + ((in[BlendBlit::kBIndex] * cb * ina) >> 16);
+ out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] + ((in[BlendBlit::kGIndex] * cg * ina) >> 16);
+ out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] + ((in[BlendBlit::kRIndex] * cr * ina) >> 16);
+ }
+
+ if (doscale)
+ scaleXCtr += args.scaleX;
+ else
+ in += args.inStep;
+ out += 4;
+ }
+
+ if (doscale)
+ scaleYCtr += args.scaleY;
+ else
+ args.ino += args.inoStep;
+ args.outo += args.dstPitch;
+ }
}
template<bool doscale>
static void doBlitOpaqueBlendLogicGeneric(BlendBlit::Args &args) {
- const byte *in;
- byte *out;
-
- int scaleXCtr, scaleYCtr = args.scaleYoff;
- const byte *inBase;
-
- for (uint32 i = 0; i < args.height; i++) {
- if (doscale) {
- inBase = args.ino + (scaleYCtr + 1) / BlendBlit::SCALE_THRESHOLD * args.inoStep;
- scaleXCtr = args.scaleXoff;
- } else {
- in = args.ino;
- }
- out = args.outo;
-
- if (doscale) {
- for (uint32 j = 0; j < args.width; j++) {
- in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
- *(uint32 *)out = *(const uint32 *)in | BlendBlit::kAModMask;
- scaleXCtr += args.scaleX;
- out += 4;
- }
- } else {
- for (uint32 j = 0; j < args.width; j++) {
- *(uint32 *)out = *(const uint32 *)in | BlendBlit::kAModMask;
- in += args.inStep;
- out += 4;
- }
- }
-
- if (doscale)
- scaleYCtr += args.scaleY;
- else
- args.ino += args.inoStep;
- args.outo += args.dstPitch;
- }
+ const byte *in;
+ byte *out;
+
+ int scaleXCtr, scaleYCtr = args.scaleYoff;
+ const byte *inBase;
+
+ for (uint32 i = 0; i < args.height; i++) {
+ if (doscale) {
+ inBase = args.ino + (scaleYCtr + 1) / BlendBlit::SCALE_THRESHOLD * args.inoStep;
+ scaleXCtr = args.scaleXoff;
+ } else {
+ in = args.ino;
+ }
+ out = args.outo;
+
+ if (doscale) {
+ for (uint32 j = 0; j < args.width; j++) {
+ in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
+ *(uint32 *)out = *(const uint32 *)in | BlendBlit::kAModMask;
+ scaleXCtr += args.scaleX;
+ out += 4;
+ }
+ } else {
+ for (uint32 j = 0; j < args.width; j++) {
+ *(uint32 *)out = *(const uint32 *)in | BlendBlit::kAModMask;
+ in += args.inStep;
+ out += 4;
+ }
+ }
+
+ if (doscale)
+ scaleYCtr += args.scaleY;
+ else
+ args.ino += args.inoStep;
+ args.outo += args.dstPitch;
+ }
}
template<bool doscale>
static void doBlitBinaryBlendLogicGeneric(BlendBlit::Args &args) {
- const byte *in;
- byte *out;
-
- int scaleXCtr, scaleYCtr = args.scaleYoff;
- const byte *inBase;
-
- for (uint32 i = 0; i < args.height; i++) {
- if (doscale) {
- inBase = args.ino + scaleYCtr / BlendBlit::SCALE_THRESHOLD * args.inoStep;
- scaleXCtr = args.scaleXoff;
- } else {
- in = args.ino;
- }
- out = args.outo;
- for (uint32 j = 0; j < args.width; j++) {
- if (doscale) {
- in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
- }
-
- uint32 pix = *(const uint32 *)in, pixout = *(const uint32 *)out;
- uint32 mask = (pix & BlendBlit::kAModMask) ? 0xffffffff : 0;
- pixout &= ~mask;
- pix = (pix | BlendBlit::kAModMask) & mask;
- *(uint32 *)out = pixout | pix;
-
- if (doscale)
- scaleXCtr += args.scaleX;
- else
- in += args.inStep;
- out += 4;
- }
- if (doscale)
- scaleYCtr += args.scaleY;
- else
- args.ino += args.inoStep;
- args.outo += args.dstPitch;
- }
+ const byte *in;
+ byte *out;
+
+ int scaleXCtr, scaleYCtr = args.scaleYoff;
+ const byte *inBase;
+
+ for (uint32 i = 0; i < args.height; i++) {
+ if (doscale) {
+ inBase = args.ino + scaleYCtr / BlendBlit::SCALE_THRESHOLD * args.inoStep;
+ scaleXCtr = args.scaleXoff;
+ } else {
+ in = args.ino;
+ }
+ out = args.outo;
+ for (uint32 j = 0; j < args.width; j++) {
+ if (doscale) {
+ in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
+ }
+
+ uint32 pix = *(const uint32 *)in, pixout = *(const uint32 *)out;
+ uint32 mask = (pix & BlendBlit::kAModMask) ? 0xffffffff : 0;
+ pixout &= ~mask;
+ pix = (pix | BlendBlit::kAModMask) & mask;
+ *(uint32 *)out = pixout | pix;
+
+ if (doscale)
+ scaleXCtr += args.scaleX;
+ else
+ in += args.inStep;
+ out += 4;
+ }
+ if (doscale)
+ scaleYCtr += args.scaleY;
+ else
+ args.ino += args.inoStep;
+ args.outo += args.dstPitch;
+ }
}
}; // end of class BlendBlitImpl
void BlendBlit::blitGeneric(Args &args, const TSpriteBlendMode &blendMode, const AlphaType &alphaType) {
- bool rgbmod = ((args.color & kRGBModMask) != kRGBModMask);
- bool alphamod = ((args.color & kAModMask) != kAModMask);
- if (args.scaleX == BlendBlit::SCALE_THRESHOLD && args.scaleY == BlendBlit::SCALE_THRESHOLD) {
- if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_OPAQUE) {
- BlendBlitImpl::doBlitOpaqueBlendLogicGeneric<false>(args);
- } else if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_BINARY) {
- BlendBlitImpl::doBlitBinaryBlendLogicGeneric<false>(args);
- } else {
- if (blendMode == BLEND_ADDITIVE) {
- if (rgbmod) {
- if (alphamod) {
- BlendBlitImpl::doBlitAdditiveBlendLogicGeneric<false, true, true>(args);
- } else {
- BlendBlitImpl::doBlitAdditiveBlendLogicGeneric<false, true, false>(args);
- }
- } else {
- if (alphamod) {
- BlendBlitImpl::doBlitAdditiveBlendLogicGeneric<false, false, true>(args);
- } else {
- BlendBlitImpl::doBlitAdditiveBlendLogicGeneric<false, false, false>(args);
- }
- }
- } else if (blendMode == BLEND_SUBTRACTIVE) {
- if (rgbmod) {
- BlendBlitImpl::doBlitSubtractiveBlendLogicGeneric<false, true>(args);
- } else {
- BlendBlitImpl::doBlitSubtractiveBlendLogicGeneric<false, false>(args);
- }
- } else if (blendMode == BLEND_MULTIPLY) {
- if (rgbmod) {
- if (alphamod) {
- BlendBlitImpl::doBlitMultiplyBlendLogicGeneric<false, true, true>(args);
- } else {
- BlendBlitImpl::doBlitMultiplyBlendLogicGeneric<false, true, false>(args);
- }
- } else {
- if (alphamod) {
- BlendBlitImpl::doBlitMultiplyBlendLogicGeneric<false, false, true>(args);
- } else {
- BlendBlitImpl::doBlitMultiplyBlendLogicGeneric<false, false, false>(args);
- }
- }
- } else {
- assert(blendMode == BLEND_NORMAL);
- if (rgbmod) {
- if (alphamod) {
- BlendBlitImpl::doBlitAlphaBlendLogicGeneric<false, true, true>(args);
- } else {
- BlendBlitImpl::doBlitAlphaBlendLogicGeneric<false, true, false>(args);
- }
- } else {
- if (alphamod) {
- BlendBlitImpl::doBlitAlphaBlendLogicGeneric<false, false, true>(args);
- } else {
- BlendBlitImpl::doBlitAlphaBlendLogicGeneric<false, false, false>(args);
- }
- }
- }
- }
- } else {
- if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_OPAQUE) {
- BlendBlitImpl::doBlitOpaqueBlendLogicGeneric<true>(args);
- } else if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_BINARY) {
- BlendBlitImpl::doBlitBinaryBlendLogicGeneric<true>(args);
- } else {
- if (blendMode == BLEND_ADDITIVE) {
- if (rgbmod) {
- if (alphamod) {
- BlendBlitImpl::doBlitAdditiveBlendLogicGeneric<true, true, true>(args);
- } else {
- BlendBlitImpl::doBlitAdditiveBlendLogicGeneric<true, true, false>(args);
- }
- } else {
- if (alphamod) {
- BlendBlitImpl::doBlitAdditiveBlendLogicGeneric<true, false, true>(args);
- } else {
- BlendBlitImpl::doBlitAdditiveBlendLogicGeneric<true, false, false>(args);
- }
- }
- } else if (blendMode == BLEND_SUBTRACTIVE) {
- if (rgbmod) {
- BlendBlitImpl::doBlitSubtractiveBlendLogicGeneric<true, true>(args);
- } else {
- BlendBlitImpl::doBlitSubtractiveBlendLogicGeneric<true, false>(args);
- }
- } else if (blendMode == BLEND_MULTIPLY) {
- if (rgbmod) {
- if (alphamod) {
- BlendBlitImpl::doBlitMultiplyBlendLogicGeneric<true, true, true>(args);
- } else {
- BlendBlitImpl::doBlitMultiplyBlendLogicGeneric<true, true, false>(args);
- }
- } else {
- if (alphamod) {
- BlendBlitImpl::doBlitMultiplyBlendLogicGeneric<true, false, true>(args);
- } else {
- BlendBlitImpl::doBlitMultiplyBlendLogicGeneric<true, false, false>(args);
- }
- }
- } else {
- assert(blendMode == BLEND_NORMAL);
- if (rgbmod) {
- if (alphamod) {
- BlendBlitImpl::doBlitAlphaBlendLogicGeneric<true, true, true>(args);
- } else {
- BlendBlitImpl::doBlitAlphaBlendLogicGeneric<true, true, false>(args);
- }
- } else {
- if (alphamod) {
- BlendBlitImpl::doBlitAlphaBlendLogicGeneric<true, false, true>(args);
- } else {
- BlendBlitImpl::doBlitAlphaBlendLogicGeneric<true, false, false>(args);
- }
- }
- }
- }
- }
+ bool rgbmod = ((args.color & kRGBModMask) != kRGBModMask);
+ bool alphamod = ((args.color & kAModMask) != kAModMask);
+ if (args.scaleX == BlendBlit::SCALE_THRESHOLD && args.scaleY == BlendBlit::SCALE_THRESHOLD) {
+ if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_OPAQUE) {
+ BlendBlitImpl::doBlitOpaqueBlendLogicGeneric<false>(args);
+ } else if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_BINARY) {
+ BlendBlitImpl::doBlitBinaryBlendLogicGeneric<false>(args);
+ } else {
+ if (blendMode == BLEND_ADDITIVE) {
+ if (rgbmod) {
+ if (alphamod) {
+ BlendBlitImpl::doBlitAdditiveBlendLogicGeneric<false, true, true>(args);
+ } else {
+ BlendBlitImpl::doBlitAdditiveBlendLogicGeneric<false, true, false>(args);
+ }
+ } else {
+ if (alphamod) {
+ BlendBlitImpl::doBlitAdditiveBlendLogicGeneric<false, false, true>(args);
+ } else {
+ BlendBlitImpl::doBlitAdditiveBlendLogicGeneric<false, false, false>(args);
+ }
+ }
+ } else if (blendMode == BLEND_SUBTRACTIVE) {
+ if (rgbmod) {
+ BlendBlitImpl::doBlitSubtractiveBlendLogicGeneric<false, true>(args);
+ } else {
+ BlendBlitImpl::doBlitSubtractiveBlendLogicGeneric<false, false>(args);
+ }
+ } else if (blendMode == BLEND_MULTIPLY) {
+ if (rgbmod) {
+ if (alphamod) {
+ BlendBlitImpl::doBlitMultiplyBlendLogicGeneric<false, true, true>(args);
+ } else {
+ BlendBlitImpl::doBlitMultiplyBlendLogicGeneric<false, true, false>(args);
+ }
+ } else {
+ if (alphamod) {
+ BlendBlitImpl::doBlitMultiplyBlendLogicGeneric<false, false, true>(args);
+ } else {
+ BlendBlitImpl::doBlitMultiplyBlendLogicGeneric<false, false, false>(args);
+ }
+ }
+ } else {
+ assert(blendMode == BLEND_NORMAL);
+ if (rgbmod) {
+ if (alphamod) {
+ BlendBlitImpl::doBlitAlphaBlendLogicGeneric<false, true, true>(args);
+ } else {
+ BlendBlitImpl::doBlitAlphaBlendLogicGeneric<false, true, false>(args);
+ }
+ } else {
+ if (alphamod) {
+ BlendBlitImpl::doBlitAlphaBlendLogicGeneric<false, false, true>(args);
+ } else {
+ BlendBlitImpl::doBlitAlphaBlendLogicGeneric<false, false, false>(args);
+ }
+ }
+ }
+ }
+ } else {
+ if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_OPAQUE) {
+ BlendBlitImpl::doBlitOpaqueBlendLogicGeneric<true>(args);
+ } else if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_BINARY) {
+ BlendBlitImpl::doBlitBinaryBlendLogicGeneric<true>(args);
+ } else {
+ if (blendMode == BLEND_ADDITIVE) {
+ if (rgbmod) {
+ if (alphamod) {
+ BlendBlitImpl::doBlitAdditiveBlendLogicGeneric<true, true, true>(args);
+ } else {
+ BlendBlitImpl::doBlitAdditiveBlendLogicGeneric<true, true, false>(args);
+ }
+ } else {
+ if (alphamod) {
+ BlendBlitImpl::doBlitAdditiveBlendLogicGeneric<true, false, true>(args);
+ } else {
+ BlendBlitImpl::doBlitAdditiveBlendLogicGeneric<true, false, false>(args);
+ }
+ }
+ } else if (blendMode == BLEND_SUBTRACTIVE) {
+ if (rgbmod) {
+ BlendBlitImpl::doBlitSubtractiveBlendLogicGeneric<true, true>(args);
+ } else {
+ BlendBlitImpl::doBlitSubtractiveBlendLogicGeneric<true, false>(args);
+ }
+ } else if (blendMode == BLEND_MULTIPLY) {
+ if (rgbmod) {
+ if (alphamod) {
+ BlendBlitImpl::doBlitMultiplyBlendLogicGeneric<true, true, true>(args);
+ } else {
+ BlendBlitImpl::doBlitMultiplyBlendLogicGeneric<true, true, false>(args);
+ }
+ } else {
+ if (alphamod) {
+ BlendBlitImpl::doBlitMultiplyBlendLogicGeneric<true, false, true>(args);
+ } else {
+ BlendBlitImpl::doBlitMultiplyBlendLogicGeneric<true, false, false>(args);
+ }
+ }
+ } else {
+ assert(blendMode == BLEND_NORMAL);
+ if (rgbmod) {
+ if (alphamod) {
+ BlendBlitImpl::doBlitAlphaBlendLogicGeneric<true, true, true>(args);
+ } else {
+ BlendBlitImpl::doBlitAlphaBlendLogicGeneric<true, true, false>(args);
+ }
+ } else {
+ if (alphamod) {
+ BlendBlitImpl::doBlitAlphaBlendLogicGeneric<true, false, true>(args);
+ } else {
+ BlendBlitImpl::doBlitAlphaBlendLogicGeneric<true, false, false>(args);
+ }
+ }
+ }
+ }
+ }
}
} // End of namespace Graphics
diff --git a/graphics/blit/blit-avx2.cpp b/graphics/blit/blit-avx2.cpp
index 5e0bf936953..02dec39e1b1 100644
--- a/graphics/blit/blit-avx2.cpp
+++ b/graphics/blit/blit-avx2.cpp
@@ -29,439 +29,439 @@ namespace Graphics {
template<bool doscale, bool rgbmod, bool alphamod>
struct AlphaBlend {
- static inline __m256i simd(__m256i src, __m256i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
- __m256i ina;
- if (alphamod)
- ina = _mm256_srli_epi32(_mm256_mullo_epi16(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask)), _mm256_set1_epi32(ca)), 8);
- else
- ina = _mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask));
- __m256i alphaMask = _mm256_cmpeq_epi32(ina, _mm256_setzero_si256());
-
- if (rgbmod) {
- __m256i dstR = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
- __m256i dstG = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
- __m256i dstB = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
- __m256i srcR = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
- __m256i srcG = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
- __m256i srcB = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
-
- dstR = _mm256_slli_epi32(_mm256_mullo_epi16(dstR, _mm256_sub_epi32(_mm256_set1_epi32(255), ina)), BlendBlit::kRModShift - 8);
- dstG = _mm256_slli_epi32(_mm256_mullo_epi16(dstG, _mm256_sub_epi32(_mm256_set1_epi32(255), ina)), BlendBlit::kGModShift - 8);
- dstB = _mm256_mullo_epi16(dstB, _mm256_sub_epi32(_mm256_set1_epi32(255), ina));
- srcR = _mm256_add_epi32(dstR, _mm256_slli_epi32(_mm256_mullo_epi16(_mm256_srli_epi32(_mm256_mullo_epi16(srcR, ina), 8), _mm256_set1_epi32(cr)), BlendBlit::kRModShift - 8));
- srcG = _mm256_add_epi32(dstG, _mm256_slli_epi32(_mm256_mullo_epi16(_mm256_srli_epi32(_mm256_mullo_epi16(srcG, ina), 8), _mm256_set1_epi32(cg)), BlendBlit::kGModShift - 8));
- srcB = _mm256_add_epi32(dstB, _mm256_mullo_epi16(_mm256_srli_epi32(_mm256_mullo_epi16(srcB, ina), 8), _mm256_set1_epi32(cb)));
- src = _mm256_or_si256(_mm256_and_si256(srcB, _mm256_set1_epi32(BlendBlit::kBModMask)), _mm256_set1_epi32(BlendBlit::kAModMask));
- src = _mm256_or_si256(_mm256_and_si256(srcG, _mm256_set1_epi32(BlendBlit::kGModMask)), src);
- src = _mm256_or_si256(_mm256_and_si256(srcR, _mm256_set1_epi32(BlendBlit::kRModMask)), src);
- } else {
- __m256i dstRB = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
- __m256i srcRB = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
- __m256i dstG = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
- __m256i srcG = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
-
- dstRB = _mm256_srli_epi32(_mm256_mullo_epi32(dstRB, _mm256_sub_epi32(_mm256_set1_epi32(255), ina)), 8);
- dstG = _mm256_srli_epi32(_mm256_mullo_epi16(dstG, _mm256_sub_epi32(_mm256_set1_epi32(255), ina)), 8);
- srcRB = _mm256_slli_epi32(_mm256_add_epi32(dstRB, _mm256_srli_epi32(_mm256_mullo_epi32(srcRB, ina), 8)), BlendBlit::kBModShift);
- srcG = _mm256_slli_epi32(_mm256_add_epi32(dstG, _mm256_srli_epi32(_mm256_mullo_epi16(srcG, ina), 8)), BlendBlit::kGModShift);
- src = _mm256_or_si256(_mm256_and_si256(srcG, _mm256_set1_epi32(BlendBlit::kGModMask)), _mm256_set1_epi32(BlendBlit::kAModMask));
- src = _mm256_or_si256(_mm256_and_si256(srcRB, _mm256_set1_epi32(BlendBlit::kBModMask | BlendBlit::kRModMask)), src);
- }
-
- dst = _mm256_and_si256(alphaMask, dst);
- src = _mm256_andnot_si256(alphaMask, src);
- return _mm256_or_si256(dst, src);
- }
-
- static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
- uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
-
- if (ina != 0) {
- uint outb = (out[BlendBlit::kBIndex] * (255 - ina) >> 8);
- uint outg = (out[BlendBlit::kGIndex] * (255 - ina) >> 8);
- uint outr = (out[BlendBlit::kRIndex] * (255 - ina) >> 8);
-
- out[BlendBlit::kAIndex] = 255;
- out[BlendBlit::kBIndex] = outb + (in[BlendBlit::kBIndex] * ina * cb >> 16);
- out[BlendBlit::kGIndex] = outg + (in[BlendBlit::kGIndex] * ina * cg >> 16);
- out[BlendBlit::kRIndex] = outr + (in[BlendBlit::kRIndex] * ina * cr >> 16);
- }
- }
+ static inline __m256i simd(__m256i src, __m256i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
+ __m256i ina;
+ if (alphamod)
+ ina = _mm256_srli_epi32(_mm256_mullo_epi16(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask)), _mm256_set1_epi32(ca)), 8);
+ else
+ ina = _mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask));
+ __m256i alphaMask = _mm256_cmpeq_epi32(ina, _mm256_setzero_si256());
+
+ if (rgbmod) {
+ __m256i dstR = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
+ __m256i dstG = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
+ __m256i dstB = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
+ __m256i srcR = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
+ __m256i srcG = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
+ __m256i srcB = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
+
+ dstR = _mm256_slli_epi32(_mm256_mullo_epi16(dstR, _mm256_sub_epi32(_mm256_set1_epi32(255), ina)), BlendBlit::kRModShift - 8);
+ dstG = _mm256_slli_epi32(_mm256_mullo_epi16(dstG, _mm256_sub_epi32(_mm256_set1_epi32(255), ina)), BlendBlit::kGModShift - 8);
+ dstB = _mm256_mullo_epi16(dstB, _mm256_sub_epi32(_mm256_set1_epi32(255), ina));
+ srcR = _mm256_add_epi32(dstR, _mm256_slli_epi32(_mm256_mullo_epi16(_mm256_srli_epi32(_mm256_mullo_epi16(srcR, ina), 8), _mm256_set1_epi32(cr)), BlendBlit::kRModShift - 8));
+ srcG = _mm256_add_epi32(dstG, _mm256_slli_epi32(_mm256_mullo_epi16(_mm256_srli_epi32(_mm256_mullo_epi16(srcG, ina), 8), _mm256_set1_epi32(cg)), BlendBlit::kGModShift - 8));
+ srcB = _mm256_add_epi32(dstB, _mm256_mullo_epi16(_mm256_srli_epi32(_mm256_mullo_epi16(srcB, ina), 8), _mm256_set1_epi32(cb)));
+ src = _mm256_or_si256(_mm256_and_si256(srcB, _mm256_set1_epi32(BlendBlit::kBModMask)), _mm256_set1_epi32(BlendBlit::kAModMask));
+ src = _mm256_or_si256(_mm256_and_si256(srcG, _mm256_set1_epi32(BlendBlit::kGModMask)), src);
+ src = _mm256_or_si256(_mm256_and_si256(srcR, _mm256_set1_epi32(BlendBlit::kRModMask)), src);
+ } else {
+ __m256i dstRB = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
+ __m256i srcRB = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
+ __m256i dstG = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
+ __m256i srcG = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
+
+ dstRB = _mm256_srli_epi32(_mm256_mullo_epi32(dstRB, _mm256_sub_epi32(_mm256_set1_epi32(255), ina)), 8);
+ dstG = _mm256_srli_epi32(_mm256_mullo_epi16(dstG, _mm256_sub_epi32(_mm256_set1_epi32(255), ina)), 8);
+ srcRB = _mm256_slli_epi32(_mm256_add_epi32(dstRB, _mm256_srli_epi32(_mm256_mullo_epi32(srcRB, ina), 8)), BlendBlit::kBModShift);
+ srcG = _mm256_slli_epi32(_mm256_add_epi32(dstG, _mm256_srli_epi32(_mm256_mullo_epi16(srcG, ina), 8)), BlendBlit::kGModShift);
+ src = _mm256_or_si256(_mm256_and_si256(srcG, _mm256_set1_epi32(BlendBlit::kGModMask)), _mm256_set1_epi32(BlendBlit::kAModMask));
+ src = _mm256_or_si256(_mm256_and_si256(srcRB, _mm256_set1_epi32(BlendBlit::kBModMask | BlendBlit::kRModMask)), src);
+ }
+
+ dst = _mm256_and_si256(alphaMask, dst);
+ src = _mm256_andnot_si256(alphaMask, src);
+ return _mm256_or_si256(dst, src);
+ }
+
+ static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
+ uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
+
+ if (ina != 0) {
+ uint outb = (out[BlendBlit::kBIndex] * (255 - ina) >> 8);
+ uint outg = (out[BlendBlit::kGIndex] * (255 - ina) >> 8);
+ uint outr = (out[BlendBlit::kRIndex] * (255 - ina) >> 8);
+
+ out[BlendBlit::kAIndex] = 255;
+ out[BlendBlit::kBIndex] = outb + (in[BlendBlit::kBIndex] * ina * cb >> 16);
+ out[BlendBlit::kGIndex] = outg + (in[BlendBlit::kGIndex] * ina * cg >> 16);
+ out[BlendBlit::kRIndex] = outr + (in[BlendBlit::kRIndex] * ina * cr >> 16);
+ }
+ }
};
template<bool doscale, bool rgbmod, bool alphamod>
struct MultiplyBlend {
- static inline __m256i simd(__m256i src, __m256i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
- __m256i ina;
- if (alphamod)
- ina = _mm256_srli_epi32(_mm256_mullo_epi16(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask)), _mm256_set1_epi32(ca)), 8);
- else
- ina = _mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask));
- __m256i alphaMask = _mm256_cmpeq_epi32(ina, _mm256_setzero_si256());
-
- if (rgbmod) {
- __m256i srcb = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
- __m256i srcg = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
- __m256i srcr = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
- __m256i dstb = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
- __m256i dstg = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
- __m256i dstr = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
-
- srcb = _mm256_and_si256(_mm256_srli_epi32(_mm256_mullo_epi32(dstb, _mm256_srli_epi32(_mm256_mullo_epi32(_mm256_mullo_epi16(srcb, _mm256_set1_epi32(cb)), ina), 16)), BlendBlit::kBModShift - 8), _mm256_set1_epi32(BlendBlit::kBModMask));
- srcg = _mm256_and_si256(_mm256_srli_epi32(_mm256_mullo_epi32(dstg, _mm256_srli_epi32(_mm256_mullo_epi32(_mm256_mullo_epi16(srcg, _mm256_set1_epi32(cg)), ina), 16)), BlendBlit::kGModShift - 8), _mm256_set1_epi32(BlendBlit::kGModMask));
- srcr = _mm256_and_si256(_mm256_srli_epi32(_mm256_mullo_epi32(dstr, _mm256_srli_epi32(_mm256_mullo_epi32(_mm256_mullo_epi16(srcr, _mm256_set1_epi32(cr)), ina), 16)), BlendBlit::kRModShift - 8), _mm256_set1_epi32(BlendBlit::kRModMask));
-
- src = _mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask));
- src = _mm256_or_si256(src, _mm256_or_si256(srcb, _mm256_or_si256(srcg, srcr)));
- } else {
- __m256i srcg = _mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kGModMask));
- __m256i srcrb = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
- __m256i dstg = _mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kGModMask));
- __m256i dstrb = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
- srcg = _mm256_and_si256(_mm256_srli_epi32(_mm256_mullo_epi32(dstg, _mm256_srli_epi32(_mm256_mullo_epi32(srcg, ina), 8)), 8), _mm256_set1_epi32(BlendBlit::kGModMask));
- srcrb = _mm256_and_si256(_mm256_mullo_epi32(dstrb, _mm256_srli_epi32(_mm256_mullo_epi32(srcrb, ina), 8)), _mm256_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask));
- src = _mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask));
- src = _mm256_or_si256(src, _mm256_or_si256(srcrb, srcg));
- }
-
- dst = _mm256_and_si256(alphaMask, dst);
- src = _mm256_andnot_si256(alphaMask, src);
- return _mm256_or_si256(dst, src);
- }
-
- static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
- uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
-
- if (ina != 0) {
- out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] * ((in[BlendBlit::kBIndex] * cb * ina) >> 16) >> 8;
- out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] * ((in[BlendBlit::kGIndex] * cg * ina) >> 16) >> 8;
- out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] * ((in[BlendBlit::kRIndex] * cr * ina) >> 16) >> 8;
- }
- }
+ static inline __m256i simd(__m256i src, __m256i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
+ __m256i ina;
+ if (alphamod)
+ ina = _mm256_srli_epi32(_mm256_mullo_epi16(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask)), _mm256_set1_epi32(ca)), 8);
+ else
+ ina = _mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask));
+ __m256i alphaMask = _mm256_cmpeq_epi32(ina, _mm256_setzero_si256());
+
+ if (rgbmod) {
+ __m256i srcb = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
+ __m256i srcg = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
+ __m256i srcr = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
+ __m256i dstb = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
+ __m256i dstg = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
+ __m256i dstr = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
+
+ srcb = _mm256_and_si256(_mm256_srli_epi32(_mm256_mullo_epi32(dstb, _mm256_srli_epi32(_mm256_mullo_epi32(_mm256_mullo_epi16(srcb, _mm256_set1_epi32(cb)), ina), 16)), BlendBlit::kBModShift - 8), _mm256_set1_epi32(BlendBlit::kBModMask));
+ srcg = _mm256_and_si256(_mm256_srli_epi32(_mm256_mullo_epi32(dstg, _mm256_srli_epi32(_mm256_mullo_epi32(_mm256_mullo_epi16(srcg, _mm256_set1_epi32(cg)), ina), 16)), BlendBlit::kGModShift - 8), _mm256_set1_epi32(BlendBlit::kGModMask));
+ srcr = _mm256_and_si256(_mm256_srli_epi32(_mm256_mullo_epi32(dstr, _mm256_srli_epi32(_mm256_mullo_epi32(_mm256_mullo_epi16(srcr, _mm256_set1_epi32(cr)), ina), 16)), BlendBlit::kRModShift - 8), _mm256_set1_epi32(BlendBlit::kRModMask));
+
+ src = _mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask));
+ src = _mm256_or_si256(src, _mm256_or_si256(srcb, _mm256_or_si256(srcg, srcr)));
+ } else {
+ __m256i srcg = _mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kGModMask));
+ __m256i srcrb = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
+ __m256i dstg = _mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kGModMask));
+ __m256i dstrb = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
+ srcg = _mm256_and_si256(_mm256_srli_epi32(_mm256_mullo_epi32(dstg, _mm256_srli_epi32(_mm256_mullo_epi32(srcg, ina), 8)), 8), _mm256_set1_epi32(BlendBlit::kGModMask));
+ srcrb = _mm256_and_si256(_mm256_mullo_epi32(dstrb, _mm256_srli_epi32(_mm256_mullo_epi32(srcrb, ina), 8)), _mm256_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask));
+ src = _mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask));
+ src = _mm256_or_si256(src, _mm256_or_si256(srcrb, srcg));
+ }
+
+ dst = _mm256_and_si256(alphaMask, dst);
+ src = _mm256_andnot_si256(alphaMask, src);
+ return _mm256_or_si256(dst, src);
+ }
+
+ static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
+ uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
+
+ if (ina != 0) {
+ out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] * ((in[BlendBlit::kBIndex] * cb * ina) >> 16) >> 8;
+ out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] * ((in[BlendBlit::kGIndex] * cg * ina) >> 16) >> 8;
+ out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] * ((in[BlendBlit::kRIndex] * cr * ina) >> 16) >> 8;
+ }
+ }
};
template<bool doscale, bool rgbmod, bool alphamod>
struct OpaqueBlend {
- static inline __m256i simd(__m256i src, __m256i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
- return _mm256_or_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask));
- }
+ static inline __m256i simd(__m256i src, __m256i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
+ return _mm256_or_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask));
+ }
- static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
- *(uint32 *)out = *(const uint32 *)in | BlendBlit::kAModMask;
- }
+ static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
+ *(uint32 *)out = *(const uint32 *)in | BlendBlit::kAModMask;
+ }
};
template<bool doscale, bool rgbmod, bool alphamod>
struct BinaryBlend {
- static inline __m256i simd(__m256i src, __m256i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
- __m256i alphaMask = _mm256_cmpeq_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask)), _mm256_setzero_si256());
- dst = _mm256_and_si256(dst, alphaMask);
- src = _mm256_andnot_si256(alphaMask, _mm256_or_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask)));
- return _mm256_or_si256(src, dst);
- }
-
- static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
- uint32 pix = *(const uint32 *)in;
- int a = in[BlendBlit::kAIndex];
-
- if (a != 0) { // Full opacity (Any value not exactly 0 is Opaque here)
- *(uint32 *)out = pix;
- out[BlendBlit::kAIndex] = 0xFF;
- }
- }
+ static inline __m256i simd(__m256i src, __m256i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
+ __m256i alphaMask = _mm256_cmpeq_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask)), _mm256_setzero_si256());
+ dst = _mm256_and_si256(dst, alphaMask);
+ src = _mm256_andnot_si256(alphaMask, _mm256_or_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask)));
+ return _mm256_or_si256(src, dst);
+ }
+
+ static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
+ uint32 pix = *(const uint32 *)in;
+ int a = in[BlendBlit::kAIndex];
+
+ if (a != 0) { // Full opacity (Any value not exactly 0 is Opaque here)
+ *(uint32 *)out = pix;
+ out[BlendBlit::kAIndex] = 0xFF;
+ }
+ }
};
template<bool doscale, bool rgbmod, bool alphamod>
struct AdditiveBlend {
- static inline __m256i simd(__m256i src, __m256i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
- __m256i ina;
- if (alphamod)
- ina = _mm256_srli_epi32(_mm256_mullo_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask)), _mm256_set1_epi32(ca)), 8);
- else
- ina = _mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask));
- __m256i alphaMask = _mm256_cmpeq_epi32(ina, _mm256_set1_epi32(0));
-
- if (rgbmod) {
- __m256i srcb = _mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kBModMask));
- __m256i srcg = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
- __m256i srcr = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
- __m256i dstb = _mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kBModMask));
- __m256i dstg = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
- __m256i dstr = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
-
- srcb = _mm256_and_si256(_mm256_add_epi32(dstb, _mm256_srli_epi32(_mm256_mullo_epi32(srcb, _mm256_mullo_epi32(_mm256_set1_epi32(cb), ina)), 16)), _mm256_set1_epi32(BlendBlit::kBModMask));
- srcg = _mm256_and_si256(_mm256_add_epi32(dstg, _mm256_mullo_epi32(srcg, _mm256_mullo_epi32(_mm256_set1_epi32(cg), ina))), _mm256_set1_epi32(BlendBlit::kGModMask));
- srcr = _mm256_and_si256(_mm256_add_epi32(dstr, _mm256_srli_epi32(_mm256_mullo_epi32(srcr, _mm256_mullo_epi32(_mm256_set1_epi32(cr), ina)), BlendBlit::kRModShift - 16)), _mm256_set1_epi32(BlendBlit::kRModMask));
-
- src = _mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kAModMask));
- src = _mm256_or_si256(src, _mm256_or_si256(srcb, _mm256_or_si256(srcg, srcb)));
- } else if (alphamod) {
- __m256i srcg = _mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kGModMask));
- __m256i srcrb = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
- __m256i dstg = _mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kGModMask));
- __m256i dstrb = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
-
- srcg = _mm256_and_si256(_mm256_add_epi32(dstg, _mm256_srli_epi32(_mm256_mullo_epi32(srcg, ina), 8)), _mm256_set1_epi32(BlendBlit::kGModMask));
- srcrb = _mm256_and_si256(_mm256_add_epi32(dstrb, _mm256_mullo_epi32(srcrb, ina)), _mm256_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask));
-
- src = _mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kAModMask));
- src = _mm256_or_si256(src, _mm256_or_si256(srcrb, srcg));
- } else {
- __m256i srcg = _mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kGModMask));
- __m256i srcrb = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
- __m256i dstg = _mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kGModMask));
- __m256i dstrb = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
-
- srcg = _mm256_and_si256(_mm256_add_epi32(dstg, srcg), _mm256_set1_epi32(BlendBlit::kGModMask));
- srcrb = _mm256_and_si256(_mm256_slli_epi32(_mm256_add_epi32(dstrb, srcrb), 8), _mm256_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask));
-
- src = _mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kAModMask));
- src = _mm256_or_si256(src, _mm256_or_si256(srcrb, srcg));
- }
-
- dst = _mm256_and_si256(alphaMask, dst);
- src = _mm256_andnot_si256(alphaMask, src);
- return _mm256_or_si256(dst, src);
- }
-
- static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
- uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
-
- if (ina != 0) {
- out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] + ((in[BlendBlit::kBIndex] * cb * ina) >> 16);
- out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] + ((in[BlendBlit::kGIndex] * cg * ina) >> 16);
- out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] + ((in[BlendBlit::kRIndex] * cr * ina) >> 16);
- }
- }
+ static inline __m256i simd(__m256i src, __m256i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
+ __m256i ina;
+ if (alphamod)
+ ina = _mm256_srli_epi32(_mm256_mullo_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask)), _mm256_set1_epi32(ca)), 8);
+ else
+ ina = _mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask));
+ __m256i alphaMask = _mm256_cmpeq_epi32(ina, _mm256_set1_epi32(0));
+
+ if (rgbmod) {
+ __m256i srcb = _mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kBModMask));
+ __m256i srcg = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
+ __m256i srcr = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
+ __m256i dstb = _mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kBModMask));
+ __m256i dstg = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
+ __m256i dstr = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
+
+ srcb = _mm256_and_si256(_mm256_add_epi32(dstb, _mm256_srli_epi32(_mm256_mullo_epi32(srcb, _mm256_mullo_epi32(_mm256_set1_epi32(cb), ina)), 16)), _mm256_set1_epi32(BlendBlit::kBModMask));
+ srcg = _mm256_and_si256(_mm256_add_epi32(dstg, _mm256_mullo_epi32(srcg, _mm256_mullo_epi32(_mm256_set1_epi32(cg), ina))), _mm256_set1_epi32(BlendBlit::kGModMask));
+ srcr = _mm256_and_si256(_mm256_add_epi32(dstr, _mm256_srli_epi32(_mm256_mullo_epi32(srcr, _mm256_mullo_epi32(_mm256_set1_epi32(cr), ina)), BlendBlit::kRModShift - 16)), _mm256_set1_epi32(BlendBlit::kRModMask));
+
+ src = _mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kAModMask));
+ src = _mm256_or_si256(src, _mm256_or_si256(srcb, _mm256_or_si256(srcg, srcb)));
+ } else if (alphamod) {
+ __m256i srcg = _mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kGModMask));
+ __m256i srcrb = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
+ __m256i dstg = _mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kGModMask));
+ __m256i dstrb = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
+
+ srcg = _mm256_and_si256(_mm256_add_epi32(dstg, _mm256_srli_epi32(_mm256_mullo_epi32(srcg, ina), 8)), _mm256_set1_epi32(BlendBlit::kGModMask));
+ srcrb = _mm256_and_si256(_mm256_add_epi32(dstrb, _mm256_mullo_epi32(srcrb, ina)), _mm256_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask));
+
+ src = _mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kAModMask));
+ src = _mm256_or_si256(src, _mm256_or_si256(srcrb, srcg));
+ } else {
+ __m256i srcg = _mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kGModMask));
+ __m256i srcrb = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
+ __m256i dstg = _mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kGModMask));
+ __m256i dstrb = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
+
+ srcg = _mm256_and_si256(_mm256_add_epi32(dstg, srcg), _mm256_set1_epi32(BlendBlit::kGModMask));
+ srcrb = _mm256_and_si256(_mm256_slli_epi32(_mm256_add_epi32(dstrb, srcrb), 8), _mm256_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask));
+
+ src = _mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kAModMask));
+ src = _mm256_or_si256(src, _mm256_or_si256(srcrb, srcg));
+ }
+
+ dst = _mm256_and_si256(alphaMask, dst);
+ src = _mm256_andnot_si256(alphaMask, src);
+ return _mm256_or_si256(dst, src);
+ }
+
+ static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
+ uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
+
+ if (ina != 0) {
+ out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] + ((in[BlendBlit::kBIndex] * cb * ina) >> 16);
+ out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] + ((in[BlendBlit::kGIndex] * cg * ina) >> 16);
+ out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] + ((in[BlendBlit::kRIndex] * cr * ina) >> 16);
+ }
+ }
};
template<bool doscale, bool rgbmod, bool alphamod>
struct SubtractiveBlend {
- static inline __m256i simd(__m256i src, __m256i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
- __m256i ina = _mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask));
- __m256i srcb = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
- __m256i srcg = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
- __m256i srcr = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
- __m256i dstb = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
- __m256i dstg = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
- __m256i dstr = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
-
- srcb = _mm256_and_si256(_mm256_slli_epi32(_mm256_max_epi16(_mm256_sub_epi32(dstb, _mm256_srli_epi32(_mm256_mullo_epi32(_mm256_mullo_epi32(srcb, _mm256_set1_epi32(cb)), _mm256_mullo_epi32(dstb, ina)), 24)), _mm256_set1_epi32(0)), BlendBlit::kBModShift), _mm256_set1_epi32(BlendBlit::kBModMask));
- srcg = _mm256_and_si256(_mm256_slli_epi32(_mm256_max_epi16(_mm256_sub_epi32(dstg, _mm256_srli_epi32(_mm256_mullo_epi32(_mm256_mullo_epi32(srcg, _mm256_set1_epi32(cg)), _mm256_mullo_epi32(dstg, ina)), 24)), _mm256_set1_epi32(0)), BlendBlit::kGModShift), _mm256_set1_epi32(BlendBlit::kGModMask));
- srcr = _mm256_and_si256(_mm256_slli_epi32(_mm256_max_epi16(_mm256_sub_epi32(dstr, _mm256_srli_epi32(_mm256_mullo_epi32(_mm256_mullo_epi32(srcr, _mm256_set1_epi32(cr)), _mm256_mullo_epi32(dstr, ina)), 24)), _mm256_set1_epi32(0)), BlendBlit::kRModShift), _mm256_set1_epi32(BlendBlit::kRModMask));
-
- return _mm256_or_si256(_mm256_set1_epi32(BlendBlit::kAModMask), _mm256_or_si256(srcb, _mm256_or_si256(srcg, srcr)));
- }
-
- static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
- out[BlendBlit::kAIndex] = 255;
- out[BlendBlit::kBIndex] = MAX<int32>(out[BlendBlit::kBIndex] - ((in[BlendBlit::kBIndex] * cb * (out[BlendBlit::kBIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
- out[BlendBlit::kGIndex] = MAX<int32>(out[BlendBlit::kGIndex] - ((in[BlendBlit::kGIndex] * cg * (out[BlendBlit::kGIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
- out[BlendBlit::kRIndex] = MAX<int32>(out[BlendBlit::kRIndex] - ((in[BlendBlit::kRIndex] * cr * (out[BlendBlit::kRIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
- }
+ static inline __m256i simd(__m256i src, __m256i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
+ __m256i ina = _mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask));
+ __m256i srcb = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
+ __m256i srcg = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
+ __m256i srcr = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
+ __m256i dstb = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
+ __m256i dstg = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
+ __m256i dstr = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
+
+ srcb = _mm256_and_si256(_mm256_slli_epi32(_mm256_max_epi16(_mm256_sub_epi32(dstb, _mm256_srli_epi32(_mm256_mullo_epi32(_mm256_mullo_epi32(srcb, _mm256_set1_epi32(cb)), _mm256_mullo_epi32(dstb, ina)), 24)), _mm256_set1_epi32(0)), BlendBlit::kBModShift), _mm256_set1_epi32(BlendBlit::kBModMask));
+ srcg = _mm256_and_si256(_mm256_slli_epi32(_mm256_max_epi16(_mm256_sub_epi32(dstg, _mm256_srli_epi32(_mm256_mullo_epi32(_mm256_mullo_epi32(srcg, _mm256_set1_epi32(cg)), _mm256_mullo_epi32(dstg, ina)), 24)), _mm256_set1_epi32(0)), BlendBlit::kGModShift), _mm256_set1_epi32(BlendBlit::kGModMask));
+ srcr = _mm256_and_si256(_mm256_slli_epi32(_mm256_max_epi16(_mm256_sub_epi32(dstr, _mm256_srli_epi32(_mm256_mullo_epi32(_mm256_mullo_epi32(srcr, _mm256_set1_epi32(cr)), _mm256_mullo_epi32(dstr, ina)), 24)), _mm256_set1_epi32(0)), BlendBlit::kRModShift), _mm256_set1_epi32(BlendBlit::kRModMask));
+
+ return _mm256_or_si256(_mm256_set1_epi32(BlendBlit::kAModMask), _mm256_or_si256(srcb, _mm256_or_si256(srcg, srcr)));
+ }
+
+ static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
+ out[BlendBlit::kAIndex] = 255;
+ out[BlendBlit::kBIndex] = MAX<int32>(out[BlendBlit::kBIndex] - ((in[BlendBlit::kBIndex] * cb * (out[BlendBlit::kBIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
+ out[BlendBlit::kGIndex] = MAX<int32>(out[BlendBlit::kGIndex] - ((in[BlendBlit::kGIndex] * cg * (out[BlendBlit::kGIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
+ out[BlendBlit::kRIndex] = MAX<int32>(out[BlendBlit::kRIndex] - ((in[BlendBlit::kRIndex] * cr * (out[BlendBlit::kRIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
+ }
};
class BlendBlitImpl {
public:
template<template <bool DOSCALE, bool RGBMOD, bool ALPHAMOD> class PixelFunc, bool doscale, bool rgbmod, bool alphamod, bool coloradd1, bool loaddst>
static void blitInnerLoop(BlendBlit::Args &args) {
- const byte *in;
- byte *out;
-
- const byte rawcr = (args.color >> BlendBlit::kRModShift) & 0xFF;
- const byte rawcg = (args.color >> BlendBlit::kGModShift) & 0xFF;
- const byte rawcb = (args.color >> BlendBlit::kBModShift) & 0xFF;
- const byte ca = alphamod ? ((args.color >> BlendBlit::kAModShift) & 0xFF) : 255;
- const uint32 cr = coloradd1 ? (rgbmod ? (rawcr == 255 ? 256 : rawcr) : 256) : (rgbmod ? rawcr : 255);
- const uint32 cg = coloradd1 ? (rgbmod ? (rawcg == 255 ? 256 : rawcg) : 256) : (rgbmod ? rawcg : 255);
- const uint32 cb = coloradd1 ? (rgbmod ? (rawcb == 255 ? 256 : rawcb) : 256) : (rgbmod ? rawcb : 255);
-
- int scaleXCtr, scaleYCtr = args.scaleYoff;
- const byte *inBase;
-
- if (!doscale && (args.flipping & FLIP_H)) args.ino -= 4 * 7;
-
- for (uint32 i = 0; i < args.height; i++) {
- if (doscale) {
- inBase = args.ino + scaleYCtr / BlendBlit::SCALE_THRESHOLD * args.inoStep;
- scaleXCtr = args.scaleXoff;
- } else {
- in = args.ino;
- }
- out = args.outo;
-
- uint32 j = 0;
- for (; j + 8 <= args.width; j += 8) {
- __m256i dstPixels, srcPixels;
- if (loaddst) dstPixels = _mm256_loadu_si256((const __m256i *)out);
- if (!doscale) {
- srcPixels = _mm256_loadu_si256((const __m256i *)in);
- } else {
- srcPixels = _mm256_setr_epi32(
- *(const uint32 *)(inBase + (ptrdiff_t)(scaleXCtr + args.scaleX * 0) / (ptrdiff_t)BlendBlit::SCALE_THRESHOLD * args.inStep),
- *(const uint32 *)(inBase + (ptrdiff_t)(scaleXCtr + args.scaleX * 1) / (ptrdiff_t)BlendBlit::SCALE_THRESHOLD * args.inStep),
- *(const uint32 *)(inBase + (ptrdiff_t)(scaleXCtr + args.scaleX * 2) / (ptrdiff_t)BlendBlit::SCALE_THRESHOLD * args.inStep),
- *(const uint32 *)(inBase + (ptrdiff_t)(scaleXCtr + args.scaleX * 3) / (ptrdiff_t)BlendBlit::SCALE_THRESHOLD * args.inStep),
- *(const uint32 *)(inBase + (ptrdiff_t)(scaleXCtr + args.scaleX * 4) / (ptrdiff_t)BlendBlit::SCALE_THRESHOLD * args.inStep),
- *(const uint32 *)(inBase + (ptrdiff_t)(scaleXCtr + args.scaleX * 5) / (ptrdiff_t)BlendBlit::SCALE_THRESHOLD * args.inStep),
- *(const uint32 *)(inBase + (ptrdiff_t)(scaleXCtr + args.scaleX * 6) / (ptrdiff_t)BlendBlit::SCALE_THRESHOLD * args.inStep),
- *(const uint32 *)(inBase + (ptrdiff_t)(scaleXCtr + args.scaleX * 7) / (ptrdiff_t)BlendBlit::SCALE_THRESHOLD * args.inStep)
- );
- scaleXCtr += args.scaleX * 8;
- }
- if (!doscale && (args.flipping & FLIP_H)) {
- srcPixels = _mm256_shuffle_epi32(srcPixels, _MM_SHUFFLE(0, 1, 2, 3));
- srcPixels = _mm256_permute2x128_si256(srcPixels, srcPixels, 0x01);
- }
- {
- const __m256i res = PixelFunc<doscale, rgbmod, alphamod>::simd(srcPixels, dstPixels, args.flipping & FLIP_H, ca, cr, cg, cb);
- _mm256_storeu_si256((__m256i *)out, res);
- }
- if (!doscale) in += (ptrdiff_t)args.inStep * 8;
- out += 4ULL * 8;
- }
- if (!doscale && (args.flipping & FLIP_H)) in += 4 * 7;
- for (; j < args.width; j++) {
- if (doscale) {
- in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
- }
-
- PixelFunc<doscale, rgbmod, alphamod>::normal(in, out, ca, cr, cg, cb);
-
- if (doscale)
- scaleXCtr += args.scaleX;
- else
- in += args.inStep;
- out += 4;
- }
- if (doscale)
- scaleYCtr += args.scaleY;
- else
- args.ino += args.inoStep;
- args.outo += args.dstPitch;
- }
+ const byte *in;
+ byte *out;
+
+ const byte rawcr = (args.color >> BlendBlit::kRModShift) & 0xFF;
+ const byte rawcg = (args.color >> BlendBlit::kGModShift) & 0xFF;
+ const byte rawcb = (args.color >> BlendBlit::kBModShift) & 0xFF;
+ const byte ca = alphamod ? ((args.color >> BlendBlit::kAModShift) & 0xFF) : 255;
+ const uint32 cr = coloradd1 ? (rgbmod ? (rawcr == 255 ? 256 : rawcr) : 256) : (rgbmod ? rawcr : 255);
+ const uint32 cg = coloradd1 ? (rgbmod ? (rawcg == 255 ? 256 : rawcg) : 256) : (rgbmod ? rawcg : 255);
+ const uint32 cb = coloradd1 ? (rgbmod ? (rawcb == 255 ? 256 : rawcb) : 256) : (rgbmod ? rawcb : 255);
+
+ int scaleXCtr, scaleYCtr = args.scaleYoff;
+ const byte *inBase;
+
+ if (!doscale && (args.flipping & FLIP_H)) args.ino -= 4 * 7;
+
+ for (uint32 i = 0; i < args.height; i++) {
+ if (doscale) {
+ inBase = args.ino + scaleYCtr / BlendBlit::SCALE_THRESHOLD * args.inoStep;
+ scaleXCtr = args.scaleXoff;
+ } else {
+ in = args.ino;
+ }
+ out = args.outo;
+
+ uint32 j = 0;
+ for (; j + 8 <= args.width; j += 8) {
+ __m256i dstPixels, srcPixels;
+ if (loaddst) dstPixels = _mm256_loadu_si256((const __m256i *)out);
+ if (!doscale) {
+ srcPixels = _mm256_loadu_si256((const __m256i *)in);
+ } else {
+ srcPixels = _mm256_setr_epi32(
+ *(const uint32 *)(inBase + (ptrdiff_t)(scaleXCtr + args.scaleX * 0) / (ptrdiff_t)BlendBlit::SCALE_THRESHOLD * args.inStep),
+ *(const uint32 *)(inBase + (ptrdiff_t)(scaleXCtr + args.scaleX * 1) / (ptrdiff_t)BlendBlit::SCALE_THRESHOLD * args.inStep),
+ *(const uint32 *)(inBase + (ptrdiff_t)(scaleXCtr + args.scaleX * 2) / (ptrdiff_t)BlendBlit::SCALE_THRESHOLD * args.inStep),
+ *(const uint32 *)(inBase + (ptrdiff_t)(scaleXCtr + args.scaleX * 3) / (ptrdiff_t)BlendBlit::SCALE_THRESHOLD * args.inStep),
+ *(const uint32 *)(inBase + (ptrdiff_t)(scaleXCtr + args.scaleX * 4) / (ptrdiff_t)BlendBlit::SCALE_THRESHOLD * args.inStep),
+ *(const uint32 *)(inBase + (ptrdiff_t)(scaleXCtr + args.scaleX * 5) / (ptrdiff_t)BlendBlit::SCALE_THRESHOLD * args.inStep),
+ *(const uint32 *)(inBase + (ptrdiff_t)(scaleXCtr + args.scaleX * 6) / (ptrdiff_t)BlendBlit::SCALE_THRESHOLD * args.inStep),
+ *(const uint32 *)(inBase + (ptrdiff_t)(scaleXCtr + args.scaleX * 7) / (ptrdiff_t)BlendBlit::SCALE_THRESHOLD * args.inStep)
+ );
+ scaleXCtr += args.scaleX * 8;
+ }
+ if (!doscale && (args.flipping & FLIP_H)) {
+ srcPixels = _mm256_shuffle_epi32(srcPixels, _MM_SHUFFLE(0, 1, 2, 3));
+ srcPixels = _mm256_permute2x128_si256(srcPixels, srcPixels, 0x01);
+ }
+ {
+ const __m256i res = PixelFunc<doscale, rgbmod, alphamod>::simd(srcPixels, dstPixels, args.flipping & FLIP_H, ca, cr, cg, cb);
+ _mm256_storeu_si256((__m256i *)out, res);
+ }
+ if (!doscale) in += (ptrdiff_t)args.inStep * 8;
+ out += 4ULL * 8;
+ }
+ if (!doscale && (args.flipping & FLIP_H)) in += 4 * 7;
+ for (; j < args.width; j++) {
+ if (doscale) {
+ in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
+ }
+
+ PixelFunc<doscale, rgbmod, alphamod>::normal(in, out, ca, cr, cg, cb);
+
+ if (doscale)
+ scaleXCtr += args.scaleX;
+ else
+ in += args.inStep;
+ out += 4;
+ }
+ if (doscale)
+ scaleYCtr += args.scaleY;
+ else
+ args.ino += args.inoStep;
+ args.outo += args.dstPitch;
+ }
}
}; // end of class BlendBlitImpl
void BlendBlit::blitAVX2(Args &args, const TSpriteBlendMode &blendMode, const AlphaType &alphaType) {
- bool rgbmod = ((args.color & kRGBModMask) != kRGBModMask);
- bool alphamod = ((args.color & kAModMask) != kAModMask);
- if (args.scaleX == SCALE_THRESHOLD && args.scaleY == SCALE_THRESHOLD) {
- if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_OPAQUE) {
- BlendBlitImpl::blitInnerLoop<OpaqueBlend, false, false, false, false, true>(args);
- } else if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_BINARY) {
- BlendBlitImpl::blitInnerLoop<BinaryBlend, false, false, false, false, true>(args);
- } else {
- if (blendMode == BLEND_ADDITIVE) {
- if (rgbmod) {
- if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, false, true, true, false, true>(args);
- } else {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, false, true, false, false, true>(args);
- }
- } else {
- if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, false, false, true, false, true>(args);
- } else {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, false, false, false, false, true>(args);
- }
- }
- } else if (blendMode == BLEND_SUBTRACTIVE) {
- if (rgbmod) {
- BlendBlitImpl::blitInnerLoop<SubtractiveBlend, false, true, false, false, true>(args);
- } else {
- BlendBlitImpl::blitInnerLoop<SubtractiveBlend, false, false, false, false, true>(args);
- }
- } else if (blendMode == BLEND_MULTIPLY) {
- if (rgbmod) {
- if (alphamod) {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, false, true, true, false, true>(args);
- } else {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, false, true, false, false, true>(args);
- }
- } else {
- if (alphamod) {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, false, false, true, false, true>(args);
- } else {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, false, false, false, false, true>(args);
- }
- }
- } else {
- assert(blendMode == BLEND_NORMAL);
- if (rgbmod) {
- if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, false, true, true, false, true>(args);
- } else {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, false, true, false, false, true>(args);
- }
- } else {
- if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, false, false, true, false, true>(args);
- } else {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, false, false, false, false, true>(args);
- }
- }
- }
- }
- } else {
- if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_OPAQUE) {
- BlendBlitImpl::blitInnerLoop<OpaqueBlend, true, false, false, false, true>(args);
- } else if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_BINARY) {
- BlendBlitImpl::blitInnerLoop<BinaryBlend, true, false, false, false, true>(args);
- } else {
- if (blendMode == BLEND_ADDITIVE) {
- if (rgbmod) {
- if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, true, true, true, false, true>(args);
- } else {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, true, true, false, false, true>(args);
- }
- } else {
- if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, true, false, true, false, true>(args);
- } else {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, true, false, false, false, true>(args);
- }
- }
- } else if (blendMode == BLEND_SUBTRACTIVE) {
- if (rgbmod) {
- BlendBlitImpl::blitInnerLoop<SubtractiveBlend, true, true, false, false, true>(args);
- } else {
- BlendBlitImpl::blitInnerLoop<SubtractiveBlend, true, false, false, false, true>(args);
- }
- } else if (blendMode == BLEND_MULTIPLY) {
- if (rgbmod) {
- if (alphamod) {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, true, true, true, false, true>(args);
- } else {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, true, true, false, false, true>(args);
- }
- } else {
- if (alphamod) {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, true, false, true, false, true>(args);
- } else {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, true, false, false, false, true>(args);
- }
- }
- } else {
- assert(blendMode == BLEND_NORMAL);
- if (rgbmod) {
- if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, true, true, true, false, true>(args);
- } else {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, true, true, false, false, true>(args);
- }
- } else {
- if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, true, false, true, false, true>(args);
- } else {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, true, false, false, false, true>(args);
- }
- }
- }
- }
- }
+ bool rgbmod = ((args.color & kRGBModMask) != kRGBModMask);
+ bool alphamod = ((args.color & kAModMask) != kAModMask);
+ if (args.scaleX == SCALE_THRESHOLD && args.scaleY == SCALE_THRESHOLD) {
+ if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_OPAQUE) {
+ BlendBlitImpl::blitInnerLoop<OpaqueBlend, false, false, false, false, true>(args);
+ } else if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_BINARY) {
+ BlendBlitImpl::blitInnerLoop<BinaryBlend, false, false, false, false, true>(args);
+ } else {
+ if (blendMode == BLEND_ADDITIVE) {
+ if (rgbmod) {
+ if (alphamod) {
+ BlendBlitImpl::blitInnerLoop<AdditiveBlend, false, true, true, false, true>(args);
+ } else {
+ BlendBlitImpl::blitInnerLoop<AdditiveBlend, false, true, false, false, true>(args);
+ }
+ } else {
+ if (alphamod) {
+ BlendBlitImpl::blitInnerLoop<AdditiveBlend, false, false, true, false, true>(args);
+ } else {
+ BlendBlitImpl::blitInnerLoop<AdditiveBlend, false, false, false, false, true>(args);
+ }
+ }
+ } else if (blendMode == BLEND_SUBTRACTIVE) {
+ if (rgbmod) {
+ BlendBlitImpl::blitInnerLoop<SubtractiveBlend, false, true, false, false, true>(args);
+ } else {
+ BlendBlitImpl::blitInnerLoop<SubtractiveBlend, false, false, false, false, true>(args);
+ }
+ } else if (blendMode == BLEND_MULTIPLY) {
+ if (rgbmod) {
+ if (alphamod) {
+ BlendBlitImpl::blitInnerLoop<MultiplyBlend, false, true, true, false, true>(args);
+ } else {
+ BlendBlitImpl::blitInnerLoop<MultiplyBlend, false, true, false, false, true>(args);
+ }
+ } else {
+ if (alphamod) {
+ BlendBlitImpl::blitInnerLoop<MultiplyBlend, false, false, true, false, true>(args);
+ } else {
+ BlendBlitImpl::blitInnerLoop<MultiplyBlend, false, false, false, false, true>(args);
+ }
+ }
+ } else {
+ assert(blendMode == BLEND_NORMAL);
+ if (rgbmod) {
+ if (alphamod) {
+ BlendBlitImpl::blitInnerLoop<AlphaBlend, false, true, true, false, true>(args);
+ } else {
+ BlendBlitImpl::blitInnerLoop<AlphaBlend, false, true, false, false, true>(args);
+ }
+ } else {
+ if (alphamod) {
+ BlendBlitImpl::blitInnerLoop<AlphaBlend, false, false, true, false, true>(args);
+ } else {
+ BlendBlitImpl::blitInnerLoop<AlphaBlend, false, false, false, false, true>(args);
+ }
+ }
+ }
+ }
+ } else {
+ if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_OPAQUE) {
+ BlendBlitImpl::blitInnerLoop<OpaqueBlend, true, false, false, false, true>(args);
+ } else if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_BINARY) {
+ BlendBlitImpl::blitInnerLoop<BinaryBlend, true, false, false, false, true>(args);
+ } else {
+ if (blendMode == BLEND_ADDITIVE) {
+ if (rgbmod) {
+ if (alphamod) {
+ BlendBlitImpl::blitInnerLoop<AdditiveBlend, true, true, true, false, true>(args);
+ } else {
+ BlendBlitImpl::blitInnerLoop<AdditiveBlend, true, true, false, false, true>(args);
+ }
+ } else {
+ if (alphamod) {
+ BlendBlitImpl::blitInnerLoop<AdditiveBlend, true, false, true, false, true>(args);
+ } else {
+ BlendBlitImpl::blitInnerLoop<AdditiveBlend, true, false, false, false, true>(args);
+ }
+ }
+ } else if (blendMode == BLEND_SUBTRACTIVE) {
+ if (rgbmod) {
+ BlendBlitImpl::blitInnerLoop<SubtractiveBlend, true, true, false, false, true>(args);
+ } else {
+ BlendBlitImpl::blitInnerLoop<SubtractiveBlend, true, false, false, false, true>(args);
+ }
+ } else if (blendMode == BLEND_MULTIPLY) {
+ if (rgbmod) {
+ if (alphamod) {
+ BlendBlitImpl::blitInnerLoop<MultiplyBlend, true, true, true, false, true>(args);
+ } else {
+ BlendBlitImpl::blitInnerLoop<MultiplyBlend, true, true, false, false, true>(args);
+ }
+ } else {
+ if (alphamod) {
+ BlendBlitImpl::blitInnerLoop<MultiplyBlend, true, false, true, false, true>(args);
+ } else {
+ BlendBlitImpl::blitInnerLoop<MultiplyBlend, true, false, false, false, true>(args);
+ }
+ }
+ } else {
+ assert(blendMode == BLEND_NORMAL);
+ if (rgbmod) {
+ if (alphamod) {
+ BlendBlitImpl::blitInnerLoop<AlphaBlend, true, true, true, false, true>(args);
+ } else {
+ BlendBlitImpl::blitInnerLoop<AlphaBlend, true, true, false, false, true>(args);
+ }
+ } else {
+ if (alphamod) {
+ BlendBlitImpl::blitInnerLoop<AlphaBlend, true, false, true, false, true>(args);
+ } else {
+ BlendBlitImpl::blitInnerLoop<AlphaBlend, true, false, false, false, true>(args);
+ }
+ }
+ }
+ }
+ }
}
} // End of namespace Graphics
diff --git a/graphics/blit/blit-neon.cpp b/graphics/blit/blit-neon.cpp
index 8f7480b566d..3f89078a044 100644
--- a/graphics/blit/blit-neon.cpp
+++ b/graphics/blit/blit-neon.cpp
@@ -31,236 +31,236 @@ namespace Graphics {
template<bool doscale, bool rgbmod, bool alphamod>
struct AlphaBlend {
- static inline uint32x4_t simd(uint32x4_t src, uint32x4_t dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
- uint32x4_t ina;
- if (alphamod)
- ina = vshrq_n_u32(vmulq_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kAModMask)), vdupq_n_u32(ca)), 8);
- else
- ina = vandq_u32(src, vmovq_n_u32(BlendBlit::kAModMask));
- uint32x4_t alphaMask = vceqq_u32(ina, vmovq_n_u32(0));
-
- if (rgbmod) {
- uint32x4_t dstR = vshrq_n_u32(vandq_u32(dst, vmovq_n_u32(BlendBlit::kRModMask)), 16);
- uint32x4_t srcR = vshrq_n_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kRModMask)), 16);
- uint32x4_t dstG = vshrq_n_u32(vandq_u32(dst, vmovq_n_u32(BlendBlit::kGModMask)), 8);
- uint32x4_t srcG = vshrq_n_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kGModMask)), 8);
- uint32x4_t dstB = vandq_u32(dst, vmovq_n_u32(BlendBlit::kBModMask));
- uint32x4_t srcB = vandq_u32(src, vmovq_n_u32(BlendBlit::kBModMask));
-
- dstR = vshrq_n_u32(vmulq_u32(dstR, vsubq_u32(vmovq_n_u32(255), ina)), 8);
- dstG = vshrq_n_u32(vmulq_u32(dstG, vsubq_u32(vmovq_n_u32(255), ina)), 8);
- dstB = vshrq_n_u32(vmulq_u32(dstB, vsubq_u32(vmovq_n_u32(255), ina)), 8);
- srcR = vaddq_u32(dstR, vshrq_n_u32(vmulq_u32(vmulq_u32(srcR, ina), vmovq_n_u32(cr)), 16));
- srcG = vaddq_u32(dstG, vshrq_n_u32(vmulq_u32(vmulq_u32(srcG, ina), vmovq_n_u32(cg)), 16));
- srcB = vaddq_u32(dstB, vshrq_n_u32(vmulq_u32(vmulq_u32(srcB, ina), vmovq_n_u32(cb)), 16));
- src = vorrq_u32(vandq_u32(srcB, vmovq_n_u32(BlendBlit::kBModMask)), vmovq_n_u32(BlendBlit::kAModMask));
- src = vorrq_u32(vandq_u32(vshlq_n_u32(srcG, 8), vmovq_n_u32(BlendBlit::kGModMask)), src);
- src = vorrq_u32(vandq_u32(vshlq_n_u32(srcR, 16), vmovq_n_u32(BlendBlit::kRModMask)), src);
- } else {
- uint32x4_t dstRB = vshrq_n_u32(vandq_u32(dst, vmovq_n_u32(BlendBlit::kRModMask | BlendBlit::kBModMask)), 8);
- uint32x4_t srcRB = vshrq_n_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kRModMask | BlendBlit::kBModMask)), 8);
- uint32x4_t dstG = vandq_u32(dst, vmovq_n_u32(BlendBlit::kGModMask));
- uint32x4_t srcG = vandq_u32(src, vmovq_n_u32(BlendBlit::kGModMask));
-
- dstRB = vmulq_u32(dstRB, vsubq_u32(vmovq_n_u32(255), ina));
- dstG = vshrq_n_u32(vmulq_u32(dstG, vsubq_u32(vmovq_n_u32(255), ina)), 8);
- srcRB = vaddq_u32(dstRB, vmulq_u32(srcRB, ina));
- srcG = vaddq_u32(dstG, vshrq_n_u32(vmulq_u32(srcG, ina), 8));
- src = vorrq_u32(vandq_u32(srcG, vmovq_n_u32(BlendBlit::kGModMask)), vmovq_n_u32(BlendBlit::kAModMask));
- src = vorrq_u32(vandq_u32(srcRB, vmovq_n_u32(BlendBlit::kBModMask | BlendBlit::kRModMask)), src);
- }
-
- dst = vandq_u32(alphaMask, dst);
- src = vandq_u32(vmvnq_u32(alphaMask), src);
- return vorrq_u32(dst, src);
- }
-
- static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
- uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
-
- if (ina != 0) {
- uint outb = (out[BlendBlit::kBIndex] * (255 - ina) >> 8);
- uint outg = (out[BlendBlit::kGIndex] * (255 - ina) >> 8);
- uint outr = (out[BlendBlit::kRIndex] * (255 - ina) >> 8);
-
- out[BlendBlit::kAIndex] = 255;
- out[BlendBlit::kBIndex] = outb + (in[BlendBlit::kBIndex] * ina * cb >> 16);
- out[BlendBlit::kGIndex] = outg + (in[BlendBlit::kGIndex] * ina * cg >> 16);
- out[BlendBlit::kRIndex] = outr + (in[BlendBlit::kRIndex] * ina * cr >> 16);
- }
- }
+ static inline uint32x4_t simd(uint32x4_t src, uint32x4_t dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
+ uint32x4_t ina;
+ if (alphamod)
+ ina = vshrq_n_u32(vmulq_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kAModMask)), vdupq_n_u32(ca)), 8);
+ else
+ ina = vandq_u32(src, vmovq_n_u32(BlendBlit::kAModMask));
+ uint32x4_t alphaMask = vceqq_u32(ina, vmovq_n_u32(0));
+
+ if (rgbmod) {
+ uint32x4_t dstR = vshrq_n_u32(vandq_u32(dst, vmovq_n_u32(BlendBlit::kRModMask)), 16);
+ uint32x4_t srcR = vshrq_n_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kRModMask)), 16);
+ uint32x4_t dstG = vshrq_n_u32(vandq_u32(dst, vmovq_n_u32(BlendBlit::kGModMask)), 8);
+ uint32x4_t srcG = vshrq_n_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kGModMask)), 8);
+ uint32x4_t dstB = vandq_u32(dst, vmovq_n_u32(BlendBlit::kBModMask));
+ uint32x4_t srcB = vandq_u32(src, vmovq_n_u32(BlendBlit::kBModMask));
+
+ dstR = vshrq_n_u32(vmulq_u32(dstR, vsubq_u32(vmovq_n_u32(255), ina)), 8);
+ dstG = vshrq_n_u32(vmulq_u32(dstG, vsubq_u32(vmovq_n_u32(255), ina)), 8);
+ dstB = vshrq_n_u32(vmulq_u32(dstB, vsubq_u32(vmovq_n_u32(255), ina)), 8);
+ srcR = vaddq_u32(dstR, vshrq_n_u32(vmulq_u32(vmulq_u32(srcR, ina), vmovq_n_u32(cr)), 16));
+ srcG = vaddq_u32(dstG, vshrq_n_u32(vmulq_u32(vmulq_u32(srcG, ina), vmovq_n_u32(cg)), 16));
+ srcB = vaddq_u32(dstB, vshrq_n_u32(vmulq_u32(vmulq_u32(srcB, ina), vmovq_n_u32(cb)), 16));
+ src = vorrq_u32(vandq_u32(srcB, vmovq_n_u32(BlendBlit::kBModMask)), vmovq_n_u32(BlendBlit::kAModMask));
+ src = vorrq_u32(vandq_u32(vshlq_n_u32(srcG, 8), vmovq_n_u32(BlendBlit::kGModMask)), src);
+ src = vorrq_u32(vandq_u32(vshlq_n_u32(srcR, 16), vmovq_n_u32(BlendBlit::kRModMask)), src);
+ } else {
+ uint32x4_t dstRB = vshrq_n_u32(vandq_u32(dst, vmovq_n_u32(BlendBlit::kRModMask | BlendBlit::kBModMask)), 8);
+ uint32x4_t srcRB = vshrq_n_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kRModMask | BlendBlit::kBModMask)), 8);
+ uint32x4_t dstG = vandq_u32(dst, vmovq_n_u32(BlendBlit::kGModMask));
+ uint32x4_t srcG = vandq_u32(src, vmovq_n_u32(BlendBlit::kGModMask));
+
+ dstRB = vmulq_u32(dstRB, vsubq_u32(vmovq_n_u32(255), ina));
+ dstG = vshrq_n_u32(vmulq_u32(dstG, vsubq_u32(vmovq_n_u32(255), ina)), 8);
+ srcRB = vaddq_u32(dstRB, vmulq_u32(srcRB, ina));
+ srcG = vaddq_u32(dstG, vshrq_n_u32(vmulq_u32(srcG, ina), 8));
+ src = vorrq_u32(vandq_u32(srcG, vmovq_n_u32(BlendBlit::kGModMask)), vmovq_n_u32(BlendBlit::kAModMask));
+ src = vorrq_u32(vandq_u32(srcRB, vmovq_n_u32(BlendBlit::kBModMask | BlendBlit::kRModMask)), src);
+ }
+
+ dst = vandq_u32(alphaMask, dst);
+ src = vandq_u32(vmvnq_u32(alphaMask), src);
+ return vorrq_u32(dst, src);
+ }
+
+ static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
+ uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
+
+ if (ina != 0) {
+ uint outb = (out[BlendBlit::kBIndex] * (255 - ina) >> 8);
+ uint outg = (out[BlendBlit::kGIndex] * (255 - ina) >> 8);
+ uint outr = (out[BlendBlit::kRIndex] * (255 - ina) >> 8);
+
+ out[BlendBlit::kAIndex] = 255;
+ out[BlendBlit::kBIndex] = outb + (in[BlendBlit::kBIndex] * ina * cb >> 16);
+ out[BlendBlit::kGIndex] = outg + (in[BlendBlit::kGIndex] * ina * cg >> 16);
+ out[BlendBlit::kRIndex] = outr + (in[BlendBlit::kRIndex] * ina * cr >> 16);
+ }
+ }
};
template<bool doscale, bool rgbmod, bool alphamod>
struct MultiplyBlend {
- static inline uint32x4_t simd(uint32x4_t src, uint32x4_t dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
- uint32x4_t ina;
- if (alphamod)
- ina = vshrq_n_u32(vmulq_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kAModMask)), vdupq_n_u32(ca)), 8);
- else
- ina = vandq_u32(src, vmovq_n_u32(BlendBlit::kAModMask));
- uint32x4_t alphaMask = vceqq_u32(ina, vmovq_n_u32(0));
-
- if (rgbmod) {
- uint32x4_t srcb = vandq_u32(src, vmovq_n_u32(BlendBlit::kBModMask));
- uint32x4_t srcg = vshrq_n_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
- uint32x4_t srcr = vshrq_n_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
- uint32x4_t dstb = vandq_u32(dst, vmovq_n_u32(BlendBlit::kBModMask));
- uint32x4_t dstg = vshrq_n_u32(vandq_u32(dst, vmovq_n_u32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
- uint32x4_t dstr = vshrq_n_u32(vandq_u32(dst, vmovq_n_u32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
-
- srcb = vandq_u32(vshrq_n_u32(vmulq_u32(dstb, vshrq_n_u32(vmulq_u32(vmulq_u32(srcb, vmovq_n_u32(cb)), ina), 16)), 8), vmovq_n_u32(BlendBlit::kBModMask));
- srcg = vandq_u32(vshlq_n_u32(vmulq_u32(dstg, vshrq_n_u32(vmulq_u32(vmulq_u32(srcg, vmovq_n_u32(cg)), ina), 16)), BlendBlit::kGModShift - 8), vmovq_n_u32(BlendBlit::kGModMask));
- srcr = vandq_u32(vshlq_n_u32(vmulq_u32(dstr, vshrq_n_u32(vmulq_u32(vmulq_u32(srcr, vmovq_n_u32(cr)), ina), 16)), BlendBlit::kRModShift - 8), vmovq_n_u32(BlendBlit::kRModMask));
-
- src = vandq_u32(src, vmovq_n_u32(BlendBlit::kAModMask));
- src = vorrq_u32(src, vorrq_u32(srcb, vorrq_u32(srcg, srcr)));
- } else {
- uint32x4_t srcg = vandq_u32(src, vmovq_n_u32(BlendBlit::kGModMask));
- uint32x4_t srcrb = vshrq_n_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
- uint32x4_t dstg = vandq_u32(dst, vmovq_n_u32(BlendBlit::kGModMask));
- uint32x4_t dstrb = vshrq_n_u32(vandq_u32(dst, vmovq_n_u32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
- srcg = vandq_u32(vshrq_n_u32(vmulq_u32(dstg, vshrq_n_u32(vmulq_u32(srcg, ina), 8)), 8), vmovq_n_u32(BlendBlit::kGModMask));
- srcrb = vandq_u32(vmulq_u32(dstrb, vshrq_n_u32(vmulq_u32(srcrb, ina), 8)), vmovq_n_u32(BlendBlit::kRModMask | BlendBlit::kBModMask));
- src = vandq_u32(src, vmovq_n_u32(BlendBlit::kAModMask));
- src = vorrq_u32(src, vorrq_u32(srcrb, srcg));
- }
-
- dst = vandq_u32(alphaMask, dst);
- src = vandq_u32(vmvnq_u32(alphaMask), src);
- return vorrq_u32(dst, src);
- }
-
- static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
- uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
-
- if (ina != 0) {
- out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] * ((in[BlendBlit::kBIndex] * cb * ina) >> 16) >> 8;
- out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] * ((in[BlendBlit::kGIndex] * cg * ina) >> 16) >> 8;
- out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] * ((in[BlendBlit::kRIndex] * cr * ina) >> 16) >> 8;
- }
- }
+ static inline uint32x4_t simd(uint32x4_t src, uint32x4_t dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
+ uint32x4_t ina;
+ if (alphamod)
+ ina = vshrq_n_u32(vmulq_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kAModMask)), vdupq_n_u32(ca)), 8);
+ else
+ ina = vandq_u32(src, vmovq_n_u32(BlendBlit::kAModMask));
+ uint32x4_t alphaMask = vceqq_u32(ina, vmovq_n_u32(0));
+
+ if (rgbmod) {
+ uint32x4_t srcb = vandq_u32(src, vmovq_n_u32(BlendBlit::kBModMask));
+ uint32x4_t srcg = vshrq_n_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
+ uint32x4_t srcr = vshrq_n_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
+ uint32x4_t dstb = vandq_u32(dst, vmovq_n_u32(BlendBlit::kBModMask));
+ uint32x4_t dstg = vshrq_n_u32(vandq_u32(dst, vmovq_n_u32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
+ uint32x4_t dstr = vshrq_n_u32(vandq_u32(dst, vmovq_n_u32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
+
+ srcb = vandq_u32(vshrq_n_u32(vmulq_u32(dstb, vshrq_n_u32(vmulq_u32(vmulq_u32(srcb, vmovq_n_u32(cb)), ina), 16)), 8), vmovq_n_u32(BlendBlit::kBModMask));
+ srcg = vandq_u32(vshlq_n_u32(vmulq_u32(dstg, vshrq_n_u32(vmulq_u32(vmulq_u32(srcg, vmovq_n_u32(cg)), ina), 16)), BlendBlit::kGModShift - 8), vmovq_n_u32(BlendBlit::kGModMask));
+ srcr = vandq_u32(vshlq_n_u32(vmulq_u32(dstr, vshrq_n_u32(vmulq_u32(vmulq_u32(srcr, vmovq_n_u32(cr)), ina), 16)), BlendBlit::kRModShift - 8), vmovq_n_u32(BlendBlit::kRModMask));
+
+ src = vandq_u32(src, vmovq_n_u32(BlendBlit::kAModMask));
+ src = vorrq_u32(src, vorrq_u32(srcb, vorrq_u32(srcg, srcr)));
+ } else {
+ uint32x4_t srcg = vandq_u32(src, vmovq_n_u32(BlendBlit::kGModMask));
+ uint32x4_t srcrb = vshrq_n_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
+ uint32x4_t dstg = vandq_u32(dst, vmovq_n_u32(BlendBlit::kGModMask));
+ uint32x4_t dstrb = vshrq_n_u32(vandq_u32(dst, vmovq_n_u32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
+ srcg = vandq_u32(vshrq_n_u32(vmulq_u32(dstg, vshrq_n_u32(vmulq_u32(srcg, ina), 8)), 8), vmovq_n_u32(BlendBlit::kGModMask));
+ srcrb = vandq_u32(vmulq_u32(dstrb, vshrq_n_u32(vmulq_u32(srcrb, ina), 8)), vmovq_n_u32(BlendBlit::kRModMask | BlendBlit::kBModMask));
+ src = vandq_u32(src, vmovq_n_u32(BlendBlit::kAModMask));
+ src = vorrq_u32(src, vorrq_u32(srcrb, srcg));
+ }
+
+ dst = vandq_u32(alphaMask, dst);
+ src = vandq_u32(vmvnq_u32(alphaMask), src);
+ return vorrq_u32(dst, src);
+ }
+
+ static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
+ uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
+
+ if (ina != 0) {
+ out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] * ((in[BlendBlit::kBIndex] * cb * ina) >> 16) >> 8;
+ out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] * ((in[BlendBlit::kGIndex] * cg * ina) >> 16) >> 8;
+ out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] * ((in[BlendBlit::kRIndex] * cr * ina) >> 16) >> 8;
+ }
+ }
};
template<bool doscale, bool rgbmod, bool alphamod>
struct OpaqueBlend {
- static inline uint32x4_t simd(uint32x4_t src, uint32x4_t dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
- return vorrq_u32(src, vmovq_n_u32(BlendBlit::kAModMask));
- }
+ static inline uint32x4_t simd(uint32x4_t src, uint32x4_t dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
+ return vorrq_u32(src, vmovq_n_u32(BlendBlit::kAModMask));
+ }
- static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
- *(uint32 *)out = *(const uint32 *)in | BlendBlit::kAModMask;
- }
+ static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
+ *(uint32 *)out = *(const uint32 *)in | BlendBlit::kAModMask;
+ }
};
template<bool doscale, bool rgbmod, bool alphamod>
struct BinaryBlend {
- static inline uint32x4_t simd(uint32x4_t src, uint32x4_t dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
- uint32x4_t alphaMask = vceqq_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kAModMask)), vmovq_n_u32(0));
- dst = vandq_u32(dst, alphaMask);
- src = vandq_u32(vorrq_u32(src, vmovq_n_u32(BlendBlit::kAModMask)), vmvnq_u32(alphaMask));
- return vorrq_u32(dst, src);
- }
-
- static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
- uint32 pix = *(const uint32 *)in;
- int a = in[BlendBlit::kAIndex];
-
- if (a != 0) { // Full opacity (Any value not exactly 0 is Opaque here)
- *(uint32 *)out = pix;
- out[BlendBlit::kAIndex] = 0xFF;
- }
- }
+ static inline uint32x4_t simd(uint32x4_t src, uint32x4_t dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
+ uint32x4_t alphaMask = vceqq_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kAModMask)), vmovq_n_u32(0));
+ dst = vandq_u32(dst, alphaMask);
+ src = vandq_u32(vorrq_u32(src, vmovq_n_u32(BlendBlit::kAModMask)), vmvnq_u32(alphaMask));
+ return vorrq_u32(dst, src);
+ }
+
+ static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
+ uint32 pix = *(const uint32 *)in;
+ int a = in[BlendBlit::kAIndex];
+
+ if (a != 0) { // Full opacity (Any value not exactly 0 is Opaque here)
+ *(uint32 *)out = pix;
+ out[BlendBlit::kAIndex] = 0xFF;
+ }
+ }
};
template<bool doscale, bool rgbmod, bool alphamod>
struct AdditiveBlend {
- static inline uint32x4_t simd(uint32x4_t src, uint32x4_t dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
- uint32x4_t ina;
- if (alphamod)
- ina = vshrq_n_u32(vmulq_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kAModMask)), vdupq_n_u32(ca)), 8);
- else
- ina = vandq_u32(src, vmovq_n_u32(BlendBlit::kAModMask));
- uint32x4_t alphaMask = vceqq_u32(ina, vmovq_n_u32(0));
-
- if (rgbmod) {
- uint32x4_t srcb = vandq_u32(src, vmovq_n_u32(BlendBlit::kBModMask));
- uint32x4_t srcg = vshrq_n_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
- uint32x4_t srcr = vshrq_n_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
- uint32x4_t dstb = vandq_u32(dst, vmovq_n_u32(BlendBlit::kBModMask));
- uint32x4_t dstg = vshrq_n_u32(vandq_u32(dst, vmovq_n_u32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
- uint32x4_t dstr = vshrq_n_u32(vandq_u32(dst, vmovq_n_u32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
-
- srcb = vandq_u32(vaddq_u32(dstb, vshrq_n_u32(vmulq_u32(srcb, vmulq_u32(vmovq_n_u32(cb), ina)), 16)), vmovq_n_u32(BlendBlit::kBModMask));
- srcg = vandq_u32(vaddq_u32(dstg, vmulq_u32(srcg, vmulq_u32(vmovq_n_u32(cg), ina))), vmovq_n_u32(BlendBlit::kGModMask));
- srcr = vandq_u32(vaddq_u32(dstr, vshrq_n_u32(vmulq_u32(srcr, vmulq_u32(vmovq_n_u32(cr), ina)), BlendBlit::kRModShift - 16)), vmovq_n_u32(BlendBlit::kRModMask));
-
- src = vandq_u32(dst, vmovq_n_u32(BlendBlit::kAModMask));
- src = vorrq_u32(src, vorrq_u32(srcb, vorrq_u32(srcg, srcr)));
- } else if (alphamod) {
- uint32x4_t srcg = vandq_u32(src, vmovq_n_u32(BlendBlit::kGModMask));
- uint32x4_t srcrb = vshrq_n_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
- uint32x4_t dstg = vandq_u32(dst, vmovq_n_u32(BlendBlit::kGModMask));
- uint32x4_t dstrb = vshrq_n_u32(vandq_u32(dst, vmovq_n_u32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
-
- srcg = vandq_u32(vaddq_u32(dstg, vshrq_n_u32(vmulq_u32(srcg, ina), 8)), vmovq_n_u32(BlendBlit::kGModMask));
- srcrb = vandq_u32(vaddq_u32(dstrb, vmulq_u32(srcrb, ina)), vmovq_n_u32(BlendBlit::kRModMask | BlendBlit::kBModMask));
-
- src = vandq_u32(dst, vmovq_n_u32(BlendBlit::kAModMask));
- src = vorrq_u32(src, vorrq_u32(srcrb, srcg));
- } else {
- uint32x4_t srcg = vandq_u32(src, vmovq_n_u32(BlendBlit::kGModMask));
- uint32x4_t srcrb = vshrq_n_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
- uint32x4_t dstg = vandq_u32(dst, vmovq_n_u32(BlendBlit::kGModMask));
- uint32x4_t dstrb = vshrq_n_u32(vandq_u32(dst, vmovq_n_u32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
-
- srcg = vandq_u32(vaddq_u32(dstg, srcg), vmovq_n_u32(BlendBlit::kGModMask));
- srcrb = vandq_u32(vshlq_n_u32(vaddq_u32(dstrb, srcrb), 8), vmovq_n_u32(BlendBlit::kRModMask | BlendBlit::kBModMask));
-
- src = vandq_u32(dst, vmovq_n_u32(BlendBlit::kAModMask));
- src = vorrq_u32(src, vorrq_u32(srcrb, srcg));
- }
-
- dst = vandq_u32(alphaMask, dst);
- src = vandq_u32(vmvnq_u32(alphaMask), src);
- return vorrq_u32(dst, src);
- }
-
- static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
- uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
-
- if (ina != 0) {
- out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] + ((in[BlendBlit::kBIndex] * cb * ina) >> 16);
- out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] + ((in[BlendBlit::kGIndex] * cg * ina) >> 16);
- out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] + ((in[BlendBlit::kRIndex] * cr * ina) >> 16);
- }
- }
+ static inline uint32x4_t simd(uint32x4_t src, uint32x4_t dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
+ uint32x4_t ina;
+ if (alphamod)
+ ina = vshrq_n_u32(vmulq_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kAModMask)), vdupq_n_u32(ca)), 8);
+ else
+ ina = vandq_u32(src, vmovq_n_u32(BlendBlit::kAModMask));
+ uint32x4_t alphaMask = vceqq_u32(ina, vmovq_n_u32(0));
+
+ if (rgbmod) {
+ uint32x4_t srcb = vandq_u32(src, vmovq_n_u32(BlendBlit::kBModMask));
+ uint32x4_t srcg = vshrq_n_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
+ uint32x4_t srcr = vshrq_n_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
+ uint32x4_t dstb = vandq_u32(dst, vmovq_n_u32(BlendBlit::kBModMask));
+ uint32x4_t dstg = vshrq_n_u32(vandq_u32(dst, vmovq_n_u32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
+ uint32x4_t dstr = vshrq_n_u32(vandq_u32(dst, vmovq_n_u32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
+
+ srcb = vandq_u32(vaddq_u32(dstb, vshrq_n_u32(vmulq_u32(srcb, vmulq_u32(vmovq_n_u32(cb), ina)), 16)), vmovq_n_u32(BlendBlit::kBModMask));
+ srcg = vandq_u32(vaddq_u32(dstg, vmulq_u32(srcg, vmulq_u32(vmovq_n_u32(cg), ina))), vmovq_n_u32(BlendBlit::kGModMask));
+ srcr = vandq_u32(vaddq_u32(dstr, vshrq_n_u32(vmulq_u32(srcr, vmulq_u32(vmovq_n_u32(cr), ina)), BlendBlit::kRModShift - 16)), vmovq_n_u32(BlendBlit::kRModMask));
+
+ src = vandq_u32(dst, vmovq_n_u32(BlendBlit::kAModMask));
+ src = vorrq_u32(src, vorrq_u32(srcb, vorrq_u32(srcg, srcr)));
+ } else if (alphamod) {
+ uint32x4_t srcg = vandq_u32(src, vmovq_n_u32(BlendBlit::kGModMask));
+ uint32x4_t srcrb = vshrq_n_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
+ uint32x4_t dstg = vandq_u32(dst, vmovq_n_u32(BlendBlit::kGModMask));
+ uint32x4_t dstrb = vshrq_n_u32(vandq_u32(dst, vmovq_n_u32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
+
+ srcg = vandq_u32(vaddq_u32(dstg, vshrq_n_u32(vmulq_u32(srcg, ina), 8)), vmovq_n_u32(BlendBlit::kGModMask));
+ srcrb = vandq_u32(vaddq_u32(dstrb, vmulq_u32(srcrb, ina)), vmovq_n_u32(BlendBlit::kRModMask | BlendBlit::kBModMask));
+
+ src = vandq_u32(dst, vmovq_n_u32(BlendBlit::kAModMask));
+ src = vorrq_u32(src, vorrq_u32(srcrb, srcg));
+ } else {
+ uint32x4_t srcg = vandq_u32(src, vmovq_n_u32(BlendBlit::kGModMask));
+ uint32x4_t srcrb = vshrq_n_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
+ uint32x4_t dstg = vandq_u32(dst, vmovq_n_u32(BlendBlit::kGModMask));
+ uint32x4_t dstrb = vshrq_n_u32(vandq_u32(dst, vmovq_n_u32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
+
+ srcg = vandq_u32(vaddq_u32(dstg, srcg), vmovq_n_u32(BlendBlit::kGModMask));
+ srcrb = vandq_u32(vshlq_n_u32(vaddq_u32(dstrb, srcrb), 8), vmovq_n_u32(BlendBlit::kRModMask | BlendBlit::kBModMask));
+
+ src = vandq_u32(dst, vmovq_n_u32(BlendBlit::kAModMask));
+ src = vorrq_u32(src, vorrq_u32(srcrb, srcg));
+ }
+
+ dst = vandq_u32(alphaMask, dst);
+ src = vandq_u32(vmvnq_u32(alphaMask), src);
+ return vorrq_u32(dst, src);
+ }
+
+ static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
+ uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
+
+ if (ina != 0) {
+ out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] + ((in[BlendBlit::kBIndex] * cb * ina) >> 16);
+ out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] + ((in[BlendBlit::kGIndex] * cg * ina) >> 16);
+ out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] + ((in[BlendBlit::kRIndex] * cr * ina) >> 16);
+ }
+ }
};
template<bool doscale, bool rgbmod, bool alphamod>
struct SubtractiveBlend {
- static inline uint32x4_t simd(uint32x4_t src, uint32x4_t dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
- uint32x4_t ina = vandq_u32(src, vmovq_n_u32(BlendBlit::kAModMask));
- uint32x4_t srcb = vshrq_n_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
- uint32x4_t srcg = vshrq_n_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
- uint32x4_t srcr = vshrq_n_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
- uint32x4_t dstb = vshrq_n_u32(vandq_u32(dst, vmovq_n_u32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
- uint32x4_t dstg = vshrq_n_u32(vandq_u32(dst, vmovq_n_u32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
- uint32x4_t dstr = vshrq_n_u32(vandq_u32(dst, vmovq_n_u32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
-
- srcb = vandq_u32(vshlq_n_u32(vreinterpretq_u32_s32(vmaxq_s32(vsubq_s32(vreinterpretq_s32_u32(dstb), vreinterpretq_s32_u32(vshrq_n_u32(vmulq_u32(vmulq_u32(srcb, vmovq_n_u32(cb)), vmulq_u32(dstb, ina)), 24))), vmovq_n_s32(0))), BlendBlit::kBModShift), vmovq_n_u32(BlendBlit::kBModMask));
- srcg = vandq_u32(vshlq_n_u32(vreinterpretq_u32_s32(vmaxq_s32(vsubq_s32(vreinterpretq_s32_u32(dstg), vreinterpretq_s32_u32(vshrq_n_u32(vmulq_u32(vmulq_u32(srcg, vmovq_n_u32(cg)), vmulq_u32(dstg, ina)), 24))), vmovq_n_s32(0))), BlendBlit::kGModShift), vmovq_n_u32(BlendBlit::kGModMask));
- srcr = vandq_u32(vshlq_n_u32(vreinterpretq_u32_s32(vmaxq_s32(vsubq_s32(vreinterpretq_s32_u32(dstr), vreinterpretq_s32_u32(vshrq_n_u32(vmulq_u32(vmulq_u32(srcr, vmovq_n_u32(cr)), vmulq_u32(dstr, ina)), 24))), vmovq_n_s32(0))), BlendBlit::kRModShift), vmovq_n_u32(BlendBlit::kRModMask));
-
- return vorrq_u32(vmovq_n_u32(BlendBlit::kAModMask), vorrq_u32(srcb, vorrq_u32(srcg, srcr)));
- }
-
- static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
- out[BlendBlit::kAIndex] = 255;
- out[BlendBlit::kBIndex] = MAX<int32>(out[BlendBlit::kBIndex] - ((in[BlendBlit::kBIndex] * cb * (out[BlendBlit::kBIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
- out[BlendBlit::kGIndex] = MAX<int32>(out[BlendBlit::kGIndex] - ((in[BlendBlit::kGIndex] * cg * (out[BlendBlit::kGIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
- out[BlendBlit::kRIndex] = MAX<int32>(out[BlendBlit::kRIndex] - ((in[BlendBlit::kRIndex] * cr * (out[BlendBlit::kRIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
- }
+ static inline uint32x4_t simd(uint32x4_t src, uint32x4_t dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
+ uint32x4_t ina = vandq_u32(src, vmovq_n_u32(BlendBlit::kAModMask));
+ uint32x4_t srcb = vshrq_n_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
+ uint32x4_t srcg = vshrq_n_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
+ uint32x4_t srcr = vshrq_n_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
+ uint32x4_t dstb = vshrq_n_u32(vandq_u32(dst, vmovq_n_u32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
+ uint32x4_t dstg = vshrq_n_u32(vandq_u32(dst, vmovq_n_u32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
+ uint32x4_t dstr = vshrq_n_u32(vandq_u32(dst, vmovq_n_u32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
+
+ srcb = vandq_u32(vshlq_n_u32(vreinterpretq_u32_s32(vmaxq_s32(vsubq_s32(vreinterpretq_s32_u32(dstb), vreinterpretq_s32_u32(vshrq_n_u32(vmulq_u32(vmulq_u32(srcb, vmovq_n_u32(cb)), vmulq_u32(dstb, ina)), 24))), vmovq_n_s32(0))), BlendBlit::kBModShift), vmovq_n_u32(BlendBlit::kBModMask));
+ srcg = vandq_u32(vshlq_n_u32(vreinterpretq_u32_s32(vmaxq_s32(vsubq_s32(vreinterpretq_s32_u32(dstg), vreinterpretq_s32_u32(vshrq_n_u32(vmulq_u32(vmulq_u32(srcg, vmovq_n_u32(cg)), vmulq_u32(dstg, ina)), 24))), vmovq_n_s32(0))), BlendBlit::kGModShift), vmovq_n_u32(BlendBlit::kGModMask));
+ srcr = vandq_u32(vshlq_n_u32(vreinterpretq_u32_s32(vmaxq_s32(vsubq_s32(vreinterpretq_s32_u32(dstr), vreinterpretq_s32_u32(vshrq_n_u32(vmulq_u32(vmulq_u32(srcr, vmovq_n_u32(cr)), vmulq_u32(dstr, ina)), 24))), vmovq_n_s32(0))), BlendBlit::kRModShift), vmovq_n_u32(BlendBlit::kRModMask));
+
+ return vorrq_u32(vmovq_n_u32(BlendBlit::kAModMask), vorrq_u32(srcb, vorrq_u32(srcg, srcr)));
+ }
+
+ static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
+ out[BlendBlit::kAIndex] = 255;
+ out[BlendBlit::kBIndex] = MAX<int32>(out[BlendBlit::kBIndex] - ((in[BlendBlit::kBIndex] * cb * (out[BlendBlit::kBIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
+ out[BlendBlit::kGIndex] = MAX<int32>(out[BlendBlit::kGIndex] - ((in[BlendBlit::kGIndex] * cg * (out[BlendBlit::kGIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
+ out[BlendBlit::kRIndex] = MAX<int32>(out[BlendBlit::kRIndex] - ((in[BlendBlit::kRIndex] * cr * (out[BlendBlit::kRIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
+ }
};
class BlendBlitImpl {
@@ -268,200 +268,200 @@ class BlendBlitImpl {
public:
template<template <bool DOSCALE, bool RGBMOD, bool ALPHAMOD> class PixelFunc, bool doscale, bool rgbmod, bool alphamod, bool coloradd1, bool loaddst>
static inline void blitInnerLoop(BlendBlit::Args &args) {
- const byte *in;
- byte *out;
-
- const byte rawcr = (args.color >> BlendBlit::kRModShift) & 0xFF;
- const byte rawcg = (args.color >> BlendBlit::kGModShift) & 0xFF;
- const byte rawcb = (args.color >> BlendBlit::kBModShift) & 0xFF;
- const byte ca = alphamod ? ((args.color >> BlendBlit::kAModShift) & 0xFF) : 255;
- const uint32 cr = coloradd1 ? (rgbmod ? (rawcr == 255 ? 256 : rawcr) : 256) : (rgbmod ? rawcr : 255);
- const uint32 cg = coloradd1 ? (rgbmod ? (rawcg == 255 ? 256 : rawcg) : 256) : (rgbmod ? rawcg : 255);
- const uint32 cb = coloradd1 ? (rgbmod ? (rawcb == 255 ? 256 : rawcb) : 256) : (rgbmod ? rawcb : 255);
-
- int scaleXCtr, scaleYCtr = args.scaleYoff;
- const byte *inBase;
-
- if (!doscale && (args.flipping & FLIP_H)) args.ino -= 4 * 3;
-
- for (uint32 i = 0; i < args.height; i++) {
- if (doscale) {
- inBase = args.ino + scaleYCtr / BlendBlit::SCALE_THRESHOLD * args.inoStep;
- scaleXCtr = args.scaleXoff;
- } else {
- in = args.ino;
- }
- out = args.outo;
- uint32 j = 0;
- for (; j + 4 <= args.width; j += 4) {
- uint32x4_t dstPixels;
- if (loaddst) dstPixels = vld1q_u32((const uint32 *)out);
- uint32x4_t srcPixels;
- if (!doscale) {
- srcPixels = vld1q_u32((const uint32 *)in);
- } else {
- srcPixels = vsetq_lane_u32(*(const uint32 *)(inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep), vmovq_n_u32(0), 0);
- scaleXCtr += args.scaleX;
- srcPixels = vsetq_lane_u32(*(const uint32 *)(inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep), srcPixels, 1);
- scaleXCtr += args.scaleX;
- srcPixels = vsetq_lane_u32(*(const uint32 *)(inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep), srcPixels, 2);
- scaleXCtr += args.scaleX;
- srcPixels = vsetq_lane_u32(*(const uint32 *)(inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep), srcPixels, 3);
- scaleXCtr += args.scaleX;
- }
- if (!doscale && (args.flipping & FLIP_H)) {
- srcPixels = vrev64q_u32(srcPixels);
- srcPixels = vcombine_u32(vget_high_u32(srcPixels), vget_low_u32(srcPixels));
- }
- {
- const uint32x4_t res = PixelFunc<doscale, rgbmod, alphamod>::simd(srcPixels, dstPixels, args.flipping & FLIP_H, ca, cr, cg, cb);
- vst1q_u32((uint32 *)out, res);
- }
- if (!doscale) in += args.inStep * 4;
- out += 4 * 4;
- }
- if (!doscale && (args.flipping & FLIP_H)) in += 4 * 3;
- for (; j < args.width; j++) {
- if (doscale) {
- in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
- }
-
- PixelFunc<doscale, rgbmod, alphamod>::normal(in, out, ca, cr, cg, cb);
-
- if (doscale)
- scaleXCtr += args.scaleX;
- else
- in += args.inStep;
- out += 4;
- }
- if (doscale)
- scaleYCtr += args.scaleY;
- else
- args.ino += args.inoStep;
- args.outo += args.dstPitch;
- }
+ const byte *in;
+ byte *out;
+
+ const byte rawcr = (args.color >> BlendBlit::kRModShift) & 0xFF;
+ const byte rawcg = (args.color >> BlendBlit::kGModShift) & 0xFF;
+ const byte rawcb = (args.color >> BlendBlit::kBModShift) & 0xFF;
+ const byte ca = alphamod ? ((args.color >> BlendBlit::kAModShift) & 0xFF) : 255;
+ const uint32 cr = coloradd1 ? (rgbmod ? (rawcr == 255 ? 256 : rawcr) : 256) : (rgbmod ? rawcr : 255);
+ const uint32 cg = coloradd1 ? (rgbmod ? (rawcg == 255 ? 256 : rawcg) : 256) : (rgbmod ? rawcg : 255);
+ const uint32 cb = coloradd1 ? (rgbmod ? (rawcb == 255 ? 256 : rawcb) : 256) : (rgbmod ? rawcb : 255);
+
+ int scaleXCtr, scaleYCtr = args.scaleYoff;
+ const byte *inBase;
+
+ if (!doscale && (args.flipping & FLIP_H)) args.ino -= 4 * 3;
+
+ for (uint32 i = 0; i < args.height; i++) {
+ if (doscale) {
+ inBase = args.ino + scaleYCtr / BlendBlit::SCALE_THRESHOLD * args.inoStep;
+ scaleXCtr = args.scaleXoff;
+ } else {
+ in = args.ino;
+ }
+ out = args.outo;
+ uint32 j = 0;
+ for (; j + 4 <= args.width; j += 4) {
+ uint32x4_t dstPixels;
+ if (loaddst) dstPixels = vld1q_u32((const uint32 *)out);
+ uint32x4_t srcPixels;
+ if (!doscale) {
+ srcPixels = vld1q_u32((const uint32 *)in);
+ } else {
+ srcPixels = vsetq_lane_u32(*(const uint32 *)(inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep), vmovq_n_u32(0), 0);
+ scaleXCtr += args.scaleX;
+ srcPixels = vsetq_lane_u32(*(const uint32 *)(inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep), srcPixels, 1);
+ scaleXCtr += args.scaleX;
+ srcPixels = vsetq_lane_u32(*(const uint32 *)(inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep), srcPixels, 2);
+ scaleXCtr += args.scaleX;
+ srcPixels = vsetq_lane_u32(*(const uint32 *)(inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep), srcPixels, 3);
+ scaleXCtr += args.scaleX;
+ }
+ if (!doscale && (args.flipping & FLIP_H)) {
+ srcPixels = vrev64q_u32(srcPixels);
+ srcPixels = vcombine_u32(vget_high_u32(srcPixels), vget_low_u32(srcPixels));
+ }
+ {
+ const uint32x4_t res = PixelFunc<doscale, rgbmod, alphamod>::simd(srcPixels, dstPixels, args.flipping & FLIP_H, ca, cr, cg, cb);
+ vst1q_u32((uint32 *)out, res);
+ }
+ if (!doscale) in += args.inStep * 4;
+ out += 4 * 4;
+ }
+ if (!doscale && (args.flipping & FLIP_H)) in += 4 * 3;
+ for (; j < args.width; j++) {
+ if (doscale) {
+ in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
+ }
+
+ PixelFunc<doscale, rgbmod, alphamod>::normal(in, out, ca, cr, cg, cb);
+
+ if (doscale)
+ scaleXCtr += args.scaleX;
+ else
+ in += args.inStep;
+ out += 4;
+ }
+ if (doscale)
+ scaleYCtr += args.scaleY;
+ else
+ args.ino += args.inoStep;
+ args.outo += args.dstPitch;
+ }
}
}; // end of class BlendBlitImpl
void BlendBlit::blitNEON(Args &args, const TSpriteBlendMode &blendMode, const AlphaType &alphaType) {
- bool rgbmod = ((args.color & kRGBModMask) != kRGBModMask);
- bool alphamod = ((args.color & kAModMask) != kAModMask);
- if (args.scaleX == SCALE_THRESHOLD && args.scaleY == SCALE_THRESHOLD) {
- if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_OPAQUE) {
- BlendBlitImpl::blitInnerLoop<OpaqueBlend, false, false, false, false, true>(args);
- } else if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_BINARY) {
- BlendBlitImpl::blitInnerLoop<BinaryBlend, false, false, false, false, true>(args);
- } else {
- if (blendMode == BLEND_ADDITIVE) {
- if (rgbmod) {
- if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, false, true, true, false, true>(args);
- } else {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, false, true, false, false, true>(args);
- }
- } else {
- if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, false, false, true, false, true>(args);
- } else {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, false, false, false, false, true>(args);
- }
- }
- } else if (blendMode == BLEND_SUBTRACTIVE) {
- if (rgbmod) {
- BlendBlitImpl::blitInnerLoop<SubtractiveBlend, false, true, false, false, true>(args);
- } else {
- BlendBlitImpl::blitInnerLoop<SubtractiveBlend, false, false, false, false, true>(args);
- }
- } else if (blendMode == BLEND_MULTIPLY) {
- if (rgbmod) {
- if (alphamod) {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, false, true, true, false, true>(args);
- } else {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, false, true, false, false, true>(args);
- }
- } else {
- if (alphamod) {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, false, false, true, false, true>(args);
- } else {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, false, false, false, false, true>(args);
- }
- }
- } else {
- assert(blendMode == BLEND_NORMAL);
- if (rgbmod) {
- if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, false, true, true, false, true>(args);
- } else {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, false, true, false, false, true>(args);
- }
- } else {
- if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, false, false, true, false, true>(args);
- } else {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, false, false, false, false, true>(args);
- }
- }
- }
- }
- } else {
- if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_OPAQUE) {
- BlendBlitImpl::blitInnerLoop<OpaqueBlend, true, false, false, false, true>(args);
- } else if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_BINARY) {
- BlendBlitImpl::blitInnerLoop<BinaryBlend, true, false, false, false, true>(args);
- } else {
- if (blendMode == BLEND_ADDITIVE) {
- if (rgbmod) {
- if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, true, true, true, false, true>(args);
- } else {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, true, true, false, false, true>(args);
- }
- } else {
- if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, true, false, true, false, true>(args);
- } else {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, true, false, false, false, true>(args);
- }
- }
- } else if (blendMode == BLEND_SUBTRACTIVE) {
- if (rgbmod) {
- BlendBlitImpl::blitInnerLoop<SubtractiveBlend, true, true, false, false, true>(args);
- } else {
- BlendBlitImpl::blitInnerLoop<SubtractiveBlend, true, false, false, false, true>(args);
- }
- } else if (blendMode == BLEND_MULTIPLY) {
- if (rgbmod) {
- if (alphamod) {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, true, true, true, false, true>(args);
- } else {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, true, true, false, false, true>(args);
- }
- } else {
- if (alphamod) {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, true, false, true, false, true>(args);
- } else {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, true, false, false, false, true>(args);
- }
- }
- } else {
- assert(blendMode == BLEND_NORMAL);
- if (rgbmod) {
- if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, true, true, true, false, true>(args);
- } else {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, true, true, false, false, true>(args);
- }
- } else {
- if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, true, false, true, false, true>(args);
- } else {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, true, false, false, false, true>(args);
- }
- }
- }
- }
- }
+ bool rgbmod = ((args.color & kRGBModMask) != kRGBModMask);
+ bool alphamod = ((args.color & kAModMask) != kAModMask);
+ if (args.scaleX == SCALE_THRESHOLD && args.scaleY == SCALE_THRESHOLD) {
+ if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_OPAQUE) {
+ BlendBlitImpl::blitInnerLoop<OpaqueBlend, false, false, false, false, true>(args);
+ } else if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_BINARY) {
+ BlendBlitImpl::blitInnerLoop<BinaryBlend, false, false, false, false, true>(args);
+ } else {
+ if (blendMode == BLEND_ADDITIVE) {
+ if (rgbmod) {
+ if (alphamod) {
+ BlendBlitImpl::blitInnerLoop<AdditiveBlend, false, true, true, false, true>(args);
+ } else {
+ BlendBlitImpl::blitInnerLoop<AdditiveBlend, false, true, false, false, true>(args);
+ }
+ } else {
+ if (alphamod) {
+ BlendBlitImpl::blitInnerLoop<AdditiveBlend, false, false, true, false, true>(args);
+ } else {
+ BlendBlitImpl::blitInnerLoop<AdditiveBlend, false, false, false, false, true>(args);
+ }
+ }
+ } else if (blendMode == BLEND_SUBTRACTIVE) {
+ if (rgbmod) {
+ BlendBlitImpl::blitInnerLoop<SubtractiveBlend, false, true, false, false, true>(args);
+ } else {
+ BlendBlitImpl::blitInnerLoop<SubtractiveBlend, false, false, false, false, true>(args);
+ }
+ } else if (blendMode == BLEND_MULTIPLY) {
+ if (rgbmod) {
+ if (alphamod) {
+ BlendBlitImpl::blitInnerLoop<MultiplyBlend, false, true, true, false, true>(args);
+ } else {
+ BlendBlitImpl::blitInnerLoop<MultiplyBlend, false, true, false, false, true>(args);
+ }
+ } else {
+ if (alphamod) {
+ BlendBlitImpl::blitInnerLoop<MultiplyBlend, false, false, true, false, true>(args);
+ } else {
+ BlendBlitImpl::blitInnerLoop<MultiplyBlend, false, false, false, false, true>(args);
+ }
+ }
+ } else {
+ assert(blendMode == BLEND_NORMAL);
+ if (rgbmod) {
+ if (alphamod) {
+ BlendBlitImpl::blitInnerLoop<AlphaBlend, false, true, true, false, true>(args);
+ } else {
+ BlendBlitImpl::blitInnerLoop<AlphaBlend, false, true, false, false, true>(args);
+ }
+ } else {
+ if (alphamod) {
+ BlendBlitImpl::blitInnerLoop<AlphaBlend, false, false, true, false, true>(args);
+ } else {
+ BlendBlitImpl::blitInnerLoop<AlphaBlend, false, false, false, false, true>(args);
+ }
+ }
+ }
+ }
+ } else {
+ if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_OPAQUE) {
+ BlendBlitImpl::blitInnerLoop<OpaqueBlend, true, false, false, false, true>(args);
+ } else if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_BINARY) {
+ BlendBlitImpl::blitInnerLoop<BinaryBlend, true, false, false, false, true>(args);
+ } else {
+ if (blendMode == BLEND_ADDITIVE) {
+ if (rgbmod) {
+ if (alphamod) {
+ BlendBlitImpl::blitInnerLoop<AdditiveBlend, true, true, true, false, true>(args);
+ } else {
+ BlendBlitImpl::blitInnerLoop<AdditiveBlend, true, true, false, false, true>(args);
+ }
+ } else {
+ if (alphamod) {
+ BlendBlitImpl::blitInnerLoop<AdditiveBlend, true, false, true, false, true>(args);
+ } else {
+ BlendBlitImpl::blitInnerLoop<AdditiveBlend, true, false, false, false, true>(args);
+ }
+ }
+ } else if (blendMode == BLEND_SUBTRACTIVE) {
+ if (rgbmod) {
+ BlendBlitImpl::blitInnerLoop<SubtractiveBlend, true, true, false, false, true>(args);
+ } else {
+ BlendBlitImpl::blitInnerLoop<SubtractiveBlend, true, false, false, false, true>(args);
+ }
+ } else if (blendMode == BLEND_MULTIPLY) {
+ if (rgbmod) {
+ if (alphamod) {
+ BlendBlitImpl::blitInnerLoop<MultiplyBlend, true, true, true, false, true>(args);
+ } else {
+ BlendBlitImpl::blitInnerLoop<MultiplyBlend, true, true, false, false, true>(args);
+ }
+ } else {
+ if (alphamod) {
+ BlendBlitImpl::blitInnerLoop<MultiplyBlend, true, false, true, false, true>(args);
+ } else {
+ BlendBlitImpl::blitInnerLoop<MultiplyBlend, true, false, false, false, true>(args);
+ }
+ }
+ } else {
+ assert(blendMode == BLEND_NORMAL);
+ if (rgbmod) {
+ if (alphamod) {
+ BlendBlitImpl::blitInnerLoop<AlphaBlend, true, true, true, false, true>(args);
+ } else {
+ BlendBlitImpl::blitInnerLoop<AlphaBlend, true, true, false, false, true>(args);
+ }
+ } else {
+ if (alphamod) {
+ BlendBlitImpl::blitInnerLoop<AlphaBlend, true, false, true, false, true>(args);
+ } else {
+ BlendBlitImpl::blitInnerLoop<AlphaBlend, true, false, false, false, true>(args);
+ }
+ }
+ }
+ }
+ }
}
} // end of namespace Graphics
diff --git a/graphics/blit/blit-scale.cpp b/graphics/blit/blit-scale.cpp
index bfaf0b926fa..c58593be505 100644
--- a/graphics/blit/blit-scale.cpp
+++ b/graphics/blit/blit-scale.cpp
@@ -74,7 +74,7 @@ bool scaleBlit(byte *dst, const byte *src,
const uint dstW, const uint dstH,
const uint srcW, const uint srcH,
const Graphics::PixelFormat &fmt,
- const byte flip) {
+ const byte flip) {
int *scaleCacheX = new int[dstW];
for (uint x = 0; x < dstW; x++) {
diff --git a/graphics/blit/blit-sse2.cpp b/graphics/blit/blit-sse2.cpp
index fb1362ba0b4..33a9c0936ad 100644
--- a/graphics/blit/blit-sse2.cpp
+++ b/graphics/blit/blit-sse2.cpp
@@ -28,82 +28,82 @@
namespace Graphics {
static FORCEINLINE __m128i sse2_mul32(__m128i a, __m128i b) {
- __m128i even = _mm_shuffle_epi32(_mm_mul_epu32(a, b), _MM_SHUFFLE(0, 0, 2, 0));
- __m128i odd = _mm_shuffle_epi32(_mm_mul_epu32(_mm_bsrli_si128(a, 4), _mm_bsrli_si128(b, 4)), _MM_SHUFFLE(0, 0, 2, 0));
- return _mm_unpacklo_epi32(even, odd);
+ __m128i even = _mm_shuffle_epi32(_mm_mul_epu32(a, b), _MM_SHUFFLE(0, 0, 2, 0));
+ __m128i odd = _mm_shuffle_epi32(_mm_mul_epu32(_mm_bsrli_si128(a, 4), _mm_bsrli_si128(b, 4)), _MM_SHUFFLE(0, 0, 2, 0));
+ return _mm_unpacklo_epi32(even, odd);
}
template<bool doscale, bool rgbmod, bool alphamod>
struct AlphaBlend {
- static inline __m128i simd(__m128i src, __m128i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
- __m128i ina;
- if (alphamod)
- ina = _mm_srli_epi32(_mm_mullo_epi16(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask)), _mm_set1_epi32(ca)), 8);
- else
- ina = _mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask));
- __m128i alphaMask = _mm_cmpeq_epi32(ina, _mm_setzero_si128());
-
- if (rgbmod) {
- __m128i dstR = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
- __m128i dstG = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
- __m128i dstB = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
- __m128i srcR = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
- __m128i srcG = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
- __m128i srcB = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
-
- dstR = _mm_slli_epi32(_mm_mullo_epi16(dstR, _mm_sub_epi32(_mm_set1_epi32(255), ina)), BlendBlit::kRModShift - 8);
- dstG = _mm_slli_epi32(_mm_mullo_epi16(dstG, _mm_sub_epi32(_mm_set1_epi32(255), ina)), BlendBlit::kGModShift - 8);
- dstB = _mm_mullo_epi16(dstB, _mm_sub_epi32(_mm_set1_epi32(255), ina));
- srcR = _mm_add_epi32(dstR, _mm_slli_epi32(_mm_mullo_epi16(_mm_srli_epi32(_mm_mullo_epi16(srcR, ina), 8), _mm_set1_epi32(cr)), BlendBlit::kRModShift - 8));
- srcG = _mm_add_epi32(dstG, _mm_slli_epi32(_mm_mullo_epi16(_mm_srli_epi32(_mm_mullo_epi16(srcG, ina), 8), _mm_set1_epi32(cg)), BlendBlit::kGModShift - 8));
- srcB = _mm_add_epi32(dstB, _mm_mullo_epi16(_mm_srli_epi32(_mm_mullo_epi16(srcB, ina), 8), _mm_set1_epi32(cb)));
- src = _mm_or_si128(_mm_and_si128(srcB, _mm_set1_epi32(BlendBlit::kBModMask)), _mm_set1_epi32(BlendBlit::kAModMask));
- src = _mm_or_si128(_mm_and_si128(srcG, _mm_set1_epi32(BlendBlit::kGModMask)), src);
- src = _mm_or_si128(_mm_and_si128(srcR, _mm_set1_epi32(BlendBlit::kRModMask)), src);
- } else {
- __m128i dstRB = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
- __m128i srcRB = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
- __m128i dstG = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
- __m128i srcG = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
-
- dstRB = _mm_srli_epi32(sse2_mul32(dstRB, _mm_sub_epi32(_mm_set1_epi32(255), ina)), 8);
- dstG = _mm_srli_epi32(_mm_mullo_epi16(dstG, _mm_sub_epi32(_mm_set1_epi32(255), ina)), 8);
- srcRB = _mm_slli_epi32(_mm_add_epi32(dstRB, _mm_srli_epi32(sse2_mul32(srcRB, ina), 8)), BlendBlit::kBModShift);
- srcG = _mm_slli_epi32(_mm_add_epi32(dstG, _mm_srli_epi32(_mm_mullo_epi16(srcG, ina), 8)), BlendBlit::kGModShift);
- src = _mm_or_si128(_mm_and_si128(srcG, _mm_set1_epi32(BlendBlit::kGModMask)), _mm_set1_epi32(BlendBlit::kAModMask));
- src = _mm_or_si128(_mm_and_si128(srcRB, _mm_set1_epi32(BlendBlit::kBModMask | BlendBlit::kRModMask)), src);
- }
-
- dst = _mm_and_si128(alphaMask, dst);
- src = _mm_andnot_si128(alphaMask, src);
- return _mm_or_si128(dst, src);
- }
-
- static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
- uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
-
- if (ina != 0) {
- uint outb = (out[BlendBlit::kBIndex] * (255 - ina) >> 8);
- uint outg = (out[BlendBlit::kGIndex] * (255 - ina) >> 8);
- uint outr = (out[BlendBlit::kRIndex] * (255 - ina) >> 8);
-
- out[BlendBlit::kAIndex] = 255;
- out[BlendBlit::kBIndex] = outb + (in[BlendBlit::kBIndex] * ina * cb >> 16);
- out[BlendBlit::kGIndex] = outg + (in[BlendBlit::kGIndex] * ina * cg >> 16);
- out[BlendBlit::kRIndex] = outr + (in[BlendBlit::kRIndex] * ina * cr >> 16);
- }
- }
+ static inline __m128i simd(__m128i src, __m128i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
+ __m128i ina;
+ if (alphamod)
+ ina = _mm_srli_epi32(_mm_mullo_epi16(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask)), _mm_set1_epi32(ca)), 8);
+ else
+ ina = _mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask));
+ __m128i alphaMask = _mm_cmpeq_epi32(ina, _mm_setzero_si128());
+
+ if (rgbmod) {
+ __m128i dstR = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
+ __m128i dstG = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
+ __m128i dstB = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
+ __m128i srcR = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
+ __m128i srcG = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
+ __m128i srcB = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
+
+ dstR = _mm_slli_epi32(_mm_mullo_epi16(dstR, _mm_sub_epi32(_mm_set1_epi32(255), ina)), BlendBlit::kRModShift - 8);
+ dstG = _mm_slli_epi32(_mm_mullo_epi16(dstG, _mm_sub_epi32(_mm_set1_epi32(255), ina)), BlendBlit::kGModShift - 8);
+ dstB = _mm_mullo_epi16(dstB, _mm_sub_epi32(_mm_set1_epi32(255), ina));
+ srcR = _mm_add_epi32(dstR, _mm_slli_epi32(_mm_mullo_epi16(_mm_srli_epi32(_mm_mullo_epi16(srcR, ina), 8), _mm_set1_epi32(cr)), BlendBlit::kRModShift - 8));
+ srcG = _mm_add_epi32(dstG, _mm_slli_epi32(_mm_mullo_epi16(_mm_srli_epi32(_mm_mullo_epi16(srcG, ina), 8), _mm_set1_epi32(cg)), BlendBlit::kGModShift - 8));
+ srcB = _mm_add_epi32(dstB, _mm_mullo_epi16(_mm_srli_epi32(_mm_mullo_epi16(srcB, ina), 8), _mm_set1_epi32(cb)));
+ src = _mm_or_si128(_mm_and_si128(srcB, _mm_set1_epi32(BlendBlit::kBModMask)), _mm_set1_epi32(BlendBlit::kAModMask));
+ src = _mm_or_si128(_mm_and_si128(srcG, _mm_set1_epi32(BlendBlit::kGModMask)), src);
+ src = _mm_or_si128(_mm_and_si128(srcR, _mm_set1_epi32(BlendBlit::kRModMask)), src);
+ } else {
+ __m128i dstRB = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
+ __m128i srcRB = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
+ __m128i dstG = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
+ __m128i srcG = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
+
+ dstRB = _mm_srli_epi32(sse2_mul32(dstRB, _mm_sub_epi32(_mm_set1_epi32(255), ina)), 8);
+ dstG = _mm_srli_epi32(_mm_mullo_epi16(dstG, _mm_sub_epi32(_mm_set1_epi32(255), ina)), 8);
+ srcRB = _mm_slli_epi32(_mm_add_epi32(dstRB, _mm_srli_epi32(sse2_mul32(srcRB, ina), 8)), BlendBlit::kBModShift);
+ srcG = _mm_slli_epi32(_mm_add_epi32(dstG, _mm_srli_epi32(_mm_mullo_epi16(srcG, ina), 8)), BlendBlit::kGModShift);
+ src = _mm_or_si128(_mm_and_si128(srcG, _mm_set1_epi32(BlendBlit::kGModMask)), _mm_set1_epi32(BlendBlit::kAModMask));
+ src = _mm_or_si128(_mm_and_si128(srcRB, _mm_set1_epi32(BlendBlit::kBModMask | BlendBlit::kRModMask)), src);
+ }
+
+ dst = _mm_and_si128(alphaMask, dst);
+ src = _mm_andnot_si128(alphaMask, src);
+ return _mm_or_si128(dst, src);
+ }
+
+ static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
+ uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
+
+ if (ina != 0) {
+ uint outb = (out[BlendBlit::kBIndex] * (255 - ina) >> 8);
+ uint outg = (out[BlendBlit::kGIndex] * (255 - ina) >> 8);
+ uint outr = (out[BlendBlit::kRIndex] * (255 - ina) >> 8);
+
+ out[BlendBlit::kAIndex] = 255;
+ out[BlendBlit::kBIndex] = outb + (in[BlendBlit::kBIndex] * ina * cb >> 16);
+ out[BlendBlit::kGIndex] = outg + (in[BlendBlit::kGIndex] * ina * cg >> 16);
+ out[BlendBlit::kRIndex] = outr + (in[BlendBlit::kRIndex] * ina * cr >> 16);
+ }
+ }
};
template<bool doscale, bool rgbmod, bool alphamod>
struct MultiplyBlend {
- static inline __m128i simd(__m128i src, __m128i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
- __m128i ina, alphaMask;
- if (alphamod) {
- ina = _mm_srli_epi32(_mm_mullo_epi16(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask)), _mm_set1_epi32(ca)), 8);
+ static inline __m128i simd(__m128i src, __m128i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
+ __m128i ina, alphaMask;
+ if (alphamod) {
+ ina = _mm_srli_epi32(_mm_mullo_epi16(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask)), _mm_set1_epi32(ca)), 8);
alphaMask = _mm_cmpeq_epi32(ina, _mm_setzero_si128());
} else {
- ina = _mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask));
+ ina = _mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask));
alphaMask = _mm_set1_epi32(BlendBlit::kAModMask);
}
@@ -123,351 +123,351 @@ struct MultiplyBlend {
src = _mm_or_si128(src, _mm_or_si128(srcB, _mm_or_si128(srcG, srcR)));
} else {
constexpr uint32 rbMask = BlendBlit::kRModMask | BlendBlit::kBModMask;
- __m128i srcG = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
- __m128i srcRB = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(rbMask)), BlendBlit::kBModShift);
- __m128i dstG = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
- __m128i dstRB = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(rbMask)), BlendBlit::kBModShift);
+ __m128i srcG = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
+ __m128i srcRB = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(rbMask)), BlendBlit::kBModShift);
+ __m128i dstG = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
+ __m128i dstRB = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(rbMask)), BlendBlit::kBModShift);
- srcG = _mm_and_si128(_mm_slli_epi32(_mm_mullo_epi16(dstG, _mm_srli_epi32(_mm_mullo_epi16(srcG, ina), 8)), 8), _mm_set1_epi32(BlendBlit::kGModMask));
+ srcG = _mm_and_si128(_mm_slli_epi32(_mm_mullo_epi16(dstG, _mm_srli_epi32(_mm_mullo_epi16(srcG, ina), 8)), 8), _mm_set1_epi32(BlendBlit::kGModMask));
srcRB = _mm_and_si128(_mm_mullo_epi16(dstRB, _mm_srli_epi32(_mm_and_si128(sse2_mul32(srcRB, ina), _mm_set1_epi32(rbMask)), 8)), _mm_set1_epi32(rbMask));
- src = _mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask));
- src = _mm_or_si128(src, _mm_or_si128(srcRB, srcG));
- }
-
- dst = _mm_and_si128(alphaMask, dst);
- src = _mm_andnot_si128(alphaMask, src);
- return _mm_or_si128(dst, src);
- }
-
- static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
- uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
-
- if (ina != 0) {
- out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] * ((in[BlendBlit::kBIndex] * cb * ina) >> 16) >> 8;
- out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] * ((in[BlendBlit::kGIndex] * cg * ina) >> 16) >> 8;
- out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] * ((in[BlendBlit::kRIndex] * cr * ina) >> 16) >> 8;
- }
- }
+ src = _mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask));
+ src = _mm_or_si128(src, _mm_or_si128(srcRB, srcG));
+ }
+
+ dst = _mm_and_si128(alphaMask, dst);
+ src = _mm_andnot_si128(alphaMask, src);
+ return _mm_or_si128(dst, src);
+ }
+
+ static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
+ uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
+
+ if (ina != 0) {
+ out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] * ((in[BlendBlit::kBIndex] * cb * ina) >> 16) >> 8;
+ out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] * ((in[BlendBlit::kGIndex] * cg * ina) >> 16) >> 8;
+ out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] * ((in[BlendBlit::kRIndex] * cr * ina) >> 16) >> 8;
+ }
+ }
};
template<bool doscale, bool rgbmod, bool alphamod>
struct OpaqueBlend {
- static inline __m128i simd(__m128i src, __m128i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
- return _mm_or_si128(src, _mm_set1_epi32(BlendBlit::kAModMask));
- }
+ static inline __m128i simd(__m128i src, __m128i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
+ return _mm_or_si128(src, _mm_set1_epi32(BlendBlit::kAModMask));
+ }
- static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
- *(uint32 *)out = *(const uint32 *)in | BlendBlit::kAModMask;
- }
+ static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
+ *(uint32 *)out = *(const uint32 *)in | BlendBlit::kAModMask;
+ }
};
template<bool doscale, bool rgbmod, bool alphamod>
struct BinaryBlend {
- static inline __m128i simd(__m128i src, __m128i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
- __m128i alphaMask = _mm_cmpeq_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask)), _mm_setzero_si128());
- dst = _mm_and_si128(dst, alphaMask);
- src = _mm_andnot_si128(alphaMask, _mm_or_si128(src, _mm_set1_epi32(BlendBlit::kAModMask)));
- return _mm_or_si128(src, dst);
- }
-
- static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
- uint32 pix = *(const uint32 *)in;
- int a = in[BlendBlit::kAIndex];
-
- if (a != 0) { // Full opacity (Any value not exactly 0 is Opaque here)
- *(uint32 *)out = pix;
- out[BlendBlit::kAIndex] = 0xFF;
- }
- }
+ static inline __m128i simd(__m128i src, __m128i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
+ __m128i alphaMask = _mm_cmpeq_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask)), _mm_setzero_si128());
+ dst = _mm_and_si128(dst, alphaMask);
+ src = _mm_andnot_si128(alphaMask, _mm_or_si128(src, _mm_set1_epi32(BlendBlit::kAModMask)));
+ return _mm_or_si128(src, dst);
+ }
+
+ static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
+ uint32 pix = *(const uint32 *)in;
+ int a = in[BlendBlit::kAIndex];
+
+ if (a != 0) { // Full opacity (Any value not exactly 0 is Opaque here)
+ *(uint32 *)out = pix;
+ out[BlendBlit::kAIndex] = 0xFF;
+ }
+ }
};
template<bool doscale, bool rgbmod, bool alphamod>
struct AdditiveBlend {
- static inline __m128i simd(__m128i src, __m128i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
- __m128i ina;
- if (alphamod)
- ina = _mm_srli_epi32(sse2_mul32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask)), _mm_set1_epi32(ca)), 8);
- else
- ina = _mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask));
- __m128i alphaMask = _mm_cmpeq_epi32(ina, _mm_set1_epi32(0));
-
- if (rgbmod) {
- __m128i srcb = _mm_and_si128(src, _mm_set1_epi32(BlendBlit::kBModMask));
- __m128i srcg = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
- __m128i srcr = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
- __m128i dstb = _mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kBModMask));
- __m128i dstg = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
- __m128i dstr = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
-
- srcb = _mm_and_si128(_mm_add_epi32(dstb, _mm_srli_epi32(sse2_mul32(srcb, sse2_mul32(_mm_set1_epi32(cb), ina)), 16)), _mm_set1_epi32(BlendBlit::kBModMask));
- srcg = _mm_and_si128(_mm_add_epi32(dstg, sse2_mul32(srcg, sse2_mul32(_mm_set1_epi32(cg), ina))), _mm_set1_epi32(BlendBlit::kGModMask));
- srcr = _mm_and_si128(_mm_add_epi32(dstr, _mm_srli_epi32(sse2_mul32(srcr, sse2_mul32(_mm_set1_epi32(cr), ina)), BlendBlit::kRModShift - 16)), _mm_set1_epi32(BlendBlit::kRModMask));
-
- src = _mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kAModMask));
- src = _mm_or_si128(src, _mm_or_si128(srcb, _mm_or_si128(srcg, srcr)));
- } else if (alphamod) {
- __m128i srcg = _mm_and_si128(src, _mm_set1_epi32(BlendBlit::kGModMask));
- __m128i srcrb = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
- __m128i dstg = _mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kGModMask));
- __m128i dstrb = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
-
- srcg = _mm_and_si128(_mm_add_epi32(dstg, _mm_srli_epi32(sse2_mul32(srcg, ina), 8)), _mm_set1_epi32(BlendBlit::kGModMask));
- srcrb = _mm_and_si128(_mm_add_epi32(dstrb, sse2_mul32(srcrb, ina)), _mm_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask));
-
- src = _mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kAModMask));
- src = _mm_or_si128(src, _mm_or_si128(srcrb, srcg));
- } else {
- __m128i srcg = _mm_and_si128(src, _mm_set1_epi32(BlendBlit::kGModMask));
- __m128i srcrb = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
- __m128i dstg = _mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kGModMask));
- __m128i dstrb = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
-
- srcg = _mm_and_si128(_mm_add_epi32(dstg, srcg), _mm_set1_epi32(BlendBlit::kGModMask));
- srcrb = _mm_and_si128(_mm_slli_epi32(_mm_add_epi32(dstrb, srcrb), 8), _mm_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask));
-
- src = _mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kAModMask));
- src = _mm_or_si128(src, _mm_or_si128(srcrb, srcg));
- }
-
- dst = _mm_and_si128(alphaMask, dst);
- src = _mm_andnot_si128(alphaMask, src);
- return _mm_or_si128(dst, src);
- }
-
- static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
- uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
-
- if (ina != 0) {
- out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] + ((in[BlendBlit::kBIndex] * cb * ina) >> 16);
- out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] + ((in[BlendBlit::kGIndex] * cg * ina) >> 16);
- out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] + ((in[BlendBlit::kRIndex] * cr * ina) >> 16);
- }
- }
+ static inline __m128i simd(__m128i src, __m128i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
+ __m128i ina;
+ if (alphamod)
+ ina = _mm_srli_epi32(sse2_mul32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask)), _mm_set1_epi32(ca)), 8);
+ else
+ ina = _mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask));
+ __m128i alphaMask = _mm_cmpeq_epi32(ina, _mm_set1_epi32(0));
+
+ if (rgbmod) {
+ __m128i srcb = _mm_and_si128(src, _mm_set1_epi32(BlendBlit::kBModMask));
+ __m128i srcg = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
+ __m128i srcr = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
+ __m128i dstb = _mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kBModMask));
+ __m128i dstg = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
+ __m128i dstr = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
+
+ srcb = _mm_and_si128(_mm_add_epi32(dstb, _mm_srli_epi32(sse2_mul32(srcb, sse2_mul32(_mm_set1_epi32(cb), ina)), 16)), _mm_set1_epi32(BlendBlit::kBModMask));
+ srcg = _mm_and_si128(_mm_add_epi32(dstg, sse2_mul32(srcg, sse2_mul32(_mm_set1_epi32(cg), ina))), _mm_set1_epi32(BlendBlit::kGModMask));
+ srcr = _mm_and_si128(_mm_add_epi32(dstr, _mm_srli_epi32(sse2_mul32(srcr, sse2_mul32(_mm_set1_epi32(cr), ina)), BlendBlit::kRModShift - 16)), _mm_set1_epi32(BlendBlit::kRModMask));
+
+ src = _mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kAModMask));
+ src = _mm_or_si128(src, _mm_or_si128(srcb, _mm_or_si128(srcg, srcr)));
+ } else if (alphamod) {
+ __m128i srcg = _mm_and_si128(src, _mm_set1_epi32(BlendBlit::kGModMask));
+ __m128i srcrb = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
+ __m128i dstg = _mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kGModMask));
+ __m128i dstrb = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
+
+ srcg = _mm_and_si128(_mm_add_epi32(dstg, _mm_srli_epi32(sse2_mul32(srcg, ina), 8)), _mm_set1_epi32(BlendBlit::kGModMask));
+ srcrb = _mm_and_si128(_mm_add_epi32(dstrb, sse2_mul32(srcrb, ina)), _mm_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask));
+
+ src = _mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kAModMask));
+ src = _mm_or_si128(src, _mm_or_si128(srcrb, srcg));
+ } else {
+ __m128i srcg = _mm_and_si128(src, _mm_set1_epi32(BlendBlit::kGModMask));
+ __m128i srcrb = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
+ __m128i dstg = _mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kGModMask));
+ __m128i dstrb = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
+
+ srcg = _mm_and_si128(_mm_add_epi32(dstg, srcg), _mm_set1_epi32(BlendBlit::kGModMask));
+ srcrb = _mm_and_si128(_mm_slli_epi32(_mm_add_epi32(dstrb, srcrb), 8), _mm_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask));
+
+ src = _mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kAModMask));
+ src = _mm_or_si128(src, _mm_or_si128(srcrb, srcg));
+ }
+
+ dst = _mm_and_si128(alphaMask, dst);
+ src = _mm_andnot_si128(alphaMask, src);
+ return _mm_or_si128(dst, src);
+ }
+
+ static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
+ uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
+
+ if (ina != 0) {
+ out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] + ((in[BlendBlit::kBIndex] * cb * ina) >> 16);
+ out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] + ((in[BlendBlit::kGIndex] * cg * ina) >> 16);
+ out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] + ((in[BlendBlit::kRIndex] * cr * ina) >> 16);
+ }
+ }
};
template<bool doscale, bool rgbmod, bool alphamod>
struct SubtractiveBlend {
- static inline __m128i simd(__m128i src, __m128i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
- __m128i ina = _mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask));
- __m128i srcb = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
- __m128i srcg = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
- __m128i srcr = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
- __m128i dstb = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
- __m128i dstg = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
- __m128i dstr = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
-
- srcb = _mm_and_si128(_mm_slli_epi32(_mm_max_epi16(_mm_sub_epi32(dstb, _mm_srli_epi32(sse2_mul32(sse2_mul32(srcb, _mm_set1_epi32(cb)), sse2_mul32(dstb, ina)), 24)), _mm_set1_epi32(0)), BlendBlit::kBModShift), _mm_set1_epi32(BlendBlit::kBModMask));
- srcg = _mm_and_si128(_mm_slli_epi32(_mm_max_epi16(_mm_sub_epi32(dstg, _mm_srli_epi32(sse2_mul32(sse2_mul32(srcg, _mm_set1_epi32(cg)), sse2_mul32(dstg, ina)), 24)), _mm_set1_epi32(0)), BlendBlit::kGModShift), _mm_set1_epi32(BlendBlit::kGModMask));
- srcr = _mm_and_si128(_mm_slli_epi32(_mm_max_epi16(_mm_sub_epi32(dstr, _mm_srli_epi32(sse2_mul32(sse2_mul32(srcr, _mm_set1_epi32(cr)), sse2_mul32(dstr, ina)), 24)), _mm_set1_epi32(0)), BlendBlit::kRModShift), _mm_set1_epi32(BlendBlit::kRModMask));
-
- return _mm_or_si128(_mm_set1_epi32(BlendBlit::kAModMask), _mm_or_si128(srcb, _mm_or_si128(srcg, srcr)));
- }
-
- static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
- out[BlendBlit::kAIndex] = 255;
- out[BlendBlit::kBIndex] = MAX<int32>(out[BlendBlit::kBIndex] - ((in[BlendBlit::kBIndex] * cb * (out[BlendBlit::kBIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
- out[BlendBlit::kGIndex] = MAX<int32>(out[BlendBlit::kGIndex] - ((in[BlendBlit::kGIndex] * cg * (out[BlendBlit::kGIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
- out[BlendBlit::kRIndex] = MAX<int32>(out[BlendBlit::kRIndex] - ((in[BlendBlit::kRIndex] * cr * (out[BlendBlit::kRIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
- }
+ static inline __m128i simd(__m128i src, __m128i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
+ __m128i ina = _mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask));
+ __m128i srcb = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
+ __m128i srcg = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
+ __m128i srcr = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
+ __m128i dstb = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
+ __m128i dstg = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
+ __m128i dstr = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
+
+ srcb = _mm_and_si128(_mm_slli_epi32(_mm_max_epi16(_mm_sub_epi32(dstb, _mm_srli_epi32(sse2_mul32(sse2_mul32(srcb, _mm_set1_epi32(cb)), sse2_mul32(dstb, ina)), 24)), _mm_set1_epi32(0)), BlendBlit::kBModShift), _mm_set1_epi32(BlendBlit::kBModMask));
+ srcg = _mm_and_si128(_mm_slli_epi32(_mm_max_epi16(_mm_sub_epi32(dstg, _mm_srli_epi32(sse2_mul32(sse2_mul32(srcg, _mm_set1_epi32(cg)), sse2_mul32(dstg, ina)), 24)), _mm_set1_epi32(0)), BlendBlit::kGModShift), _mm_set1_epi32(BlendBlit::kGModMask));
+ srcr = _mm_and_si128(_mm_slli_epi32(_mm_max_epi16(_mm_sub_epi32(dstr, _mm_srli_epi32(sse2_mul32(sse2_mul32(srcr, _mm_set1_epi32(cr)), sse2_mul32(dstr, ina)), 24)), _mm_set1_epi32(0)), BlendBlit::kRModShift), _mm_set1_epi32(BlendBlit::kRModMask));
+
+ return _mm_or_si128(_mm_set1_epi32(BlendBlit::kAModMask), _mm_or_si128(srcb, _mm_or_si128(srcg, srcr)));
+ }
+
+ static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
+ out[BlendBlit::kAIndex] = 255;
+ out[BlendBlit::kBIndex] = MAX<int32>(out[BlendBlit::kBIndex] - ((in[BlendBlit::kBIndex] * cb * (out[BlendBlit::kBIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
+ out[BlendBlit::kGIndex] = MAX<int32>(out[BlendBlit::kGIndex] - ((in[BlendBlit::kGIndex] * cg * (out[BlendBlit::kGIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
+ out[BlendBlit::kRIndex] = MAX<int32>(out[BlendBlit::kRIndex] - ((in[BlendBlit::kRIndex] * cr * (out[BlendBlit::kRIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
+ }
};
class BlendBlitImpl {
public:
template<template <bool DOSCALE, bool RGBMOD, bool ALPHAMOD> class PixelFunc, bool doscale, bool rgbmod, bool alphamod, bool coloradd1, bool loaddst>
static inline void blitInnerLoop(BlendBlit::Args &args) {
- const byte *in;
- byte *out;
-
- const byte rawcr = (args.color >> BlendBlit::kRModShift) & 0xFF;
- const byte rawcg = (args.color >> BlendBlit::kGModShift) & 0xFF;
- const byte rawcb = (args.color >> BlendBlit::kBModShift) & 0xFF;
- const byte ca = alphamod ? ((args.color >> BlendBlit::kAModShift) & 0xFF) : 255;
- const uint32 cr = coloradd1 ? (rgbmod ? (rawcr == 255 ? 256 : rawcr) : 256) : (rgbmod ? rawcr : 255);
- const uint32 cg = coloradd1 ? (rgbmod ? (rawcg == 255 ? 256 : rawcg) : 256) : (rgbmod ? rawcg : 255);
- const uint32 cb = coloradd1 ? (rgbmod ? (rawcb == 255 ? 256 : rawcb) : 256) : (rgbmod ? rawcb : 255);
-
- int scaleXCtr, scaleYCtr = args.scaleYoff;
- const byte *inBase;
-
- if (!doscale && (args.flipping & FLIP_H)) args.ino -= 4 * 3;
-
- for (uint32 i = 0; i < args.height; i++) {
- if (doscale) {
- inBase = args.ino + scaleYCtr / BlendBlit::SCALE_THRESHOLD * args.inoStep;
- scaleXCtr = args.scaleXoff;
- } else {
- in = args.ino;
- }
- out = args.outo;
-
- uint32 j = 0;
- for (; j + 4 <= args.width; j += 4) {
- __m128i dstPixels, srcPixels;
- if (loaddst) dstPixels = _mm_loadu_si128((const __m128i *)out);
- if (!doscale) {
- srcPixels = _mm_loadu_si128((const __m128i *)in);
- } else {
- srcPixels = _mm_setr_epi32(
- *(const uint32 *)(inBase + (ptrdiff_t)(scaleXCtr + args.scaleX * 0) / (ptrdiff_t)BlendBlit::SCALE_THRESHOLD * args.inStep),
- *(const uint32 *)(inBase + (ptrdiff_t)(scaleXCtr + args.scaleX * 1) / (ptrdiff_t)BlendBlit::SCALE_THRESHOLD * args.inStep),
- *(const uint32 *)(inBase + (ptrdiff_t)(scaleXCtr + args.scaleX * 2) / (ptrdiff_t)BlendBlit::SCALE_THRESHOLD * args.inStep),
- *(const uint32 *)(inBase + (ptrdiff_t)(scaleXCtr + args.scaleX * 3) / (ptrdiff_t)BlendBlit::SCALE_THRESHOLD * args.inStep)
- );
- scaleXCtr += args.scaleX * 4;
- }
- if (!doscale && (args.flipping & FLIP_H)) {
- srcPixels = _mm_shuffle_epi32(srcPixels, _MM_SHUFFLE(0, 1, 2, 3));
- }
- {
- const __m128i res = PixelFunc<doscale, rgbmod, alphamod>::simd(srcPixels, dstPixels, args.flipping & FLIP_H, ca, cr, cg, cb);
- _mm_storeu_si128((__m128i *)out, res);
- }
- if (!doscale) in += (ptrdiff_t)args.inStep * 4;
- out += 4ULL * 4;
- }
- if (!doscale && (args.flipping & FLIP_H)) in += 4 * 3;
- for (; j < args.width; j++) {
- if (doscale) {
- in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
- }
-
- PixelFunc<doscale, rgbmod, alphamod>::normal(in, out, ca, cr, cg, cb);
-
- if (doscale)
- scaleXCtr += args.scaleX;
- else
- in += args.inStep;
- out += 4;
- }
- if (doscale)
- scaleYCtr += args.scaleY;
- else
- args.ino += args.inoStep;
- args.outo += args.dstPitch;
- }
+ const byte *in;
+ byte *out;
+
+ const byte rawcr = (args.color >> BlendBlit::kRModShift) & 0xFF;
+ const byte rawcg = (args.color >> BlendBlit::kGModShift) & 0xFF;
+ const byte rawcb = (args.color >> BlendBlit::kBModShift) & 0xFF;
+ const byte ca = alphamod ? ((args.color >> BlendBlit::kAModShift) & 0xFF) : 255;
+ const uint32 cr = coloradd1 ? (rgbmod ? (rawcr == 255 ? 256 : rawcr) : 256) : (rgbmod ? rawcr : 255);
+ const uint32 cg = coloradd1 ? (rgbmod ? (rawcg == 255 ? 256 : rawcg) : 256) : (rgbmod ? rawcg : 255);
+ const uint32 cb = coloradd1 ? (rgbmod ? (rawcb == 255 ? 256 : rawcb) : 256) : (rgbmod ? rawcb : 255);
+
+ int scaleXCtr, scaleYCtr = args.scaleYoff;
+ const byte *inBase;
+
+ if (!doscale && (args.flipping & FLIP_H)) args.ino -= 4 * 3;
+
+ for (uint32 i = 0; i < args.height; i++) {
+ if (doscale) {
+ inBase = args.ino + scaleYCtr / BlendBlit::SCALE_THRESHOLD * args.inoStep;
+ scaleXCtr = args.scaleXoff;
+ } else {
+ in = args.ino;
+ }
+ out = args.outo;
+
+ uint32 j = 0;
+ for (; j + 4 <= args.width; j += 4) {
+ __m128i dstPixels, srcPixels;
+ if (loaddst) dstPixels = _mm_loadu_si128((const __m128i *)out);
+ if (!doscale) {
+ srcPixels = _mm_loadu_si128((const __m128i *)in);
+ } else {
+ srcPixels = _mm_setr_epi32(
+ *(const uint32 *)(inBase + (ptrdiff_t)(scaleXCtr + args.scaleX * 0) / (ptrdiff_t)BlendBlit::SCALE_THRESHOLD * args.inStep),
+ *(const uint32 *)(inBase + (ptrdiff_t)(scaleXCtr + args.scaleX * 1) / (ptrdiff_t)BlendBlit::SCALE_THRESHOLD * args.inStep),
+ *(const uint32 *)(inBase + (ptrdiff_t)(scaleXCtr + args.scaleX * 2) / (ptrdiff_t)BlendBlit::SCALE_THRESHOLD * args.inStep),
+ *(const uint32 *)(inBase + (ptrdiff_t)(scaleXCtr + args.scaleX * 3) / (ptrdiff_t)BlendBlit::SCALE_THRESHOLD * args.inStep)
+ );
+ scaleXCtr += args.scaleX * 4;
+ }
+ if (!doscale && (args.flipping & FLIP_H)) {
+ srcPixels = _mm_shuffle_epi32(srcPixels, _MM_SHUFFLE(0, 1, 2, 3));
+ }
+ {
+ const __m128i res = PixelFunc<doscale, rgbmod, alphamod>::simd(srcPixels, dstPixels, args.flipping & FLIP_H, ca, cr, cg, cb);
+ _mm_storeu_si128((__m128i *)out, res);
+ }
+ if (!doscale) in += (ptrdiff_t)args.inStep * 4;
+ out += 4ULL * 4;
+ }
+ if (!doscale && (args.flipping & FLIP_H)) in += 4 * 3;
+ for (; j < args.width; j++) {
+ if (doscale) {
+ in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
+ }
+
+ PixelFunc<doscale, rgbmod, alphamod>::normal(in, out, ca, cr, cg, cb);
+
+ if (doscale)
+ scaleXCtr += args.scaleX;
+ else
+ in += args.inStep;
+ out += 4;
+ }
+ if (doscale)
+ scaleYCtr += args.scaleY;
+ else
+ args.ino += args.inoStep;
+ args.outo += args.dstPitch;
+ }
}
}; // End of class BlendBlitImpl
void BlendBlit::blitSSE2(Args &args, const TSpriteBlendMode &blendMode, const AlphaType &alphaType) {
- bool rgbmod = ((args.color & kRGBModMask) != kRGBModMask);
- bool alphamod = ((args.color & kAModMask) != kAModMask);
- if (args.scaleX == SCALE_THRESHOLD && args.scaleY == SCALE_THRESHOLD) {
- if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_OPAQUE) {
- BlendBlitImpl::blitInnerLoop<OpaqueBlend, false, false, false, false, true>(args);
- } else if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_BINARY) {
- BlendBlitImpl::blitInnerLoop<BinaryBlend, false, false, false, false, true>(args);
- } else {
- if (blendMode == BLEND_ADDITIVE) {
- if (rgbmod) {
- if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, false, true, true, false, true>(args);
- } else {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, false, true, false, false, true>(args);
- }
- } else {
- if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, false, false, true, false, true>(args);
- } else {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, false, false, false, false, true>(args);
- }
- }
- } else if (blendMode == BLEND_SUBTRACTIVE) {
- if (rgbmod) {
- BlendBlitImpl::blitInnerLoop<SubtractiveBlend, false, true, false, false, true>(args);
- } else {
- BlendBlitImpl::blitInnerLoop<SubtractiveBlend, false, false, false, false, true>(args);
- }
- } else if (blendMode == BLEND_MULTIPLY) {
- if (rgbmod) {
- if (alphamod) {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, false, true, true, false, true>(args);
- } else {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, false, true, false, false, true>(args);
- }
- } else {
- if (alphamod) {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, false, false, true, false, true>(args);
- } else {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, false, false, false, false, true>(args);
- }
- }
- } else {
- assert(blendMode == BLEND_NORMAL);
- if (rgbmod) {
- if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, false, true, true, false, true>(args);
- } else {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, false, true, false, false, true>(args);
- }
- } else {
- if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, false, false, true, false, true>(args);
- } else {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, false, false, false, false, true>(args);
- }
- }
- }
- }
- } else {
- if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_OPAQUE) {
- BlendBlitImpl::blitInnerLoop<OpaqueBlend, true, false, false, false, true>(args);
- } else if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_BINARY) {
- BlendBlitImpl::blitInnerLoop<BinaryBlend, true, false, false, false, true>(args);
- } else {
- if (blendMode == BLEND_ADDITIVE) {
- if (rgbmod) {
- if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, true, true, true, false, true>(args);
- } else {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, true, true, false, false, true>(args);
- }
- } else {
- if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, true, false, true, false, true>(args);
- } else {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, true, false, false, false, true>(args);
- }
- }
- } else if (blendMode == BLEND_SUBTRACTIVE) {
- if (rgbmod) {
- BlendBlitImpl::blitInnerLoop<SubtractiveBlend, true, true, false, false, true>(args);
- } else {
- BlendBlitImpl::blitInnerLoop<SubtractiveBlend, true, false, false, false, true>(args);
- }
- } else if (blendMode == BLEND_MULTIPLY) {
- if (rgbmod) {
- if (alphamod) {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, true, true, true, false, true>(args);
- } else {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, true, true, false, false, true>(args);
- }
- } else {
- if (alphamod) {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, true, false, true, false, true>(args);
- } else {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, true, false, false, false, true>(args);
- }
- }
- } else {
- assert(blendMode == BLEND_NORMAL);
- if (rgbmod) {
- if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, true, true, true, false, true>(args);
- } else {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, true, true, false, false, true>(args);
- }
- } else {
- if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, true, false, true, false, true>(args);
- } else {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, true, false, false, false, true>(args);
- }
- }
- }
- }
- }
+ bool rgbmod = ((args.color & kRGBModMask) != kRGBModMask);
+ bool alphamod = ((args.color & kAModMask) != kAModMask);
+ if (args.scaleX == SCALE_THRESHOLD && args.scaleY == SCALE_THRESHOLD) {
+ if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_OPAQUE) {
+ BlendBlitImpl::blitInnerLoop<OpaqueBlend, false, false, false, false, true>(args);
+ } else if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_BINARY) {
+ BlendBlitImpl::blitInnerLoop<BinaryBlend, false, false, false, false, true>(args);
+ } else {
+ if (blendMode == BLEND_ADDITIVE) {
+ if (rgbmod) {
+ if (alphamod) {
+ BlendBlitImpl::blitInnerLoop<AdditiveBlend, false, true, true, false, true>(args);
+ } else {
+ BlendBlitImpl::blitInnerLoop<AdditiveBlend, false, true, false, false, true>(args);
+ }
+ } else {
+ if (alphamod) {
+ BlendBlitImpl::blitInnerLoop<AdditiveBlend, false, false, true, false, true>(args);
+ } else {
+ BlendBlitImpl::blitInnerLoop<AdditiveBlend, false, false, false, false, true>(args);
+ }
+ }
+ } else if (blendMode == BLEND_SUBTRACTIVE) {
+ if (rgbmod) {
+ BlendBlitImpl::blitInnerLoop<SubtractiveBlend, false, true, false, false, true>(args);
+ } else {
+ BlendBlitImpl::blitInnerLoop<SubtractiveBlend, false, false, false, false, true>(args);
+ }
+ } else if (blendMode == BLEND_MULTIPLY) {
+ if (rgbmod) {
+ if (alphamod) {
+ BlendBlitImpl::blitInnerLoop<MultiplyBlend, false, true, true, false, true>(args);
+ } else {
+ BlendBlitImpl::blitInnerLoop<MultiplyBlend, false, true, false, false, true>(args);
+ }
+ } else {
+ if (alphamod) {
+ BlendBlitImpl::blitInnerLoop<MultiplyBlend, false, false, true, false, true>(args);
+ } else {
+ BlendBlitImpl::blitInnerLoop<MultiplyBlend, false, false, false, false, true>(args);
+ }
+ }
+ } else {
+ assert(blendMode == BLEND_NORMAL);
+ if (rgbmod) {
+ if (alphamod) {
+ BlendBlitImpl::blitInnerLoop<AlphaBlend, false, true, true, false, true>(args);
+ } else {
+ BlendBlitImpl::blitInnerLoop<AlphaBlend, false, true, false, false, true>(args);
+ }
+ } else {
+ if (alphamod) {
+ BlendBlitImpl::blitInnerLoop<AlphaBlend, false, false, true, false, true>(args);
+ } else {
+ BlendBlitImpl::blitInnerLoop<AlphaBlend, false, false, false, false, true>(args);
+ }
+ }
+ }
+ }
+ } else {
+ if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_OPAQUE) {
+ BlendBlitImpl::blitInnerLoop<OpaqueBlend, true, false, false, false, true>(args);
+ } else if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_BINARY) {
+ BlendBlitImpl::blitInnerLoop<BinaryBlend, true, false, false, false, true>(args);
+ } else {
+ if (blendMode == BLEND_ADDITIVE) {
+ if (rgbmod) {
+ if (alphamod) {
+ BlendBlitImpl::blitInnerLoop<AdditiveBlend, true, true, true, false, true>(args);
+ } else {
+ BlendBlitImpl::blitInnerLoop<AdditiveBlend, true, true, false, false, true>(args);
+ }
+ } else {
+ if (alphamod) {
+ BlendBlitImpl::blitInnerLoop<AdditiveBlend, true, false, true, false, true>(args);
+ } else {
+ BlendBlitImpl::blitInnerLoop<AdditiveBlend, true, false, false, false, true>(args);
+ }
+ }
+ } else if (blendMode == BLEND_SUBTRACTIVE) {
+ if (rgbmod) {
+ BlendBlitImpl::blitInnerLoop<SubtractiveBlend, true, true, false, false, true>(args);
+ } else {
+ BlendBlitImpl::blitInnerLoop<SubtractiveBlend, true, false, false, false, true>(args);
+ }
+ } else if (blendMode == BLEND_MULTIPLY) {
+ if (rgbmod) {
+ if (alphamod) {
+ BlendBlitImpl::blitInnerLoop<MultiplyBlend, true, true, true, false, true>(args);
+ } else {
+ BlendBlitImpl::blitInnerLoop<MultiplyBlend, true, true, false, false, true>(args);
+ }
+ } else {
+ if (alphamod) {
+ BlendBlitImpl::blitInnerLoop<MultiplyBlend, true, false, true, false, true>(args);
+ } else {
+ BlendBlitImpl::blitInnerLoop<MultiplyBlend, true, false, false, false, true>(args);
+ }
+ }
+ } else {
+ assert(blendMode == BLEND_NORMAL);
+ if (rgbmod) {
+ if (alphamod) {
+ BlendBlitImpl::blitInnerLoop<AlphaBlend, true, true, true, false, true>(args);
+ } else {
+ BlendBlitImpl::blitInnerLoop<AlphaBlend, true, true, false, false, true>(args);
+ }
+ } else {
+ if (alphamod) {
+ BlendBlitImpl::blitInnerLoop<AlphaBlend, true, false, true, false, true>(args);
+ } else {
+ BlendBlitImpl::blitInnerLoop<AlphaBlend, true, false, false, false, true>(args);
+ }
+ }
+ }
+ }
+ }
}
} // End of namespace Graphics
Commit: 0d0aa24df3140e7147c30ce156bccc438b186a46
https://github.com/scummvm/scummvm/commit/0d0aa24df3140e7147c30ce156bccc438b186a46
Author: Kaloyan Chehlarski (strahy at outlook.com)
Date: 2023-10-22T00:25:56+03:00
Commit Message:
GRAPHICS: Allow AVX2 optimized blit path to build
The way the SIMD blitting paths were previously organized
ensured that the AVX2 one would never actually build. This
commit separates the different implementations into
classes with differing names, ensuring the shadowing
cannot occur.
Changed paths:
graphics/blit.h
graphics/blit/blit-alpha.cpp
graphics/blit/blit-avx2.cpp
graphics/blit/blit-neon.cpp
graphics/blit/blit-sse2.cpp
diff --git a/graphics/blit.h b/graphics/blit.h
index 4c9a91e0da2..eca242eb206 100644
--- a/graphics/blit.h
+++ b/graphics/blit.h
@@ -231,7 +231,10 @@ private:
typedef void(*BlitFunc)(Args &, const TSpriteBlendMode &, const AlphaType &);
static BlitFunc blitFunc;
friend class ::BlendBlitUnfilteredTestSuite;
- friend class BlendBlitImpl;
+ friend class BlendBlitImpl_Default;
+ friend class BlendBlitImpl_NEON;
+ friend class BlendBlitImpl_SSE2;
+ friend class BlendBlitImpl_AVX2;
public:
static const int SCALE_THRESHOLD = 0x100;
diff --git a/graphics/blit/blit-alpha.cpp b/graphics/blit/blit-alpha.cpp
index bcf7b4cba4a..5975410c69a 100644
--- a/graphics/blit/blit-alpha.cpp
+++ b/graphics/blit/blit-alpha.cpp
@@ -238,7 +238,7 @@ void BlendBlit::blit(byte *dst, const byte *src,
blitFunc(args, blendMode, alphaType);
}
-class BlendBlitImpl {
+class BlendBlitImpl_Default {
public:
/**
* Optimized version of doBlit to be used with multiply blended blitting
@@ -543,121 +543,121 @@ static void doBlitBinaryBlendLogicGeneric(BlendBlit::Args &args) {
}
}
-}; // end of class BlendBlitImpl
+}; // end of class BlendBlitImpl_Default
void BlendBlit::blitGeneric(Args &args, const TSpriteBlendMode &blendMode, const AlphaType &alphaType) {
bool rgbmod = ((args.color & kRGBModMask) != kRGBModMask);
bool alphamod = ((args.color & kAModMask) != kAModMask);
if (args.scaleX == BlendBlit::SCALE_THRESHOLD && args.scaleY == BlendBlit::SCALE_THRESHOLD) {
if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_OPAQUE) {
- BlendBlitImpl::doBlitOpaqueBlendLogicGeneric<false>(args);
+ BlendBlitImpl_Default::doBlitOpaqueBlendLogicGeneric<false>(args);
} else if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_BINARY) {
- BlendBlitImpl::doBlitBinaryBlendLogicGeneric<false>(args);
+ BlendBlitImpl_Default::doBlitBinaryBlendLogicGeneric<false>(args);
} else {
if (blendMode == BLEND_ADDITIVE) {
if (rgbmod) {
if (alphamod) {
- BlendBlitImpl::doBlitAdditiveBlendLogicGeneric<false, true, true>(args);
+ BlendBlitImpl_Default::doBlitAdditiveBlendLogicGeneric<false, true, true>(args);
} else {
- BlendBlitImpl::doBlitAdditiveBlendLogicGeneric<false, true, false>(args);
+ BlendBlitImpl_Default::doBlitAdditiveBlendLogicGeneric<false, true, false>(args);
}
} else {
if (alphamod) {
- BlendBlitImpl::doBlitAdditiveBlendLogicGeneric<false, false, true>(args);
+ BlendBlitImpl_Default::doBlitAdditiveBlendLogicGeneric<false, false, true>(args);
} else {
- BlendBlitImpl::doBlitAdditiveBlendLogicGeneric<false, false, false>(args);
+ BlendBlitImpl_Default::doBlitAdditiveBlendLogicGeneric<false, false, false>(args);
}
}
} else if (blendMode == BLEND_SUBTRACTIVE) {
if (rgbmod) {
- BlendBlitImpl::doBlitSubtractiveBlendLogicGeneric<false, true>(args);
+ BlendBlitImpl_Default::doBlitSubtractiveBlendLogicGeneric<false, true>(args);
} else {
- BlendBlitImpl::doBlitSubtractiveBlendLogicGeneric<false, false>(args);
+ BlendBlitImpl_Default::doBlitSubtractiveBlendLogicGeneric<false, false>(args);
}
} else if (blendMode == BLEND_MULTIPLY) {
if (rgbmod) {
if (alphamod) {
- BlendBlitImpl::doBlitMultiplyBlendLogicGeneric<false, true, true>(args);
+ BlendBlitImpl_Default::doBlitMultiplyBlendLogicGeneric<false, true, true>(args);
} else {
- BlendBlitImpl::doBlitMultiplyBlendLogicGeneric<false, true, false>(args);
+ BlendBlitImpl_Default::doBlitMultiplyBlendLogicGeneric<false, true, false>(args);
}
} else {
if (alphamod) {
- BlendBlitImpl::doBlitMultiplyBlendLogicGeneric<false, false, true>(args);
+ BlendBlitImpl_Default::doBlitMultiplyBlendLogicGeneric<false, false, true>(args);
} else {
- BlendBlitImpl::doBlitMultiplyBlendLogicGeneric<false, false, false>(args);
+ BlendBlitImpl_Default::doBlitMultiplyBlendLogicGeneric<false, false, false>(args);
}
}
} else {
assert(blendMode == BLEND_NORMAL);
if (rgbmod) {
if (alphamod) {
- BlendBlitImpl::doBlitAlphaBlendLogicGeneric<false, true, true>(args);
+ BlendBlitImpl_Default::doBlitAlphaBlendLogicGeneric<false, true, true>(args);
} else {
- BlendBlitImpl::doBlitAlphaBlendLogicGeneric<false, true, false>(args);
+ BlendBlitImpl_Default::doBlitAlphaBlendLogicGeneric<false, true, false>(args);
}
} else {
if (alphamod) {
- BlendBlitImpl::doBlitAlphaBlendLogicGeneric<false, false, true>(args);
+ BlendBlitImpl_Default::doBlitAlphaBlendLogicGeneric<false, false, true>(args);
} else {
- BlendBlitImpl::doBlitAlphaBlendLogicGeneric<false, false, false>(args);
+ BlendBlitImpl_Default::doBlitAlphaBlendLogicGeneric<false, false, false>(args);
}
}
}
}
} else {
if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_OPAQUE) {
- BlendBlitImpl::doBlitOpaqueBlendLogicGeneric<true>(args);
+ BlendBlitImpl_Default::doBlitOpaqueBlendLogicGeneric<true>(args);
} else if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_BINARY) {
- BlendBlitImpl::doBlitBinaryBlendLogicGeneric<true>(args);
+ BlendBlitImpl_Default::doBlitBinaryBlendLogicGeneric<true>(args);
} else {
if (blendMode == BLEND_ADDITIVE) {
if (rgbmod) {
if (alphamod) {
- BlendBlitImpl::doBlitAdditiveBlendLogicGeneric<true, true, true>(args);
+ BlendBlitImpl_Default::doBlitAdditiveBlendLogicGeneric<true, true, true>(args);
} else {
- BlendBlitImpl::doBlitAdditiveBlendLogicGeneric<true, true, false>(args);
+ BlendBlitImpl_Default::doBlitAdditiveBlendLogicGeneric<true, true, false>(args);
}
} else {
if (alphamod) {
- BlendBlitImpl::doBlitAdditiveBlendLogicGeneric<true, false, true>(args);
+ BlendBlitImpl_Default::doBlitAdditiveBlendLogicGeneric<true, false, true>(args);
} else {
- BlendBlitImpl::doBlitAdditiveBlendLogicGeneric<true, false, false>(args);
+ BlendBlitImpl_Default::doBlitAdditiveBlendLogicGeneric<true, false, false>(args);
}
}
} else if (blendMode == BLEND_SUBTRACTIVE) {
if (rgbmod) {
- BlendBlitImpl::doBlitSubtractiveBlendLogicGeneric<true, true>(args);
+ BlendBlitImpl_Default::doBlitSubtractiveBlendLogicGeneric<true, true>(args);
} else {
- BlendBlitImpl::doBlitSubtractiveBlendLogicGeneric<true, false>(args);
+ BlendBlitImpl_Default::doBlitSubtractiveBlendLogicGeneric<true, false>(args);
}
} else if (blendMode == BLEND_MULTIPLY) {
if (rgbmod) {
if (alphamod) {
- BlendBlitImpl::doBlitMultiplyBlendLogicGeneric<true, true, true>(args);
+ BlendBlitImpl_Default::doBlitMultiplyBlendLogicGeneric<true, true, true>(args);
} else {
- BlendBlitImpl::doBlitMultiplyBlendLogicGeneric<true, true, false>(args);
+ BlendBlitImpl_Default::doBlitMultiplyBlendLogicGeneric<true, true, false>(args);
}
} else {
if (alphamod) {
- BlendBlitImpl::doBlitMultiplyBlendLogicGeneric<true, false, true>(args);
+ BlendBlitImpl_Default::doBlitMultiplyBlendLogicGeneric<true, false, true>(args);
} else {
- BlendBlitImpl::doBlitMultiplyBlendLogicGeneric<true, false, false>(args);
+ BlendBlitImpl_Default::doBlitMultiplyBlendLogicGeneric<true, false, false>(args);
}
}
} else {
assert(blendMode == BLEND_NORMAL);
if (rgbmod) {
if (alphamod) {
- BlendBlitImpl::doBlitAlphaBlendLogicGeneric<true, true, true>(args);
+ BlendBlitImpl_Default::doBlitAlphaBlendLogicGeneric<true, true, true>(args);
} else {
- BlendBlitImpl::doBlitAlphaBlendLogicGeneric<true, true, false>(args);
+ BlendBlitImpl_Default::doBlitAlphaBlendLogicGeneric<true, true, false>(args);
}
} else {
if (alphamod) {
- BlendBlitImpl::doBlitAlphaBlendLogicGeneric<true, false, true>(args);
+ BlendBlitImpl_Default::doBlitAlphaBlendLogicGeneric<true, false, true>(args);
} else {
- BlendBlitImpl::doBlitAlphaBlendLogicGeneric<true, false, false>(args);
+ BlendBlitImpl_Default::doBlitAlphaBlendLogicGeneric<true, false, false>(args);
}
}
}
diff --git a/graphics/blit/blit-avx2.cpp b/graphics/blit/blit-avx2.cpp
index 02dec39e1b1..ef3049bba33 100644
--- a/graphics/blit/blit-avx2.cpp
+++ b/graphics/blit/blit-avx2.cpp
@@ -27,6 +27,9 @@
namespace Graphics {
+class BlendBlitImpl_AVX2 {
+ friend class BlendBlit;
+
template<bool doscale, bool rgbmod, bool alphamod>
struct AlphaBlend {
static inline __m256i simd(__m256i src, __m256i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
@@ -261,7 +264,6 @@ struct SubtractiveBlend {
}
};
-class BlendBlitImpl {
public:
template<template <bool DOSCALE, bool RGBMOD, bool ALPHAMOD> class PixelFunc, bool doscale, bool rgbmod, bool alphamod, bool coloradd1, bool loaddst>
static void blitInnerLoop(BlendBlit::Args &args) {
@@ -342,121 +344,121 @@ static void blitInnerLoop(BlendBlit::Args &args) {
}
}
-}; // end of class BlendBlitImpl
+}; // end of class BlendBlitImpl_AVX2
void BlendBlit::blitAVX2(Args &args, const TSpriteBlendMode &blendMode, const AlphaType &alphaType) {
bool rgbmod = ((args.color & kRGBModMask) != kRGBModMask);
bool alphamod = ((args.color & kAModMask) != kAModMask);
if (args.scaleX == SCALE_THRESHOLD && args.scaleY == SCALE_THRESHOLD) {
if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_OPAQUE) {
- BlendBlitImpl::blitInnerLoop<OpaqueBlend, false, false, false, false, true>(args);
+ BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::OpaqueBlend, false, false, false, false, true>(args);
} else if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_BINARY) {
- BlendBlitImpl::blitInnerLoop<BinaryBlend, false, false, false, false, true>(args);
+ BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::BinaryBlend, false, false, false, false, true>(args);
} else {
if (blendMode == BLEND_ADDITIVE) {
if (rgbmod) {
if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, false, true, true, false, true>(args);
+ BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::AdditiveBlend, false, true, true, false, true>(args);
} else {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, false, true, false, false, true>(args);
+ BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::AdditiveBlend, false, true, false, false, true>(args);
}
} else {
if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, false, false, true, false, true>(args);
+ BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::AdditiveBlend, false, false, true, false, true>(args);
} else {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, false, false, false, false, true>(args);
+ BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::AdditiveBlend, false, false, false, false, true>(args);
}
}
} else if (blendMode == BLEND_SUBTRACTIVE) {
if (rgbmod) {
- BlendBlitImpl::blitInnerLoop<SubtractiveBlend, false, true, false, false, true>(args);
+ BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::SubtractiveBlend, false, true, false, false, true>(args);
} else {
- BlendBlitImpl::blitInnerLoop<SubtractiveBlend, false, false, false, false, true>(args);
+ BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::SubtractiveBlend, false, false, false, false, true>(args);
}
} else if (blendMode == BLEND_MULTIPLY) {
if (rgbmod) {
if (alphamod) {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, false, true, true, false, true>(args);
+ BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::MultiplyBlend, false, true, true, false, true>(args);
} else {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, false, true, false, false, true>(args);
+ BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::MultiplyBlend, false, true, false, false, true>(args);
}
} else {
if (alphamod) {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, false, false, true, false, true>(args);
+ BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::MultiplyBlend, false, false, true, false, true>(args);
} else {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, false, false, false, false, true>(args);
+ BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::MultiplyBlend, false, false, false, false, true>(args);
}
}
} else {
assert(blendMode == BLEND_NORMAL);
if (rgbmod) {
if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, false, true, true, false, true>(args);
+ BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::AlphaBlend, false, true, true, false, true>(args);
} else {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, false, true, false, false, true>(args);
+ BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::AlphaBlend, false, true, false, false, true>(args);
}
} else {
if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, false, false, true, false, true>(args);
+ BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::AlphaBlend, false, false, true, false, true>(args);
} else {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, false, false, false, false, true>(args);
+ BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::AlphaBlend, false, false, false, false, true>(args);
}
}
}
}
} else {
if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_OPAQUE) {
- BlendBlitImpl::blitInnerLoop<OpaqueBlend, true, false, false, false, true>(args);
+ BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::OpaqueBlend, true, false, false, false, true>(args);
} else if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_BINARY) {
- BlendBlitImpl::blitInnerLoop<BinaryBlend, true, false, false, false, true>(args);
+ BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::BinaryBlend, true, false, false, false, true>(args);
} else {
if (blendMode == BLEND_ADDITIVE) {
if (rgbmod) {
if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, true, true, true, false, true>(args);
+ BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::AdditiveBlend, true, true, true, false, true>(args);
} else {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, true, true, false, false, true>(args);
+ BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::AdditiveBlend, true, true, false, false, true>(args);
}
} else {
if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, true, false, true, false, true>(args);
+ BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::AdditiveBlend, true, false, true, false, true>(args);
} else {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, true, false, false, false, true>(args);
+ BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::AdditiveBlend, true, false, false, false, true>(args);
}
}
} else if (blendMode == BLEND_SUBTRACTIVE) {
if (rgbmod) {
- BlendBlitImpl::blitInnerLoop<SubtractiveBlend, true, true, false, false, true>(args);
+ BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::SubtractiveBlend, true, true, false, false, true>(args);
} else {
- BlendBlitImpl::blitInnerLoop<SubtractiveBlend, true, false, false, false, true>(args);
+ BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::SubtractiveBlend, true, false, false, false, true>(args);
}
} else if (blendMode == BLEND_MULTIPLY) {
if (rgbmod) {
if (alphamod) {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, true, true, true, false, true>(args);
+ BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::MultiplyBlend, true, true, true, false, true>(args);
} else {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, true, true, false, false, true>(args);
+ BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::MultiplyBlend, true, true, false, false, true>(args);
}
} else {
if (alphamod) {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, true, false, true, false, true>(args);
+ BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::MultiplyBlend, true, false, true, false, true>(args);
} else {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, true, false, false, false, true>(args);
+ BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::MultiplyBlend, true, false, false, false, true>(args);
}
}
} else {
assert(blendMode == BLEND_NORMAL);
if (rgbmod) {
if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, true, true, true, false, true>(args);
+ BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::AlphaBlend, true, true, true, false, true>(args);
} else {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, true, true, false, false, true>(args);
+ BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::AlphaBlend, true, true, false, false, true>(args);
}
} else {
if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, true, false, true, false, true>(args);
+ BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::AlphaBlend, true, false, true, false, true>(args);
} else {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, true, false, false, false, true>(args);
+ BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::AlphaBlend, true, false, false, false, true>(args);
}
}
}
diff --git a/graphics/blit/blit-neon.cpp b/graphics/blit/blit-neon.cpp
index 3f89078a044..58bc8a3747f 100644
--- a/graphics/blit/blit-neon.cpp
+++ b/graphics/blit/blit-neon.cpp
@@ -29,6 +29,9 @@
namespace Graphics {
+class BlendBlitImpl_NEON {
+ friend class BlendBlit;
+
template<bool doscale, bool rgbmod, bool alphamod>
struct AlphaBlend {
static inline uint32x4_t simd(uint32x4_t src, uint32x4_t dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
@@ -263,8 +266,6 @@ struct SubtractiveBlend {
}
};
-class BlendBlitImpl {
-
public:
template<template <bool DOSCALE, bool RGBMOD, bool ALPHAMOD> class PixelFunc, bool doscale, bool rgbmod, bool alphamod, bool coloradd1, bool loaddst>
static inline void blitInnerLoop(BlendBlit::Args &args) {
@@ -342,121 +343,121 @@ static inline void blitInnerLoop(BlendBlit::Args &args) {
}
}
-}; // end of class BlendBlitImpl
+}; // end of class BlendBlitImpl_NEON
void BlendBlit::blitNEON(Args &args, const TSpriteBlendMode &blendMode, const AlphaType &alphaType) {
bool rgbmod = ((args.color & kRGBModMask) != kRGBModMask);
bool alphamod = ((args.color & kAModMask) != kAModMask);
if (args.scaleX == SCALE_THRESHOLD && args.scaleY == SCALE_THRESHOLD) {
if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_OPAQUE) {
- BlendBlitImpl::blitInnerLoop<OpaqueBlend, false, false, false, false, true>(args);
+ BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::OpaqueBlend, false, false, false, false, true>(args);
} else if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_BINARY) {
- BlendBlitImpl::blitInnerLoop<BinaryBlend, false, false, false, false, true>(args);
+ BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::BinaryBlend, false, false, false, false, true>(args);
} else {
if (blendMode == BLEND_ADDITIVE) {
if (rgbmod) {
if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, false, true, true, false, true>(args);
+ BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::AdditiveBlend, false, true, true, false, true>(args);
} else {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, false, true, false, false, true>(args);
+ BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::AdditiveBlend, false, true, false, false, true>(args);
}
} else {
if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, false, false, true, false, true>(args);
+ BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::AdditiveBlend, false, false, true, false, true>(args);
} else {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, false, false, false, false, true>(args);
+ BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::AdditiveBlend, false, false, false, false, true>(args);
}
}
} else if (blendMode == BLEND_SUBTRACTIVE) {
if (rgbmod) {
- BlendBlitImpl::blitInnerLoop<SubtractiveBlend, false, true, false, false, true>(args);
+ BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::SubtractiveBlend, false, true, false, false, true>(args);
} else {
- BlendBlitImpl::blitInnerLoop<SubtractiveBlend, false, false, false, false, true>(args);
+ BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::SubtractiveBlend, false, false, false, false, true>(args);
}
} else if (blendMode == BLEND_MULTIPLY) {
if (rgbmod) {
if (alphamod) {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, false, true, true, false, true>(args);
+ BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::MultiplyBlend, false, true, true, false, true>(args);
} else {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, false, true, false, false, true>(args);
+ BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::MultiplyBlend, false, true, false, false, true>(args);
}
} else {
if (alphamod) {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, false, false, true, false, true>(args);
+ BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::MultiplyBlend, false, false, true, false, true>(args);
} else {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, false, false, false, false, true>(args);
+ BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::MultiplyBlend, false, false, false, false, true>(args);
}
}
} else {
assert(blendMode == BLEND_NORMAL);
if (rgbmod) {
if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, false, true, true, false, true>(args);
+ BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::AlphaBlend, false, true, true, false, true>(args);
} else {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, false, true, false, false, true>(args);
+ BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::AlphaBlend, false, true, false, false, true>(args);
}
} else {
if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, false, false, true, false, true>(args);
+ BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::AlphaBlend, false, false, true, false, true>(args);
} else {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, false, false, false, false, true>(args);
+ BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::AlphaBlend, false, false, false, false, true>(args);
}
}
}
}
} else {
if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_OPAQUE) {
- BlendBlitImpl::blitInnerLoop<OpaqueBlend, true, false, false, false, true>(args);
+ BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::OpaqueBlend, true, false, false, false, true>(args);
} else if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_BINARY) {
- BlendBlitImpl::blitInnerLoop<BinaryBlend, true, false, false, false, true>(args);
+ BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::BinaryBlend, true, false, false, false, true>(args);
} else {
if (blendMode == BLEND_ADDITIVE) {
if (rgbmod) {
if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, true, true, true, false, true>(args);
+ BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::AdditiveBlend, true, true, true, false, true>(args);
} else {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, true, true, false, false, true>(args);
+ BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::AdditiveBlend, true, true, false, false, true>(args);
}
} else {
if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, true, false, true, false, true>(args);
+ BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::AdditiveBlend, true, false, true, false, true>(args);
} else {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, true, false, false, false, true>(args);
+ BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::AdditiveBlend, true, false, false, false, true>(args);
}
}
} else if (blendMode == BLEND_SUBTRACTIVE) {
if (rgbmod) {
- BlendBlitImpl::blitInnerLoop<SubtractiveBlend, true, true, false, false, true>(args);
+ BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::SubtractiveBlend, true, true, false, false, true>(args);
} else {
- BlendBlitImpl::blitInnerLoop<SubtractiveBlend, true, false, false, false, true>(args);
+ BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::SubtractiveBlend, true, false, false, false, true>(args);
}
} else if (blendMode == BLEND_MULTIPLY) {
if (rgbmod) {
if (alphamod) {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, true, true, true, false, true>(args);
+ BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::MultiplyBlend, true, true, true, false, true>(args);
} else {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, true, true, false, false, true>(args);
+ BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::MultiplyBlend, true, true, false, false, true>(args);
}
} else {
if (alphamod) {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, true, false, true, false, true>(args);
+ BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::MultiplyBlend, true, false, true, false, true>(args);
} else {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, true, false, false, false, true>(args);
+ BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::MultiplyBlend, true, false, false, false, true>(args);
}
}
} else {
assert(blendMode == BLEND_NORMAL);
if (rgbmod) {
if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, true, true, true, false, true>(args);
+ BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::AlphaBlend, true, true, true, false, true>(args);
} else {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, true, true, false, false, true>(args);
+ BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::AlphaBlend, true, true, false, false, true>(args);
}
} else {
if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, true, false, true, false, true>(args);
+ BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::AlphaBlend, true, false, true, false, true>(args);
} else {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, true, false, false, false, true>(args);
+ BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::AlphaBlend, true, false, false, false, true>(args);
}
}
}
diff --git a/graphics/blit/blit-sse2.cpp b/graphics/blit/blit-sse2.cpp
index 33a9c0936ad..1962621c68c 100644
--- a/graphics/blit/blit-sse2.cpp
+++ b/graphics/blit/blit-sse2.cpp
@@ -33,6 +33,9 @@ static FORCEINLINE __m128i sse2_mul32(__m128i a, __m128i b) {
return _mm_unpacklo_epi32(even, odd);
}
+class BlendBlitImpl_SSE2 {
+ friend class BlendBlit;
+
template<bool doscale, bool rgbmod, bool alphamod>
struct AlphaBlend {
static inline __m128i simd(__m128i src, __m128i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
@@ -272,7 +275,6 @@ struct SubtractiveBlend {
}
};
-class BlendBlitImpl {
public:
template<template <bool DOSCALE, bool RGBMOD, bool ALPHAMOD> class PixelFunc, bool doscale, bool rgbmod, bool alphamod, bool coloradd1, bool loaddst>
static inline void blitInnerLoop(BlendBlit::Args &args) {
@@ -348,121 +350,121 @@ static inline void blitInnerLoop(BlendBlit::Args &args) {
}
}
-}; // End of class BlendBlitImpl
+}; // End of class BlendBlitImpl_SSE2
void BlendBlit::blitSSE2(Args &args, const TSpriteBlendMode &blendMode, const AlphaType &alphaType) {
bool rgbmod = ((args.color & kRGBModMask) != kRGBModMask);
bool alphamod = ((args.color & kAModMask) != kAModMask);
if (args.scaleX == SCALE_THRESHOLD && args.scaleY == SCALE_THRESHOLD) {
if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_OPAQUE) {
- BlendBlitImpl::blitInnerLoop<OpaqueBlend, false, false, false, false, true>(args);
+ BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::OpaqueBlend, false, false, false, false, true>(args);
} else if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_BINARY) {
- BlendBlitImpl::blitInnerLoop<BinaryBlend, false, false, false, false, true>(args);
+ BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::BinaryBlend, false, false, false, false, true>(args);
} else {
if (blendMode == BLEND_ADDITIVE) {
if (rgbmod) {
if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, false, true, true, false, true>(args);
+ BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::AdditiveBlend, false, true, true, false, true>(args);
} else {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, false, true, false, false, true>(args);
+ BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::AdditiveBlend, false, true, false, false, true>(args);
}
} else {
if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, false, false, true, false, true>(args);
+ BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::AdditiveBlend, false, false, true, false, true>(args);
} else {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, false, false, false, false, true>(args);
+ BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::AdditiveBlend, false, false, false, false, true>(args);
}
}
} else if (blendMode == BLEND_SUBTRACTIVE) {
if (rgbmod) {
- BlendBlitImpl::blitInnerLoop<SubtractiveBlend, false, true, false, false, true>(args);
+ BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::SubtractiveBlend, false, true, false, false, true>(args);
} else {
- BlendBlitImpl::blitInnerLoop<SubtractiveBlend, false, false, false, false, true>(args);
+ BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::SubtractiveBlend, false, false, false, false, true>(args);
}
} else if (blendMode == BLEND_MULTIPLY) {
if (rgbmod) {
if (alphamod) {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, false, true, true, false, true>(args);
+ BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::MultiplyBlend, false, true, true, false, true>(args);
} else {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, false, true, false, false, true>(args);
+ BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::MultiplyBlend, false, true, false, false, true>(args);
}
} else {
if (alphamod) {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, false, false, true, false, true>(args);
+ BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::MultiplyBlend, false, false, true, false, true>(args);
} else {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, false, false, false, false, true>(args);
+ BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::MultiplyBlend, false, false, false, false, true>(args);
}
}
} else {
assert(blendMode == BLEND_NORMAL);
if (rgbmod) {
if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, false, true, true, false, true>(args);
+ BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::AlphaBlend, false, true, true, false, true>(args);
} else {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, false, true, false, false, true>(args);
+ BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::AlphaBlend, false, true, false, false, true>(args);
}
} else {
if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, false, false, true, false, true>(args);
+ BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::AlphaBlend, false, false, true, false, true>(args);
} else {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, false, false, false, false, true>(args);
+ BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::AlphaBlend, false, false, false, false, true>(args);
}
}
}
}
} else {
if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_OPAQUE) {
- BlendBlitImpl::blitInnerLoop<OpaqueBlend, true, false, false, false, true>(args);
+ BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::OpaqueBlend, true, false, false, false, true>(args);
} else if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_BINARY) {
- BlendBlitImpl::blitInnerLoop<BinaryBlend, true, false, false, false, true>(args);
+ BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::BinaryBlend, true, false, false, false, true>(args);
} else {
if (blendMode == BLEND_ADDITIVE) {
if (rgbmod) {
if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, true, true, true, false, true>(args);
+ BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::AdditiveBlend, true, true, true, false, true>(args);
} else {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, true, true, false, false, true>(args);
+ BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::AdditiveBlend, true, true, false, false, true>(args);
}
} else {
if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, true, false, true, false, true>(args);
+ BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::AdditiveBlend, true, false, true, false, true>(args);
} else {
- BlendBlitImpl::blitInnerLoop<AdditiveBlend, true, false, false, false, true>(args);
+ BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::AdditiveBlend, true, false, false, false, true>(args);
}
}
} else if (blendMode == BLEND_SUBTRACTIVE) {
if (rgbmod) {
- BlendBlitImpl::blitInnerLoop<SubtractiveBlend, true, true, false, false, true>(args);
+ BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::SubtractiveBlend, true, true, false, false, true>(args);
} else {
- BlendBlitImpl::blitInnerLoop<SubtractiveBlend, true, false, false, false, true>(args);
+ BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::SubtractiveBlend, true, false, false, false, true>(args);
}
} else if (blendMode == BLEND_MULTIPLY) {
if (rgbmod) {
if (alphamod) {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, true, true, true, false, true>(args);
+ BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::MultiplyBlend, true, true, true, false, true>(args);
} else {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, true, true, false, false, true>(args);
+ BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::MultiplyBlend, true, true, false, false, true>(args);
}
} else {
if (alphamod) {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, true, false, true, false, true>(args);
+ BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::MultiplyBlend, true, false, true, false, true>(args);
} else {
- BlendBlitImpl::blitInnerLoop<MultiplyBlend, true, false, false, false, true>(args);
+ BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::MultiplyBlend, true, false, false, false, true>(args);
}
}
} else {
assert(blendMode == BLEND_NORMAL);
if (rgbmod) {
if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, true, true, true, false, true>(args);
+ BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::AlphaBlend, true, true, true, false, true>(args);
} else {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, true, true, false, false, true>(args);
+ BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::AlphaBlend, true, true, false, false, true>(args);
}
} else {
if (alphamod) {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, true, false, true, false, true>(args);
+ BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::AlphaBlend, true, false, true, false, true>(args);
} else {
- BlendBlitImpl::blitInnerLoop<AlphaBlend, true, false, false, false, true>(args);
+ BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::AlphaBlend, true, false, false, false, true>(args);
}
}
}
Commit: b0de3e786c602d1dcb9f3c1db6b9a4911944f973
https://github.com/scummvm/scummvm/commit/b0de3e786c602d1dcb9f3c1db6b9a4911944f973
Author: Kaloyan Chehlarski (strahy at outlook.com)
Date: 2023-10-22T00:25:56+03:00
Commit Message:
GRAPHICS: Fix AVX2 path for MultiplyBlend
Applies the same fixes to multiply blending in the AVX2
optimized path that were previously applied to the SSE2 path.
Namely, alpha blending and colors now both blend correctly.
Changed paths:
graphics/blit/blit-avx2.cpp
diff --git a/graphics/blit/blit-avx2.cpp b/graphics/blit/blit-avx2.cpp
index ef3049bba33..85170df00c0 100644
--- a/graphics/blit/blit-avx2.cpp
+++ b/graphics/blit/blit-avx2.cpp
@@ -95,36 +95,41 @@ struct AlphaBlend {
template<bool doscale, bool rgbmod, bool alphamod>
struct MultiplyBlend {
static inline __m256i simd(__m256i src, __m256i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
- __m256i ina;
- if (alphamod)
+ __m256i ina, alphaMask;
+ if (alphamod) {
ina = _mm256_srli_epi32(_mm256_mullo_epi16(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask)), _mm256_set1_epi32(ca)), 8);
- else
+ alphaMask = _mm256_cmpeq_epi32(ina, _mm256_setzero_si256());
+ } else {
ina = _mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask));
- __m256i alphaMask = _mm256_cmpeq_epi32(ina, _mm256_setzero_si256());
+ alphaMask = _mm256_set1_epi32(BlendBlit::kAModMask);
+ }
if (rgbmod) {
- __m256i srcb = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
- __m256i srcg = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
- __m256i srcr = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
- __m256i dstb = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
- __m256i dstg = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
- __m256i dstr = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
+ __m256i srcB = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
+ __m256i srcG = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
+ __m256i srcR = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
+ __m256i dstB = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
+ __m256i dstG = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
+ __m256i dstR = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
- srcb = _mm256_and_si256(_mm256_srli_epi32(_mm256_mullo_epi32(dstb, _mm256_srli_epi32(_mm256_mullo_epi32(_mm256_mullo_epi16(srcb, _mm256_set1_epi32(cb)), ina), 16)), BlendBlit::kBModShift - 8), _mm256_set1_epi32(BlendBlit::kBModMask));
- srcg = _mm256_and_si256(_mm256_srli_epi32(_mm256_mullo_epi32(dstg, _mm256_srli_epi32(_mm256_mullo_epi32(_mm256_mullo_epi16(srcg, _mm256_set1_epi32(cg)), ina), 16)), BlendBlit::kGModShift - 8), _mm256_set1_epi32(BlendBlit::kGModMask));
- srcr = _mm256_and_si256(_mm256_srli_epi32(_mm256_mullo_epi32(dstr, _mm256_srli_epi32(_mm256_mullo_epi32(_mm256_mullo_epi16(srcr, _mm256_set1_epi32(cr)), ina), 16)), BlendBlit::kRModShift - 8), _mm256_set1_epi32(BlendBlit::kRModMask));
+ srcB = _mm256_and_si256(_mm256_slli_epi32(_mm256_mullo_epi32(dstB, _mm256_srli_epi32(_mm256_mullo_epi32(_mm256_mullo_epi16(srcB, _mm256_set1_epi32(cb)), ina), 16)), BlendBlit::kBModShift - 8), _mm256_set1_epi32(BlendBlit::kBModMask));
+ srcG = _mm256_and_si256(_mm256_slli_epi32(_mm256_mullo_epi32(dstG, _mm256_srli_epi32(_mm256_mullo_epi32(_mm256_mullo_epi16(srcG, _mm256_set1_epi32(cg)), ina), 16)), BlendBlit::kGModShift - 8), _mm256_set1_epi32(BlendBlit::kGModMask));
+ srcR = _mm256_and_si256(_mm256_slli_epi32(_mm256_mullo_epi32(dstR, _mm256_srli_epi32(_mm256_mullo_epi32(_mm256_mullo_epi16(srcR, _mm256_set1_epi32(cr)), ina), 16)), BlendBlit::kRModShift - 8), _mm256_set1_epi32(BlendBlit::kRModMask));
src = _mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask));
- src = _mm256_or_si256(src, _mm256_or_si256(srcb, _mm256_or_si256(srcg, srcr)));
+ src = _mm256_or_si256(src, _mm256_or_si256(srcB, _mm256_or_si256(srcG, srcR)));
} else {
- __m256i srcg = _mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kGModMask));
- __m256i srcrb = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
- __m256i dstg = _mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kGModMask));
- __m256i dstrb = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
- srcg = _mm256_and_si256(_mm256_srli_epi32(_mm256_mullo_epi32(dstg, _mm256_srli_epi32(_mm256_mullo_epi32(srcg, ina), 8)), 8), _mm256_set1_epi32(BlendBlit::kGModMask));
- srcrb = _mm256_and_si256(_mm256_mullo_epi32(dstrb, _mm256_srli_epi32(_mm256_mullo_epi32(srcrb, ina), 8)), _mm256_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask));
+ constexpr uint32 rbMask = BlendBlit::kRModMask | BlendBlit::kBModMask;
+ __m256i dstRB = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
+ __m256i srcRB = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kRModMask | BlendBlit::kBModMask)), BlendBlit::kBModShift);
+ __m256i dstG = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
+ __m256i srcG = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
+
+ srcG = _mm256_and_si256(_mm256_slli_epi32(_mm256_mullo_epi16(dstG, _mm256_srli_epi32(_mm256_mullo_epi16(srcG, ina), 8)), 8), _mm256_set1_epi32(BlendBlit::kGModMask));
+ srcRB = _mm256_and_si256(_mm256_mullo_epi16(dstRB, _mm256_srli_epi32(_mm256_and_si256(_mm256_mullo_epi32(srcRB, ina), _mm256_set1_epi32(rbMask)), 8)), _mm256_set1_epi32(rbMask));
+
src = _mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask));
- src = _mm256_or_si256(src, _mm256_or_si256(srcrb, srcg));
+ src = _mm256_or_si256(src, _mm256_or_si256(srcRB, srcG));
}
dst = _mm256_and_si256(alphaMask, dst);
More information about the Scummvm-git-logs
mailing list