[Scummvm-git-logs] scummvm master -> ec53c5ea87c7350eded25e6333c77bbbd3519a8d
bluegr
noreply at scummvm.org
Sat Sep 28 23:19:46 UTC 2024
This automated email contains information about 3 new commits that have been
pushed to the 'scummvm' repo, located at https://github.com/scummvm/scummvm.
Summary:
433d6804f7 GRAPHICS: Make the blend structs into objects on the stack
c41edfd2e5 GRAPHICS: Some optimisations for the alpha blending routines
ec53c5ea87 GRAPHICS: Simplify the generic blending routines
Commit: 433d6804f714b427f51d3900ff375f72c890339a
https://github.com/scummvm/scummvm/commit/433d6804f714b427f51d3900ff375f72c890339a
Author: Cameron Cawley (ccawley2011 at gmail.com)
Date: 2024-09-29T02:19:42+03:00
Commit Message:
GRAPHICS: Make the blend structs into objects on the stack
Changed paths:
graphics/blit/blit-alpha.h
graphics/blit/blit-avx2.cpp
graphics/blit/blit-generic.cpp
graphics/blit/blit-neon.cpp
graphics/blit/blit-sse2.cpp
diff --git a/graphics/blit/blit-alpha.h b/graphics/blit/blit-alpha.h
index 0abc3094895..0fbe93bd1da 100644
--- a/graphics/blit/blit-alpha.h
+++ b/graphics/blit/blit-alpha.h
@@ -27,10 +27,26 @@ class BlendBlitImpl_Base {
friend class BlendBlit;
protected:
-template<bool doscale, bool rgbmod, bool alphamod>
-struct AlphaBlend {
- static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
- uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
+template<bool rgbmod, bool alphamod>
+struct BaseBlend {
+public:
+ constexpr BaseBlend(const uint32 color) :
+ ca(alphamod ? ((color >> BlendBlit::kAModShift) & 0xFF) : 255),
+ cr(rgbmod ? ((color >> BlendBlit::kRModShift) & 0xFF) : 255),
+ cg(rgbmod ? ((color >> BlendBlit::kGModShift) & 0xFF) : 255),
+ cb(rgbmod ? ((color >> BlendBlit::kBModShift) & 0xFF) : 255) {}
+
+protected:
+ const byte ca, cr, cg, cb;
+};
+
+template<bool rgbmod, bool alphamod>
+struct AlphaBlend : public BaseBlend<rgbmod, alphamod> {
+public:
+ constexpr AlphaBlend(const uint32 color) : BaseBlend<rgbmod, alphamod>(color) {}
+
+ inline void normal(const byte *in, byte *out) const {
+ uint32 ina = in[BlendBlit::kAIndex] * this->ca >> 8;
if (ina != 0) {
if (rgbmod) {
@@ -39,9 +55,9 @@ struct AlphaBlend {
const uint outr = (out[BlendBlit::kRIndex] * (255 - ina) >> 8);
out[BlendBlit::kAIndex] = 255;
- out[BlendBlit::kBIndex] = outb + (in[BlendBlit::kBIndex] * ina * cb >> 16);
- out[BlendBlit::kGIndex] = outg + (in[BlendBlit::kGIndex] * ina * cg >> 16);
- out[BlendBlit::kRIndex] = outr + (in[BlendBlit::kRIndex] * ina * cr >> 16);
+ out[BlendBlit::kBIndex] = outb + (in[BlendBlit::kBIndex] * ina * this->cb >> 16);
+ out[BlendBlit::kGIndex] = outg + (in[BlendBlit::kGIndex] * ina * this->cg >> 16);
+ out[BlendBlit::kRIndex] = outr + (in[BlendBlit::kRIndex] * ina * this->cr >> 16);
} else {
out[BlendBlit::kAIndex] = 255;
out[BlendBlit::kBIndex] = (out[BlendBlit::kBIndex] * (255 - ina) + in[BlendBlit::kBIndex] * ina) >> 8;
@@ -53,29 +69,38 @@ struct AlphaBlend {
}
};
-template<bool doscale, bool rgbmod, bool alphamod>
-struct MultiplyBlend {
- static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
- uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
+template<bool rgbmod, bool alphamod>
+struct MultiplyBlend : public BaseBlend<rgbmod, alphamod> {
+public:
+ constexpr MultiplyBlend(const uint32 color) : BaseBlend<rgbmod, alphamod>(color) {}
+
+ inline void normal(const byte *in, byte *out) const {
+ uint32 ina = in[BlendBlit::kAIndex] * this->ca >> 8;
if (ina != 0) {
- out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] * ((in[BlendBlit::kBIndex] * cb * ina) >> 16) >> 8;
- out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] * ((in[BlendBlit::kGIndex] * cg * ina) >> 16) >> 8;
- out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] * ((in[BlendBlit::kRIndex] * cr * ina) >> 16) >> 8;
+ out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] * ((in[BlendBlit::kBIndex] * this->cb * ina) >> 16) >> 8;
+ out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] * ((in[BlendBlit::kGIndex] * this->cg * ina) >> 16) >> 8;
+ out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] * ((in[BlendBlit::kRIndex] * this->cr * ina) >> 16) >> 8;
}
}
};
-template<bool doscale, bool rgbmod, bool alphamod>
-struct OpaqueBlend {
- static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
+template<bool rgbmod, bool alphamod>
+struct OpaqueBlend : public BaseBlend<rgbmod, alphamod> {
+public:
+ constexpr OpaqueBlend(const uint32 color) : BaseBlend<rgbmod, alphamod>(color) {}
+
+ inline void normal(const byte *in, byte *out) const {
*(uint32 *)out = *(const uint32 *)in | BlendBlit::kAModMask;
}
};
-template<bool doscale, bool rgbmod, bool alphamod>
-struct BinaryBlend {
- static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
+template<bool rgbmod, bool alphamod>
+struct BinaryBlend : public BaseBlend<rgbmod, alphamod> {
+public:
+ constexpr BinaryBlend(const uint32 color) : BaseBlend<rgbmod, alphamod>(color) {}
+
+ inline void normal(const byte *in, byte *out) const {
uint32 pix = *(const uint32 *)in;
int a = in[BlendBlit::kAIndex];
@@ -86,26 +111,32 @@ struct BinaryBlend {
}
};
-template<bool doscale, bool rgbmod, bool alphamod>
-struct AdditiveBlend {
- static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
- uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
+template<bool rgbmod, bool alphamod>
+struct AdditiveBlend : public BaseBlend<rgbmod, alphamod> {
+public:
+ constexpr AdditiveBlend(const uint32 color) : BaseBlend<rgbmod, alphamod>(color) {}
+
+ inline void normal(const byte *in, byte *out) const {
+ uint32 ina = in[BlendBlit::kAIndex] * this->ca >> 8;
if (ina != 0) {
- out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] + ((in[BlendBlit::kBIndex] * cb * ina) >> 16);
- out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] + ((in[BlendBlit::kGIndex] * cg * ina) >> 16);
- out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] + ((in[BlendBlit::kRIndex] * cr * ina) >> 16);
+ out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] + ((in[BlendBlit::kBIndex] * this->cb * ina) >> 16);
+ out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] + ((in[BlendBlit::kGIndex] * this->cg * ina) >> 16);
+ out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] + ((in[BlendBlit::kRIndex] * this->cr * ina) >> 16);
}
}
};
-template<bool doscale, bool rgbmod, bool alphamod>
-struct SubtractiveBlend {
- static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
+template<bool rgbmod, bool alphamod>
+struct SubtractiveBlend : public BaseBlend<rgbmod, alphamod> {
+public:
+ constexpr SubtractiveBlend(const uint32 color) : BaseBlend<rgbmod, alphamod>(color) {}
+
+ inline void normal(const byte *in, byte *out) const {
out[BlendBlit::kAIndex] = 255;
- out[BlendBlit::kBIndex] = MAX<int32>(out[BlendBlit::kBIndex] - ((in[BlendBlit::kBIndex] * cb * (out[BlendBlit::kBIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
- out[BlendBlit::kGIndex] = MAX<int32>(out[BlendBlit::kGIndex] - ((in[BlendBlit::kGIndex] * cg * (out[BlendBlit::kGIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
- out[BlendBlit::kRIndex] = MAX<int32>(out[BlendBlit::kRIndex] - ((in[BlendBlit::kRIndex] * cr * (out[BlendBlit::kRIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
+ out[BlendBlit::kBIndex] = MAX<int32>(out[BlendBlit::kBIndex] - ((in[BlendBlit::kBIndex] * this->cb * (out[BlendBlit::kBIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
+ out[BlendBlit::kGIndex] = MAX<int32>(out[BlendBlit::kGIndex] - ((in[BlendBlit::kGIndex] * this->cg * (out[BlendBlit::kGIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
+ out[BlendBlit::kRIndex] = MAX<int32>(out[BlendBlit::kRIndex] - ((in[BlendBlit::kRIndex] * this->cr * (out[BlendBlit::kRIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
}
};
@@ -117,114 +148,114 @@ void BlendBlit::blitT(Args &args, const TSpriteBlendMode &blendMode, const Alpha
bool alphamod = ((args.color & kAModMask) != kAModMask);
if (args.scaleX == SCALE_THRESHOLD && args.scaleY == SCALE_THRESHOLD) {
if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_OPAQUE) {
- T::template blitInnerLoop<T::template OpaqueBlend, false, false, false, false, true>(args);
+ T::template blitInnerLoop<T::template OpaqueBlend, false, false, false>(args);
} else if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_BINARY) {
- T::template blitInnerLoop<T::template BinaryBlend, false, false, false, false, true>(args);
+ T::template blitInnerLoop<T::template BinaryBlend, false, false, false>(args);
} else {
if (blendMode == BLEND_ADDITIVE) {
if (rgbmod) {
if (alphamod) {
- T::template blitInnerLoop<T::template AdditiveBlend, false, true, true, false, true>(args);
+ T::template blitInnerLoop<T::template AdditiveBlend, false, true, true>(args);
} else {
- T::template blitInnerLoop<T::template AdditiveBlend, false, true, false, false, true>(args);
+ T::template blitInnerLoop<T::template AdditiveBlend, false, true, false>(args);
}
} else {
if (alphamod) {
- T::template blitInnerLoop<T::template AdditiveBlend, false, false, true, false, true>(args);
+ T::template blitInnerLoop<T::template AdditiveBlend, false, false, true>(args);
} else {
- T::template blitInnerLoop<T::template AdditiveBlend, false, false, false, false, true>(args);
+ T::template blitInnerLoop<T::template AdditiveBlend, false, false, false>(args);
}
}
} else if (blendMode == BLEND_SUBTRACTIVE) {
if (rgbmod) {
- T::template blitInnerLoop<T::template SubtractiveBlend, false, true, false, false, true>(args);
+ T::template blitInnerLoop<T::template SubtractiveBlend, false, true, false>(args);
} else {
- T::template blitInnerLoop<T::template SubtractiveBlend, false, false, false, false, true>(args);
+ T::template blitInnerLoop<T::template SubtractiveBlend, false, false, false>(args);
}
} else if (blendMode == BLEND_MULTIPLY) {
if (rgbmod) {
if (alphamod) {
- T::template blitInnerLoop<T::template MultiplyBlend, false, true, true, false, true>(args);
+ T::template blitInnerLoop<T::template MultiplyBlend, false, true, true>(args);
} else {
- T::template blitInnerLoop<T::template MultiplyBlend, false, true, false, false, true>(args);
+ T::template blitInnerLoop<T::template MultiplyBlend, false, true, false>(args);
}
} else {
if (alphamod) {
- T::template blitInnerLoop<T::template MultiplyBlend, false, false, true, false, true>(args);
+ T::template blitInnerLoop<T::template MultiplyBlend, false, false, true>(args);
} else {
- T::template blitInnerLoop<T::template MultiplyBlend, false, false, false, false, true>(args);
+ T::template blitInnerLoop<T::template MultiplyBlend, false, false, false>(args);
}
}
} else {
assert(blendMode == BLEND_NORMAL);
if (rgbmod) {
if (alphamod) {
- T::template blitInnerLoop<T::template AlphaBlend, false, true, true, false, true>(args);
+ T::template blitInnerLoop<T::template AlphaBlend, false, true, true>(args);
} else {
- T::template blitInnerLoop<T::template AlphaBlend, false, true, false, false, true>(args);
+ T::template blitInnerLoop<T::template AlphaBlend, false, true, false>(args);
}
} else {
if (alphamod) {
- T::template blitInnerLoop<T::template AlphaBlend, false, false, true, false, true>(args);
+ T::template blitInnerLoop<T::template AlphaBlend, false, false, true>(args);
} else {
- T::template blitInnerLoop<T::template AlphaBlend, false, false, false, false, true>(args);
+ T::template blitInnerLoop<T::template AlphaBlend, false, false, false>(args);
}
}
}
}
} else {
if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_OPAQUE) {
- T::template blitInnerLoop<T::template OpaqueBlend, true, false, false, false, true>(args);
+ T::template blitInnerLoop<T::template OpaqueBlend, true, false, false>(args);
} else if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_BINARY) {
- T::template blitInnerLoop<T::template BinaryBlend, true, false, false, false, true>(args);
+ T::template blitInnerLoop<T::template BinaryBlend, true, false, false>(args);
} else {
if (blendMode == BLEND_ADDITIVE) {
if (rgbmod) {
if (alphamod) {
- T::template blitInnerLoop<T::template AdditiveBlend, true, true, true, false, true>(args);
+ T::template blitInnerLoop<T::template AdditiveBlend, true, true, true>(args);
} else {
- T::template blitInnerLoop<T::template AdditiveBlend, true, true, false, false, true>(args);
+ T::template blitInnerLoop<T::template AdditiveBlend, true, true, false>(args);
}
} else {
if (alphamod) {
- T::template blitInnerLoop<T::template AdditiveBlend, true, false, true, false, true>(args);
+ T::template blitInnerLoop<T::template AdditiveBlend, true, false, true>(args);
} else {
- T::template blitInnerLoop<T::template AdditiveBlend, true, false, false, false, true>(args);
+ T::template blitInnerLoop<T::template AdditiveBlend, true, false, false>(args);
}
}
} else if (blendMode == BLEND_SUBTRACTIVE) {
if (rgbmod) {
- T::template blitInnerLoop<T::template SubtractiveBlend, true, true, false, false, true>(args);
+ T::template blitInnerLoop<T::template SubtractiveBlend, true, true, false>(args);
} else {
- T::template blitInnerLoop<T::template SubtractiveBlend, true, false, false, false, true>(args);
+ T::template blitInnerLoop<T::template SubtractiveBlend, true, false, false>(args);
}
} else if (blendMode == BLEND_MULTIPLY) {
if (rgbmod) {
if (alphamod) {
- T::template blitInnerLoop<T::template MultiplyBlend, true, true, true, false, true>(args);
+ T::template blitInnerLoop<T::template MultiplyBlend, true, true, true>(args);
} else {
- T::template blitInnerLoop<T::template MultiplyBlend, true, true, false, false, true>(args);
+ T::template blitInnerLoop<T::template MultiplyBlend, true, true, false>(args);
}
} else {
if (alphamod) {
- T::template blitInnerLoop<T::template MultiplyBlend, true, false, true, false, true>(args);
+ T::template blitInnerLoop<T::template MultiplyBlend, true, false, true>(args);
} else {
- T::template blitInnerLoop<T::template MultiplyBlend, true, false, false, false, true>(args);
+ T::template blitInnerLoop<T::template MultiplyBlend, true, false, false>(args);
}
}
} else {
assert(blendMode == BLEND_NORMAL);
if (rgbmod) {
if (alphamod) {
- T::template blitInnerLoop<T::template AlphaBlend, true, true, true, false, true>(args);
+ T::template blitInnerLoop<T::template AlphaBlend, true, true, true>(args);
} else {
- T::template blitInnerLoop<T::template AlphaBlend, true, true, false, false, true>(args);
+ T::template blitInnerLoop<T::template AlphaBlend, true, true, false>(args);
}
} else {
if (alphamod) {
- T::template blitInnerLoop<T::template AlphaBlend, true, false, true, false, true>(args);
+ T::template blitInnerLoop<T::template AlphaBlend, true, false, true>(args);
} else {
- T::template blitInnerLoop<T::template AlphaBlend, true, false, false, false, true>(args);
+ T::template blitInnerLoop<T::template AlphaBlend, true, false, false>(args);
}
}
}
diff --git a/graphics/blit/blit-avx2.cpp b/graphics/blit/blit-avx2.cpp
index 0f3134e1cdd..45f36121e90 100644
--- a/graphics/blit/blit-avx2.cpp
+++ b/graphics/blit/blit-avx2.cpp
@@ -38,12 +38,15 @@ namespace Graphics {
class BlendBlitImpl_AVX2 : public BlendBlitImpl_Base {
friend class BlendBlit;
-template<bool doscale, bool rgbmod, bool alphamod>
-struct AlphaBlend : public BlendBlitImpl_Base::AlphaBlend<doscale, rgbmod, alphamod> {
- static inline __m256i simd(__m256i src, __m256i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
+template<bool rgbmod, bool alphamod>
+struct AlphaBlend : public BlendBlitImpl_Base::AlphaBlend<rgbmod, alphamod> {
+public:
+ constexpr AlphaBlend(const uint32 color) : BlendBlitImpl_Base::AlphaBlend<rgbmod, alphamod>(color) {}
+
+ inline __m256i simd(__m256i src, __m256i dst) const {
__m256i ina;
if (alphamod)
- ina = _mm256_srli_epi32(_mm256_mullo_epi16(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask)), _mm256_set1_epi32(ca)), 8);
+ ina = _mm256_srli_epi32(_mm256_mullo_epi16(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask)), _mm256_set1_epi32(this->ca)), 8);
else
ina = _mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask));
__m256i alphaMask = _mm256_cmpeq_epi32(ina, _mm256_setzero_si256());
@@ -59,9 +62,9 @@ struct AlphaBlend : public BlendBlitImpl_Base::AlphaBlend<doscale, rgbmod, alpha
dstR = _mm256_slli_epi32(_mm256_mullo_epi16(dstR, _mm256_sub_epi32(_mm256_set1_epi32(255), ina)), BlendBlit::kRModShift - 8);
dstG = _mm256_slli_epi32(_mm256_mullo_epi16(dstG, _mm256_sub_epi32(_mm256_set1_epi32(255), ina)), BlendBlit::kGModShift - 8);
dstB = _mm256_mullo_epi16(dstB, _mm256_sub_epi32(_mm256_set1_epi32(255), ina));
- srcR = _mm256_add_epi32(dstR, _mm256_slli_epi32(_mm256_mullo_epi16(_mm256_srli_epi32(_mm256_mullo_epi16(srcR, ina), 8), _mm256_set1_epi32(cr)), BlendBlit::kRModShift - 8));
- srcG = _mm256_add_epi32(dstG, _mm256_slli_epi32(_mm256_mullo_epi16(_mm256_srli_epi32(_mm256_mullo_epi16(srcG, ina), 8), _mm256_set1_epi32(cg)), BlendBlit::kGModShift - 8));
- srcB = _mm256_add_epi32(dstB, _mm256_mullo_epi16(_mm256_srli_epi32(_mm256_mullo_epi16(srcB, ina), 8), _mm256_set1_epi32(cb)));
+ srcR = _mm256_add_epi32(dstR, _mm256_slli_epi32(_mm256_mullo_epi16(_mm256_srli_epi32(_mm256_mullo_epi16(srcR, ina), 8), _mm256_set1_epi32(this->cr)), BlendBlit::kRModShift - 8));
+ srcG = _mm256_add_epi32(dstG, _mm256_slli_epi32(_mm256_mullo_epi16(_mm256_srli_epi32(_mm256_mullo_epi16(srcG, ina), 8), _mm256_set1_epi32(this->cg)), BlendBlit::kGModShift - 8));
+ srcB = _mm256_add_epi32(dstB, _mm256_mullo_epi16(_mm256_srli_epi32(_mm256_mullo_epi16(srcB, ina), 8), _mm256_set1_epi32(this->cb)));
src = _mm256_or_si256(_mm256_and_si256(srcB, _mm256_set1_epi32(BlendBlit::kBModMask)), _mm256_set1_epi32(BlendBlit::kAModMask));
src = _mm256_or_si256(_mm256_and_si256(srcG, _mm256_set1_epi32(BlendBlit::kGModMask)), src);
src = _mm256_or_si256(_mm256_and_si256(srcR, _mm256_set1_epi32(BlendBlit::kRModMask)), src);
@@ -85,12 +88,15 @@ struct AlphaBlend : public BlendBlitImpl_Base::AlphaBlend<doscale, rgbmod, alpha
}
};
-template<bool doscale, bool rgbmod, bool alphamod>
-struct MultiplyBlend : public BlendBlitImpl_Base::MultiplyBlend<doscale, rgbmod, alphamod> {
- static inline __m256i simd(__m256i src, __m256i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
+template<bool rgbmod, bool alphamod>
+struct MultiplyBlend : public BlendBlitImpl_Base::MultiplyBlend<rgbmod, alphamod> {
+public:
+ constexpr MultiplyBlend(const uint32 color) : BlendBlitImpl_Base::MultiplyBlend<rgbmod, alphamod>(color) {}
+
+ inline __m256i simd(__m256i src, __m256i dst) const {
__m256i ina, alphaMask;
if (alphamod) {
- ina = _mm256_srli_epi32(_mm256_mullo_epi16(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask)), _mm256_set1_epi32(ca)), 8);
+ ina = _mm256_srli_epi32(_mm256_mullo_epi16(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask)), _mm256_set1_epi32(this->ca)), 8);
alphaMask = _mm256_cmpeq_epi32(ina, _mm256_setzero_si256());
} else {
ina = _mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask));
@@ -105,9 +111,9 @@ struct MultiplyBlend : public BlendBlitImpl_Base::MultiplyBlend<doscale, rgbmod,
__m256i dstG = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
__m256i dstR = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
- srcB = _mm256_and_si256(_mm256_slli_epi32(_mm256_mullo_epi32(dstB, _mm256_srli_epi32(_mm256_mullo_epi32(_mm256_mullo_epi16(srcB, _mm256_set1_epi32(cb)), ina), 16)), BlendBlit::kBModShift - 8), _mm256_set1_epi32(BlendBlit::kBModMask));
- srcG = _mm256_and_si256(_mm256_slli_epi32(_mm256_mullo_epi32(dstG, _mm256_srli_epi32(_mm256_mullo_epi32(_mm256_mullo_epi16(srcG, _mm256_set1_epi32(cg)), ina), 16)), BlendBlit::kGModShift - 8), _mm256_set1_epi32(BlendBlit::kGModMask));
- srcR = _mm256_and_si256(_mm256_slli_epi32(_mm256_mullo_epi32(dstR, _mm256_srli_epi32(_mm256_mullo_epi32(_mm256_mullo_epi16(srcR, _mm256_set1_epi32(cr)), ina), 16)), BlendBlit::kRModShift - 8), _mm256_set1_epi32(BlendBlit::kRModMask));
+ srcB = _mm256_and_si256(_mm256_slli_epi32(_mm256_mullo_epi32(dstB, _mm256_srli_epi32(_mm256_mullo_epi32(_mm256_mullo_epi16(srcB, _mm256_set1_epi32(this->cb)), ina), 16)), BlendBlit::kBModShift - 8), _mm256_set1_epi32(BlendBlit::kBModMask));
+ srcG = _mm256_and_si256(_mm256_slli_epi32(_mm256_mullo_epi32(dstG, _mm256_srli_epi32(_mm256_mullo_epi32(_mm256_mullo_epi16(srcG, _mm256_set1_epi32(this->cg)), ina), 16)), BlendBlit::kGModShift - 8), _mm256_set1_epi32(BlendBlit::kGModMask));
+ srcR = _mm256_and_si256(_mm256_slli_epi32(_mm256_mullo_epi32(dstR, _mm256_srli_epi32(_mm256_mullo_epi32(_mm256_mullo_epi16(srcR, _mm256_set1_epi32(this->cr)), ina), 16)), BlendBlit::kRModShift - 8), _mm256_set1_epi32(BlendBlit::kRModMask));
src = _mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask));
src = _mm256_or_si256(src, _mm256_or_si256(srcB, _mm256_or_si256(srcG, srcR)));
@@ -131,16 +137,22 @@ struct MultiplyBlend : public BlendBlitImpl_Base::MultiplyBlend<doscale, rgbmod,
}
};
-template<bool doscale, bool rgbmod, bool alphamod>
-struct OpaqueBlend : public BlendBlitImpl_Base::OpaqueBlend<doscale, rgbmod, alphamod> {
- static inline __m256i simd(__m256i src, __m256i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
+template<bool rgbmod, bool alphamod>
+struct OpaqueBlend : public BlendBlitImpl_Base::OpaqueBlend<rgbmod, alphamod> {
+public:
+ constexpr OpaqueBlend(const uint32 color) : BlendBlitImpl_Base::OpaqueBlend<rgbmod, alphamod>(color) {}
+
+ inline __m256i simd(__m256i src, __m256i dst) const {
return _mm256_or_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask));
}
};
-template<bool doscale, bool rgbmod, bool alphamod>
-struct BinaryBlend : public BlendBlitImpl_Base::BinaryBlend<doscale, rgbmod, alphamod> {
- static inline __m256i simd(__m256i src, __m256i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
+template<bool rgbmod, bool alphamod>
+struct BinaryBlend : public BlendBlitImpl_Base::BinaryBlend<rgbmod, alphamod> {
+public:
+ constexpr BinaryBlend(const uint32 color) : BlendBlitImpl_Base::BinaryBlend<rgbmod, alphamod>(color) {}
+
+ inline __m256i simd(__m256i src, __m256i dst) const {
__m256i alphaMask = _mm256_cmpeq_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask)), _mm256_setzero_si256());
dst = _mm256_and_si256(dst, alphaMask);
src = _mm256_andnot_si256(alphaMask, _mm256_or_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask)));
@@ -148,12 +160,15 @@ struct BinaryBlend : public BlendBlitImpl_Base::BinaryBlend<doscale, rgbmod, alp
}
};
-template<bool doscale, bool rgbmod, bool alphamod>
-struct AdditiveBlend : public BlendBlitImpl_Base::AdditiveBlend<doscale, rgbmod, alphamod> {
- static inline __m256i simd(__m256i src, __m256i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
+template<bool rgbmod, bool alphamod>
+struct AdditiveBlend : public BlendBlitImpl_Base::AdditiveBlend<rgbmod, alphamod> {
+public:
+ constexpr AdditiveBlend(const uint32 color) : BlendBlitImpl_Base::AdditiveBlend<rgbmod, alphamod>(color) {}
+
+ inline __m256i simd(__m256i src, __m256i dst) const {
__m256i ina;
if (alphamod)
- ina = _mm256_srli_epi32(_mm256_mullo_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask)), _mm256_set1_epi32(ca)), 8);
+ ina = _mm256_srli_epi32(_mm256_mullo_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask)), _mm256_set1_epi32(this->ca)), 8);
else
ina = _mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask));
__m256i alphaMask = _mm256_cmpeq_epi32(ina, _mm256_set1_epi32(0));
@@ -166,9 +181,9 @@ struct AdditiveBlend : public BlendBlitImpl_Base::AdditiveBlend<doscale, rgbmod,
__m256i dstg = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
__m256i dstr = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
- srcb = _mm256_and_si256(_mm256_add_epi32(dstb, _mm256_srli_epi32(_mm256_mullo_epi32(srcb, _mm256_mullo_epi32(_mm256_set1_epi32(cb), ina)), 16)), _mm256_set1_epi32(BlendBlit::kBModMask));
- srcg = _mm256_and_si256(_mm256_add_epi32(dstg, _mm256_mullo_epi32(srcg, _mm256_mullo_epi32(_mm256_set1_epi32(cg), ina))), _mm256_set1_epi32(BlendBlit::kGModMask));
- srcr = _mm256_and_si256(_mm256_add_epi32(dstr, _mm256_srli_epi32(_mm256_mullo_epi32(srcr, _mm256_mullo_epi32(_mm256_set1_epi32(cr), ina)), BlendBlit::kRModShift - 16)), _mm256_set1_epi32(BlendBlit::kRModMask));
+ srcb = _mm256_and_si256(_mm256_add_epi32(dstb, _mm256_srli_epi32(_mm256_mullo_epi32(srcb, _mm256_mullo_epi32(_mm256_set1_epi32(this->cb), ina)), 16)), _mm256_set1_epi32(BlendBlit::kBModMask));
+ srcg = _mm256_and_si256(_mm256_add_epi32(dstg, _mm256_mullo_epi32(srcg, _mm256_mullo_epi32(_mm256_set1_epi32(this->cg), ina))), _mm256_set1_epi32(BlendBlit::kGModMask));
+ srcr = _mm256_and_si256(_mm256_add_epi32(dstr, _mm256_srli_epi32(_mm256_mullo_epi32(srcr, _mm256_mullo_epi32(_mm256_set1_epi32(this->cr), ina)), BlendBlit::kRModShift - 16)), _mm256_set1_epi32(BlendBlit::kRModMask));
src = _mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kAModMask));
src = _mm256_or_si256(src, _mm256_or_si256(srcb, _mm256_or_si256(srcg, srcb)));
@@ -202,9 +217,12 @@ struct AdditiveBlend : public BlendBlitImpl_Base::AdditiveBlend<doscale, rgbmod,
}
};
-template<bool doscale, bool rgbmod, bool alphamod>
-struct SubtractiveBlend : public BlendBlitImpl_Base::SubtractiveBlend<doscale, rgbmod, alphamod> {
- static inline __m256i simd(__m256i src, __m256i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
+template<bool rgbmod, bool alphamod>
+struct SubtractiveBlend : public BlendBlitImpl_Base::SubtractiveBlend<rgbmod, alphamod> {
+public:
+ constexpr SubtractiveBlend(const uint32 color) : BlendBlitImpl_Base::SubtractiveBlend<rgbmod, alphamod>(color) {}
+
+ inline __m256i simd(__m256i src, __m256i dst) const {
__m256i ina = _mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask));
__m256i srcb = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
__m256i srcg = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
@@ -213,27 +231,23 @@ struct SubtractiveBlend : public BlendBlitImpl_Base::SubtractiveBlend<doscale, r
__m256i dstg = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
__m256i dstr = _mm256_srli_epi32(_mm256_and_si256(dst, _mm256_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
- srcb = _mm256_and_si256(_mm256_slli_epi32(_mm256_max_epi16(_mm256_sub_epi32(dstb, _mm256_srli_epi32(_mm256_mullo_epi32(_mm256_mullo_epi32(srcb, _mm256_set1_epi32(cb)), _mm256_mullo_epi32(dstb, ina)), 24)), _mm256_set1_epi32(0)), BlendBlit::kBModShift), _mm256_set1_epi32(BlendBlit::kBModMask));
- srcg = _mm256_and_si256(_mm256_slli_epi32(_mm256_max_epi16(_mm256_sub_epi32(dstg, _mm256_srli_epi32(_mm256_mullo_epi32(_mm256_mullo_epi32(srcg, _mm256_set1_epi32(cg)), _mm256_mullo_epi32(dstg, ina)), 24)), _mm256_set1_epi32(0)), BlendBlit::kGModShift), _mm256_set1_epi32(BlendBlit::kGModMask));
- srcr = _mm256_and_si256(_mm256_slli_epi32(_mm256_max_epi16(_mm256_sub_epi32(dstr, _mm256_srli_epi32(_mm256_mullo_epi32(_mm256_mullo_epi32(srcr, _mm256_set1_epi32(cr)), _mm256_mullo_epi32(dstr, ina)), 24)), _mm256_set1_epi32(0)), BlendBlit::kRModShift), _mm256_set1_epi32(BlendBlit::kRModMask));
+ srcb = _mm256_and_si256(_mm256_slli_epi32(_mm256_max_epi16(_mm256_sub_epi32(dstb, _mm256_srli_epi32(_mm256_mullo_epi32(_mm256_mullo_epi32(srcb, _mm256_set1_epi32(this->cb)), _mm256_mullo_epi32(dstb, ina)), 24)), _mm256_set1_epi32(0)), BlendBlit::kBModShift), _mm256_set1_epi32(BlendBlit::kBModMask));
+ srcg = _mm256_and_si256(_mm256_slli_epi32(_mm256_max_epi16(_mm256_sub_epi32(dstg, _mm256_srli_epi32(_mm256_mullo_epi32(_mm256_mullo_epi32(srcg, _mm256_set1_epi32(this->cg)), _mm256_mullo_epi32(dstg, ina)), 24)), _mm256_set1_epi32(0)), BlendBlit::kGModShift), _mm256_set1_epi32(BlendBlit::kGModMask));
+ srcr = _mm256_and_si256(_mm256_slli_epi32(_mm256_max_epi16(_mm256_sub_epi32(dstr, _mm256_srli_epi32(_mm256_mullo_epi32(_mm256_mullo_epi32(srcr, _mm256_set1_epi32(this->cr)), _mm256_mullo_epi32(dstr, ina)), 24)), _mm256_set1_epi32(0)), BlendBlit::kRModShift), _mm256_set1_epi32(BlendBlit::kRModMask));
return _mm256_or_si256(_mm256_set1_epi32(BlendBlit::kAModMask), _mm256_or_si256(srcb, _mm256_or_si256(srcg, srcr)));
}
};
public:
-template<template <bool DOSCALE, bool RGBMOD, bool ALPHAMOD> class PixelFunc, bool doscale, bool rgbmod, bool alphamod, bool coloradd1, bool loaddst>
+template<template <bool RGBMOD, bool ALPHAMOD> class PixelFunc, bool doscale, bool rgbmod, bool alphamod>
static void blitInnerLoop(BlendBlit::Args &args) {
+ const bool loaddst = true; // TODO: Only set this when necessary
+
const byte *in;
byte *out;
- const byte rawcr = (args.color >> BlendBlit::kRModShift) & 0xFF;
- const byte rawcg = (args.color >> BlendBlit::kGModShift) & 0xFF;
- const byte rawcb = (args.color >> BlendBlit::kBModShift) & 0xFF;
- const byte ca = alphamod ? ((args.color >> BlendBlit::kAModShift) & 0xFF) : 255;
- const uint32 cr = coloradd1 ? (rgbmod ? (rawcr == 255 ? 256 : rawcr) : 256) : (rgbmod ? rawcr : 255);
- const uint32 cg = coloradd1 ? (rgbmod ? (rawcg == 255 ? 256 : rawcg) : 256) : (rgbmod ? rawcg : 255);
- const uint32 cb = coloradd1 ? (rgbmod ? (rawcb == 255 ? 256 : rawcb) : 256) : (rgbmod ? rawcb : 255);
+ const PixelFunc<rgbmod, alphamod> pixelFunc(args.color);
int scaleXCtr, scaleYCtr = args.scaleYoff;
const byte *inBase;
@@ -273,7 +287,7 @@ static void blitInnerLoop(BlendBlit::Args &args) {
srcPixels = _mm256_permute2x128_si256(srcPixels, srcPixels, 0x01);
}
{
- const __m256i res = PixelFunc<doscale, rgbmod, alphamod>::simd(srcPixels, dstPixels, args.flipping & FLIP_H, ca, cr, cg, cb);
+ const __m256i res = pixelFunc.simd(srcPixels, dstPixels);
_mm256_storeu_si256((__m256i *)out, res);
}
if (!doscale) in += (ptrdiff_t)args.inStep * 8;
@@ -285,8 +299,8 @@ static void blitInnerLoop(BlendBlit::Args &args) {
in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
}
- PixelFunc<doscale, rgbmod, alphamod>::normal(in, out, ca, cr, cg, cb);
-
+ pixelFunc.normal(in, out);
+
if (doscale)
scaleXCtr += args.scaleX;
else
diff --git a/graphics/blit/blit-generic.cpp b/graphics/blit/blit-generic.cpp
index 25033ffcea1..b153851b4fb 100644
--- a/graphics/blit/blit-generic.cpp
+++ b/graphics/blit/blit-generic.cpp
@@ -28,18 +28,12 @@ class BlendBlitImpl_Default : public BlendBlitImpl_Base {
friend class BlendBlit;
public:
-template<template <bool DOSCALE, bool RGBMOD, bool ALPHAMOD> class PixelFunc, bool doscale, bool rgbmod, bool alphamod, bool coloradd1, bool loaddst>
+template<template <bool RGBMOD, bool ALPHAMOD> class PixelFunc, bool doscale, bool rgbmod, bool alphamod>
static inline void blitInnerLoop(BlendBlit::Args &args) {
const byte *in;
byte *out;
- const byte rawcr = (args.color >> BlendBlit::kRModShift) & 0xFF;
- const byte rawcg = (args.color >> BlendBlit::kGModShift) & 0xFF;
- const byte rawcb = (args.color >> BlendBlit::kBModShift) & 0xFF;
- const byte ca = alphamod ? ((args.color >> BlendBlit::kAModShift) & 0xFF) : 255;
- const uint32 cr = coloradd1 ? (rgbmod ? (rawcr == 255 ? 256 : rawcr) : 256) : (rgbmod ? rawcr : 255);
- const uint32 cg = coloradd1 ? (rgbmod ? (rawcg == 255 ? 256 : rawcg) : 256) : (rgbmod ? rawcg : 255);
- const uint32 cb = coloradd1 ? (rgbmod ? (rawcb == 255 ? 256 : rawcb) : 256) : (rgbmod ? rawcb : 255);
+ const PixelFunc<rgbmod, alphamod> pixelFunc(args.color);
int scaleXCtr, scaleYCtr = args.scaleYoff;
const byte *inBase;
@@ -58,7 +52,7 @@ static inline void blitInnerLoop(BlendBlit::Args &args) {
in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
}
- PixelFunc<doscale, rgbmod, alphamod>::normal(in, out, ca, cr, cg, cb);
+ pixelFunc.normal(in, out);
if (doscale)
scaleXCtr += args.scaleX;
@@ -158,22 +152,22 @@ static void doBlitBinaryBlendLogicGeneric(BlendBlit::Args &args) {
}; // end of class BlendBlitImpl_Default
template<>
-inline void BlendBlitImpl_Default::blitInnerLoop<BlendBlitImpl_Default::OpaqueBlend, true, false, false, false, true>(BlendBlit::Args &args) {
+inline void BlendBlitImpl_Default::blitInnerLoop<BlendBlitImpl_Default::OpaqueBlend, true, false, false>(BlendBlit::Args &args) {
doBlitOpaqueBlendLogicGeneric<true>(args);
}
template<>
-inline void BlendBlitImpl_Default::blitInnerLoop<BlendBlitImpl_Default::OpaqueBlend, false, false, false, false, true>(BlendBlit::Args &args) {
+inline void BlendBlitImpl_Default::blitInnerLoop<BlendBlitImpl_Default::OpaqueBlend, false, false, false>(BlendBlit::Args &args) {
doBlitOpaqueBlendLogicGeneric<false>(args);
}
template<>
-inline void BlendBlitImpl_Default::blitInnerLoop<BlendBlitImpl_Default::BinaryBlend, true, false, false, false, true>(BlendBlit::Args &args) {
+inline void BlendBlitImpl_Default::blitInnerLoop<BlendBlitImpl_Default::BinaryBlend, true, false, false>(BlendBlit::Args &args) {
doBlitBinaryBlendLogicGeneric<true>(args);
}
template<>
-inline void BlendBlitImpl_Default::blitInnerLoop<BlendBlitImpl_Default::BinaryBlend, false, false, false, false, true>(BlendBlit::Args &args) {
+inline void BlendBlitImpl_Default::blitInnerLoop<BlendBlitImpl_Default::BinaryBlend, false, false, false>(BlendBlit::Args &args) {
doBlitBinaryBlendLogicGeneric<false>(args);
}
diff --git a/graphics/blit/blit-neon.cpp b/graphics/blit/blit-neon.cpp
index d35668e0aa0..b35a22827c3 100644
--- a/graphics/blit/blit-neon.cpp
+++ b/graphics/blit/blit-neon.cpp
@@ -44,12 +44,15 @@ namespace Graphics {
class BlendBlitImpl_NEON : public BlendBlitImpl_Base {
friend class BlendBlit;
-template<bool doscale, bool rgbmod, bool alphamod>
-struct AlphaBlend : public BlendBlitImpl_Base::AlphaBlend<doscale, rgbmod, alphamod> {
- static inline uint32x4_t simd(uint32x4_t src, uint32x4_t dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
+template<bool rgbmod, bool alphamod>
+struct AlphaBlend : public BlendBlitImpl_Base::AlphaBlend<rgbmod, alphamod> {
+public:
+ constexpr AlphaBlend(const uint32 color) : BlendBlitImpl_Base::AlphaBlend<rgbmod, alphamod>(color) {}
+
+ inline uint32x4_t simd(uint32x4_t src, uint32x4_t dst) const {
uint32x4_t ina;
if (alphamod)
- ina = vshrq_n_u32(vmulq_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kAModMask)), vdupq_n_u32(ca)), 8);
+ ina = vshrq_n_u32(vmulq_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kAModMask)), vdupq_n_u32(this->ca)), 8);
else
ina = vandq_u32(src, vmovq_n_u32(BlendBlit::kAModMask));
uint32x4_t alphaMask = vceqq_u32(ina, vmovq_n_u32(0));
@@ -65,9 +68,9 @@ struct AlphaBlend : public BlendBlitImpl_Base::AlphaBlend<doscale, rgbmod, alpha
dstR = vshrq_n_u32(vmulq_u32(dstR, vsubq_u32(vmovq_n_u32(255), ina)), 8);
dstG = vshrq_n_u32(vmulq_u32(dstG, vsubq_u32(vmovq_n_u32(255), ina)), 8);
dstB = vshrq_n_u32(vmulq_u32(dstB, vsubq_u32(vmovq_n_u32(255), ina)), 8);
- srcR = vaddq_u32(dstR, vshrq_n_u32(vmulq_u32(vmulq_u32(srcR, ina), vmovq_n_u32(cr)), 16));
- srcG = vaddq_u32(dstG, vshrq_n_u32(vmulq_u32(vmulq_u32(srcG, ina), vmovq_n_u32(cg)), 16));
- srcB = vaddq_u32(dstB, vshrq_n_u32(vmulq_u32(vmulq_u32(srcB, ina), vmovq_n_u32(cb)), 16));
+ srcR = vaddq_u32(dstR, vshrq_n_u32(vmulq_u32(vmulq_u32(srcR, ina), vmovq_n_u32(this->cr)), 16));
+ srcG = vaddq_u32(dstG, vshrq_n_u32(vmulq_u32(vmulq_u32(srcG, ina), vmovq_n_u32(this->cg)), 16));
+ srcB = vaddq_u32(dstB, vshrq_n_u32(vmulq_u32(vmulq_u32(srcB, ina), vmovq_n_u32(this->cb)), 16));
src = vorrq_u32(vandq_u32(srcB, vmovq_n_u32(BlendBlit::kBModMask)), vmovq_n_u32(BlendBlit::kAModMask));
src = vorrq_u32(vandq_u32(vshlq_n_u32(srcG, 8), vmovq_n_u32(BlendBlit::kGModMask)), src);
src = vorrq_u32(vandq_u32(vshlq_n_u32(srcR, 16), vmovq_n_u32(BlendBlit::kRModMask)), src);
@@ -91,12 +94,15 @@ struct AlphaBlend : public BlendBlitImpl_Base::AlphaBlend<doscale, rgbmod, alpha
}
};
-template<bool doscale, bool rgbmod, bool alphamod>
-struct MultiplyBlend : public BlendBlitImpl_Base::MultiplyBlend<doscale, rgbmod, alphamod> {
- static inline uint32x4_t simd(uint32x4_t src, uint32x4_t dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
+template<bool rgbmod, bool alphamod>
+struct MultiplyBlend : public BlendBlitImpl_Base::MultiplyBlend<rgbmod, alphamod> {
+public:
+ constexpr MultiplyBlend(const uint32 color) : BlendBlitImpl_Base::MultiplyBlend<rgbmod, alphamod>(color) {}
+
+ inline uint32x4_t simd(uint32x4_t src, uint32x4_t dst) const {
uint32x4_t ina, alphaMask;
if (alphamod) {
- ina = vshrq_n_u32(vmulq_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kAModMask)), vdupq_n_u32(ca)), 8);
+ ina = vshrq_n_u32(vmulq_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kAModMask)), vdupq_n_u32(this->ca)), 8);
alphaMask = vceqq_u32(ina, vmovq_n_u32(0));
} else {
ina = vandq_u32(src, vmovq_n_u32(BlendBlit::kAModMask));
@@ -111,9 +117,9 @@ struct MultiplyBlend : public BlendBlitImpl_Base::MultiplyBlend<doscale, rgbmod,
uint32x4_t dstG = vshrq_n_u32(vandq_u32(dst, vmovq_n_u32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
uint32x4_t dstR = vshrq_n_u32(vandq_u32(dst, vmovq_n_u32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
- srcB = vandq_u32(vshlq_n_u32(vmulq_u32(dstB, vshrq_n_u32(vmulq_u32(vmulq_u32(srcB, vmovq_n_u32(cb)), ina), 16)), BlendBlit::kBModShift - 8), vmovq_n_u32(BlendBlit::kBModMask));
- srcG = vandq_u32(vshlq_n_u32(vmulq_u32(dstG, vshrq_n_u32(vmulq_u32(vmulq_u32(srcG, vmovq_n_u32(cg)), ina), 16)), BlendBlit::kGModShift - 8), vmovq_n_u32(BlendBlit::kGModMask));
- srcR = vandq_u32(vshlq_n_u32(vmulq_u32(dstR, vshrq_n_u32(vmulq_u32(vmulq_u32(srcR, vmovq_n_u32(cr)), ina), 16)), BlendBlit::kRModShift - 8), vmovq_n_u32(BlendBlit::kRModMask));
+ srcB = vandq_u32(vshlq_n_u32(vmulq_u32(dstB, vshrq_n_u32(vmulq_u32(vmulq_u32(srcB, vmovq_n_u32(this->cb)), ina), 16)), BlendBlit::kBModShift - 8), vmovq_n_u32(BlendBlit::kBModMask));
+ srcG = vandq_u32(vshlq_n_u32(vmulq_u32(dstG, vshrq_n_u32(vmulq_u32(vmulq_u32(srcG, vmovq_n_u32(this->cg)), ina), 16)), BlendBlit::kGModShift - 8), vmovq_n_u32(BlendBlit::kGModMask));
+ srcR = vandq_u32(vshlq_n_u32(vmulq_u32(dstR, vshrq_n_u32(vmulq_u32(vmulq_u32(srcR, vmovq_n_u32(this->cr)), ina), 16)), BlendBlit::kRModShift - 8), vmovq_n_u32(BlendBlit::kRModMask));
src = vandq_u32(src, vmovq_n_u32(BlendBlit::kAModMask));
src = vorrq_u32(src, vorrq_u32(srcB, vorrq_u32(srcG, srcR)));
@@ -137,16 +143,22 @@ struct MultiplyBlend : public BlendBlitImpl_Base::MultiplyBlend<doscale, rgbmod,
}
};
-template<bool doscale, bool rgbmod, bool alphamod>
-struct OpaqueBlend : public BlendBlitImpl_Base::OpaqueBlend<doscale, rgbmod, alphamod> {
- static inline uint32x4_t simd(uint32x4_t src, uint32x4_t dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
+template<bool rgbmod, bool alphamod>
+struct OpaqueBlend : public BlendBlitImpl_Base::OpaqueBlend<rgbmod, alphamod> {
+public:
+ constexpr OpaqueBlend(const uint32 color) : BlendBlitImpl_Base::OpaqueBlend<rgbmod, alphamod>(color) {}
+
+ inline uint32x4_t simd(uint32x4_t src, uint32x4_t dst) const {
return vorrq_u32(src, vmovq_n_u32(BlendBlit::kAModMask));
}
};
-template<bool doscale, bool rgbmod, bool alphamod>
-struct BinaryBlend : public BlendBlitImpl_Base::BinaryBlend<doscale, rgbmod, alphamod> {
- static inline uint32x4_t simd(uint32x4_t src, uint32x4_t dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
+template<bool rgbmod, bool alphamod>
+struct BinaryBlend : public BlendBlitImpl_Base::BinaryBlend<rgbmod, alphamod> {
+public:
+ constexpr BinaryBlend(const uint32 color) : BlendBlitImpl_Base::BinaryBlend<rgbmod, alphamod>(color) {}
+
+ inline uint32x4_t simd(uint32x4_t src, uint32x4_t dst) const {
uint32x4_t alphaMask = vceqq_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kAModMask)), vmovq_n_u32(0));
dst = vandq_u32(dst, alphaMask);
src = vandq_u32(vorrq_u32(src, vmovq_n_u32(BlendBlit::kAModMask)), vmvnq_u32(alphaMask));
@@ -154,12 +166,15 @@ struct BinaryBlend : public BlendBlitImpl_Base::BinaryBlend<doscale, rgbmod, alp
}
};
-template<bool doscale, bool rgbmod, bool alphamod>
-struct AdditiveBlend : public BlendBlitImpl_Base::AdditiveBlend<doscale, rgbmod, alphamod> {
- static inline uint32x4_t simd(uint32x4_t src, uint32x4_t dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
+template<bool rgbmod, bool alphamod>
+struct AdditiveBlend : public BlendBlitImpl_Base::AdditiveBlend<rgbmod, alphamod> {
+public:
+ constexpr AdditiveBlend(const uint32 color) : BlendBlitImpl_Base::AdditiveBlend<rgbmod, alphamod>(color) {}
+
+ inline uint32x4_t simd(uint32x4_t src, uint32x4_t dst) const {
uint32x4_t ina;
if (alphamod)
- ina = vshrq_n_u32(vmulq_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kAModMask)), vdupq_n_u32(ca)), 8);
+ ina = vshrq_n_u32(vmulq_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kAModMask)), vdupq_n_u32(this->ca)), 8);
else
ina = vandq_u32(src, vmovq_n_u32(BlendBlit::kAModMask));
uint32x4_t alphaMask = vceqq_u32(ina, vmovq_n_u32(0));
@@ -172,9 +187,9 @@ struct AdditiveBlend : public BlendBlitImpl_Base::AdditiveBlend<doscale, rgbmod,
uint32x4_t dstg = vshrq_n_u32(vandq_u32(dst, vmovq_n_u32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
uint32x4_t dstr = vshrq_n_u32(vandq_u32(dst, vmovq_n_u32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
- srcb = vandq_u32(vaddq_u32(dstb, vshrq_n_u32(vmulq_u32(srcb, vmulq_u32(vmovq_n_u32(cb), ina)), 16)), vmovq_n_u32(BlendBlit::kBModMask));
- srcg = vandq_u32(vaddq_u32(dstg, vmulq_u32(srcg, vmulq_u32(vmovq_n_u32(cg), ina))), vmovq_n_u32(BlendBlit::kGModMask));
- srcr = vandq_u32(vaddq_u32(dstr, vshrq_n_u32(vmulq_u32(srcr, vmulq_u32(vmovq_n_u32(cr), ina)), BlendBlit::kRModShift - 16)), vmovq_n_u32(BlendBlit::kRModMask));
+ srcb = vandq_u32(vaddq_u32(dstb, vshrq_n_u32(vmulq_u32(srcb, vmulq_u32(vmovq_n_u32(this->cb), ina)), 16)), vmovq_n_u32(BlendBlit::kBModMask));
+ srcg = vandq_u32(vaddq_u32(dstg, vmulq_u32(srcg, vmulq_u32(vmovq_n_u32(this->cg), ina))), vmovq_n_u32(BlendBlit::kGModMask));
+ srcr = vandq_u32(vaddq_u32(dstr, vshrq_n_u32(vmulq_u32(srcr, vmulq_u32(vmovq_n_u32(this->cr), ina)), BlendBlit::kRModShift - 16)), vmovq_n_u32(BlendBlit::kRModMask));
src = vandq_u32(dst, vmovq_n_u32(BlendBlit::kAModMask));
src = vorrq_u32(src, vorrq_u32(srcb, vorrq_u32(srcg, srcr)));
@@ -208,9 +223,12 @@ struct AdditiveBlend : public BlendBlitImpl_Base::AdditiveBlend<doscale, rgbmod,
}
};
-template<bool doscale, bool rgbmod, bool alphamod>
-struct SubtractiveBlend : public BlendBlitImpl_Base::SubtractiveBlend<doscale, rgbmod, alphamod> {
- static inline uint32x4_t simd(uint32x4_t src, uint32x4_t dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
+template<bool rgbmod, bool alphamod>
+struct SubtractiveBlend : public BlendBlitImpl_Base::SubtractiveBlend<rgbmod, alphamod> {
+public:
+ constexpr SubtractiveBlend(const uint32 color) : BlendBlitImpl_Base::SubtractiveBlend<rgbmod, alphamod>(color) {}
+
+ inline uint32x4_t simd(uint32x4_t src, uint32x4_t dst) const {
uint32x4_t ina = vandq_u32(src, vmovq_n_u32(BlendBlit::kAModMask));
uint32x4_t srcb = vshrq_n_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
uint32x4_t srcg = vshrq_n_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
@@ -219,27 +237,23 @@ struct SubtractiveBlend : public BlendBlitImpl_Base::SubtractiveBlend<doscale, r
uint32x4_t dstg = vshrq_n_u32(vandq_u32(dst, vmovq_n_u32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
uint32x4_t dstr = vshrq_n_u32(vandq_u32(dst, vmovq_n_u32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
- srcb = vandq_u32(vshlq_n_u32(vreinterpretq_u32_s32(vmaxq_s32(vsubq_s32(vreinterpretq_s32_u32(dstb), vreinterpretq_s32_u32(vshrq_n_u32(vmulq_u32(vmulq_u32(srcb, vmovq_n_u32(cb)), vmulq_u32(dstb, ina)), 24))), vmovq_n_s32(0))), BlendBlit::kBModShift), vmovq_n_u32(BlendBlit::kBModMask));
- srcg = vandq_u32(vshlq_n_u32(vreinterpretq_u32_s32(vmaxq_s32(vsubq_s32(vreinterpretq_s32_u32(dstg), vreinterpretq_s32_u32(vshrq_n_u32(vmulq_u32(vmulq_u32(srcg, vmovq_n_u32(cg)), vmulq_u32(dstg, ina)), 24))), vmovq_n_s32(0))), BlendBlit::kGModShift), vmovq_n_u32(BlendBlit::kGModMask));
- srcr = vandq_u32(vshlq_n_u32(vreinterpretq_u32_s32(vmaxq_s32(vsubq_s32(vreinterpretq_s32_u32(dstr), vreinterpretq_s32_u32(vshrq_n_u32(vmulq_u32(vmulq_u32(srcr, vmovq_n_u32(cr)), vmulq_u32(dstr, ina)), 24))), vmovq_n_s32(0))), BlendBlit::kRModShift), vmovq_n_u32(BlendBlit::kRModMask));
+ srcb = vandq_u32(vshlq_n_u32(vreinterpretq_u32_s32(vmaxq_s32(vsubq_s32(vreinterpretq_s32_u32(dstb), vreinterpretq_s32_u32(vshrq_n_u32(vmulq_u32(vmulq_u32(srcb, vmovq_n_u32(this->cb)), vmulq_u32(dstb, ina)), 24))), vmovq_n_s32(0))), BlendBlit::kBModShift), vmovq_n_u32(BlendBlit::kBModMask));
+ srcg = vandq_u32(vshlq_n_u32(vreinterpretq_u32_s32(vmaxq_s32(vsubq_s32(vreinterpretq_s32_u32(dstg), vreinterpretq_s32_u32(vshrq_n_u32(vmulq_u32(vmulq_u32(srcg, vmovq_n_u32(this->cg)), vmulq_u32(dstg, ina)), 24))), vmovq_n_s32(0))), BlendBlit::kGModShift), vmovq_n_u32(BlendBlit::kGModMask));
+ srcr = vandq_u32(vshlq_n_u32(vreinterpretq_u32_s32(vmaxq_s32(vsubq_s32(vreinterpretq_s32_u32(dstr), vreinterpretq_s32_u32(vshrq_n_u32(vmulq_u32(vmulq_u32(srcr, vmovq_n_u32(this->cr)), vmulq_u32(dstr, ina)), 24))), vmovq_n_s32(0))), BlendBlit::kRModShift), vmovq_n_u32(BlendBlit::kRModMask));
return vorrq_u32(vmovq_n_u32(BlendBlit::kAModMask), vorrq_u32(srcb, vorrq_u32(srcg, srcr)));
}
};
public:
-template<template <bool DOSCALE, bool RGBMOD, bool ALPHAMOD> class PixelFunc, bool doscale, bool rgbmod, bool alphamod, bool coloradd1, bool loaddst>
+template<template <bool RGBMOD, bool ALPHAMOD> class PixelFunc, bool doscale, bool rgbmod, bool alphamod>
static inline void blitInnerLoop(BlendBlit::Args &args) {
+ const bool loaddst = true; // TODO: Only set this when necessary
+
const byte *in;
byte *out;
- const byte rawcr = (args.color >> BlendBlit::kRModShift) & 0xFF;
- const byte rawcg = (args.color >> BlendBlit::kGModShift) & 0xFF;
- const byte rawcb = (args.color >> BlendBlit::kBModShift) & 0xFF;
- const byte ca = alphamod ? ((args.color >> BlendBlit::kAModShift) & 0xFF) : 255;
- const uint32 cr = coloradd1 ? (rgbmod ? (rawcr == 255 ? 256 : rawcr) : 256) : (rgbmod ? rawcr : 255);
- const uint32 cg = coloradd1 ? (rgbmod ? (rawcg == 255 ? 256 : rawcg) : 256) : (rgbmod ? rawcg : 255);
- const uint32 cb = coloradd1 ? (rgbmod ? (rawcb == 255 ? 256 : rawcb) : 256) : (rgbmod ? rawcb : 255);
+ PixelFunc<rgbmod, alphamod> pixelFunc(args.color);
int scaleXCtr, scaleYCtr = args.scaleYoff;
const byte *inBase;
@@ -276,7 +290,7 @@ static inline void blitInnerLoop(BlendBlit::Args &args) {
srcPixels = vcombine_u32(vget_high_u32(srcPixels), vget_low_u32(srcPixels));
}
{
- const uint32x4_t res = PixelFunc<doscale, rgbmod, alphamod>::simd(srcPixels, dstPixels, args.flipping & FLIP_H, ca, cr, cg, cb);
+ const uint32x4_t res = pixelFunc.simd(srcPixels, dstPixels);
vst1q_u32((uint32 *)out, res);
}
if (!doscale) in += args.inStep * 4;
@@ -288,7 +302,7 @@ static inline void blitInnerLoop(BlendBlit::Args &args) {
in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
}
- PixelFunc<doscale, rgbmod, alphamod>::normal(in, out, ca, cr, cg, cb);
+ pixelFunc.normal(in, out);
if (doscale)
scaleXCtr += args.scaleX;
diff --git a/graphics/blit/blit-sse2.cpp b/graphics/blit/blit-sse2.cpp
index c454d88f3b8..e6634ade320 100644
--- a/graphics/blit/blit-sse2.cpp
+++ b/graphics/blit/blit-sse2.cpp
@@ -48,12 +48,15 @@ static FORCEINLINE __m128i sse2_mul32(__m128i a, __m128i b) {
class BlendBlitImpl_SSE2 : public BlendBlitImpl_Base {
friend class BlendBlit;
-template<bool doscale, bool rgbmod, bool alphamod>
-struct AlphaBlend : public BlendBlitImpl_Base::AlphaBlend<doscale, rgbmod, alphamod> {
- static inline __m128i simd(__m128i src, __m128i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
+template<bool rgbmod, bool alphamod>
+struct AlphaBlend : public BlendBlitImpl_Base::AlphaBlend<rgbmod, alphamod> {
+public:
+ constexpr AlphaBlend(const uint32 color) : BlendBlitImpl_Base::AlphaBlend<rgbmod, alphamod>(color) {}
+
+ inline __m128i simd(__m128i src, __m128i dst) const {
__m128i ina;
if (alphamod)
- ina = _mm_srli_epi32(_mm_mullo_epi16(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask)), _mm_set1_epi32(ca)), 8);
+ ina = _mm_srli_epi32(_mm_mullo_epi16(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask)), _mm_set1_epi32(this->ca)), 8);
else
ina = _mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask));
__m128i alphaMask = _mm_cmpeq_epi32(ina, _mm_setzero_si128());
@@ -69,9 +72,9 @@ struct AlphaBlend : public BlendBlitImpl_Base::AlphaBlend<doscale, rgbmod, alpha
dstR = _mm_slli_epi32(_mm_mullo_epi16(dstR, _mm_sub_epi32(_mm_set1_epi32(255), ina)), BlendBlit::kRModShift - 8);
dstG = _mm_slli_epi32(_mm_mullo_epi16(dstG, _mm_sub_epi32(_mm_set1_epi32(255), ina)), BlendBlit::kGModShift - 8);
dstB = _mm_mullo_epi16(dstB, _mm_sub_epi32(_mm_set1_epi32(255), ina));
- srcR = _mm_add_epi32(dstR, _mm_slli_epi32(_mm_mullo_epi16(_mm_srli_epi32(_mm_mullo_epi16(srcR, ina), 8), _mm_set1_epi32(cr)), BlendBlit::kRModShift - 8));
- srcG = _mm_add_epi32(dstG, _mm_slli_epi32(_mm_mullo_epi16(_mm_srli_epi32(_mm_mullo_epi16(srcG, ina), 8), _mm_set1_epi32(cg)), BlendBlit::kGModShift - 8));
- srcB = _mm_add_epi32(dstB, _mm_mullo_epi16(_mm_srli_epi32(_mm_mullo_epi16(srcB, ina), 8), _mm_set1_epi32(cb)));
+ srcR = _mm_add_epi32(dstR, _mm_slli_epi32(_mm_mullo_epi16(_mm_srli_epi32(_mm_mullo_epi16(srcR, ina), 8), _mm_set1_epi32(this->cr)), BlendBlit::kRModShift - 8));
+ srcG = _mm_add_epi32(dstG, _mm_slli_epi32(_mm_mullo_epi16(_mm_srli_epi32(_mm_mullo_epi16(srcG, ina), 8), _mm_set1_epi32(this->cg)), BlendBlit::kGModShift - 8));
+ srcB = _mm_add_epi32(dstB, _mm_mullo_epi16(_mm_srli_epi32(_mm_mullo_epi16(srcB, ina), 8), _mm_set1_epi32(this->cb)));
src = _mm_or_si128(_mm_and_si128(srcB, _mm_set1_epi32(BlendBlit::kBModMask)), _mm_set1_epi32(BlendBlit::kAModMask));
src = _mm_or_si128(_mm_and_si128(srcG, _mm_set1_epi32(BlendBlit::kGModMask)), src);
src = _mm_or_si128(_mm_and_si128(srcR, _mm_set1_epi32(BlendBlit::kRModMask)), src);
@@ -95,12 +98,15 @@ struct AlphaBlend : public BlendBlitImpl_Base::AlphaBlend<doscale, rgbmod, alpha
}
};
-template<bool doscale, bool rgbmod, bool alphamod>
-struct MultiplyBlend : public BlendBlitImpl_Base::MultiplyBlend<doscale, rgbmod, alphamod> {
- static inline __m128i simd(__m128i src, __m128i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
+template<bool rgbmod, bool alphamod>
+struct MultiplyBlend : public BlendBlitImpl_Base::MultiplyBlend<rgbmod, alphamod> {
+public:
+ constexpr MultiplyBlend(const uint32 color) : BlendBlitImpl_Base::MultiplyBlend<rgbmod, alphamod>(color) {}
+
+ inline __m128i simd(__m128i src, __m128i dst) const {
__m128i ina, alphaMask;
if (alphamod) {
- ina = _mm_srli_epi32(_mm_mullo_epi16(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask)), _mm_set1_epi32(ca)), 8);
+ ina = _mm_srli_epi32(_mm_mullo_epi16(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask)), _mm_set1_epi32(this->ca)), 8);
alphaMask = _mm_cmpeq_epi32(ina, _mm_setzero_si128());
} else {
ina = _mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask));
@@ -115,9 +121,9 @@ struct MultiplyBlend : public BlendBlitImpl_Base::MultiplyBlend<doscale, rgbmod,
__m128i dstG = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
__m128i dstR = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
- srcB = _mm_and_si128(_mm_slli_epi32(_mm_mullo_epi16(dstB, _mm_srli_epi32(sse2_mul32(_mm_mullo_epi16(srcB, _mm_set1_epi32(cb)), ina), 16)), BlendBlit::kBModShift - 8), _mm_set1_epi32(BlendBlit::kBModMask));
- srcG = _mm_and_si128(_mm_slli_epi32(_mm_mullo_epi16(dstG, _mm_srli_epi32(sse2_mul32(_mm_mullo_epi16(srcG, _mm_set1_epi32(cg)), ina), 16)), BlendBlit::kGModShift - 8), _mm_set1_epi32(BlendBlit::kGModMask));
- srcR = _mm_and_si128(_mm_slli_epi32(_mm_mullo_epi16(dstR, _mm_srli_epi32(sse2_mul32(_mm_mullo_epi16(srcR, _mm_set1_epi32(cr)), ina), 16)), BlendBlit::kRModShift - 8), _mm_set1_epi32(BlendBlit::kRModMask));
+ srcB = _mm_and_si128(_mm_slli_epi32(_mm_mullo_epi16(dstB, _mm_srli_epi32(sse2_mul32(_mm_mullo_epi16(srcB, _mm_set1_epi32(this->cb)), ina), 16)), BlendBlit::kBModShift - 8), _mm_set1_epi32(BlendBlit::kBModMask));
+ srcG = _mm_and_si128(_mm_slli_epi32(_mm_mullo_epi16(dstG, _mm_srli_epi32(sse2_mul32(_mm_mullo_epi16(srcG, _mm_set1_epi32(this->cg)), ina), 16)), BlendBlit::kGModShift - 8), _mm_set1_epi32(BlendBlit::kGModMask));
+ srcR = _mm_and_si128(_mm_slli_epi32(_mm_mullo_epi16(dstR, _mm_srli_epi32(sse2_mul32(_mm_mullo_epi16(srcR, _mm_set1_epi32(this->cr)), ina), 16)), BlendBlit::kRModShift - 8), _mm_set1_epi32(BlendBlit::kRModMask));
src = _mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask));
src = _mm_or_si128(src, _mm_or_si128(srcB, _mm_or_si128(srcG, srcR)));
@@ -141,16 +147,22 @@ struct MultiplyBlend : public BlendBlitImpl_Base::MultiplyBlend<doscale, rgbmod,
}
};
-template<bool doscale, bool rgbmod, bool alphamod>
-struct OpaqueBlend : public BlendBlitImpl_Base::OpaqueBlend<doscale, rgbmod, alphamod> {
- static inline __m128i simd(__m128i src, __m128i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
+template<bool rgbmod, bool alphamod>
+struct OpaqueBlend : public BlendBlitImpl_Base::OpaqueBlend<rgbmod, alphamod> {
+public:
+ constexpr OpaqueBlend(const uint32 color) : BlendBlitImpl_Base::OpaqueBlend<rgbmod, alphamod>(color) {}
+
+ inline __m128i simd(__m128i src, __m128i dst) const {
return _mm_or_si128(src, _mm_set1_epi32(BlendBlit::kAModMask));
}
};
-template<bool doscale, bool rgbmod, bool alphamod>
-struct BinaryBlend : public BlendBlitImpl_Base::BinaryBlend<doscale, rgbmod, alphamod> {
- static inline __m128i simd(__m128i src, __m128i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
+template<bool rgbmod, bool alphamod>
+struct BinaryBlend : public BlendBlitImpl_Base::BinaryBlend<rgbmod, alphamod> {
+public:
+ constexpr BinaryBlend(const uint32 color) : BlendBlitImpl_Base::BinaryBlend<rgbmod, alphamod>(color) {}
+
+ inline __m128i simd(__m128i src, __m128i dst) const {
__m128i alphaMask = _mm_cmpeq_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask)), _mm_setzero_si128());
dst = _mm_and_si128(dst, alphaMask);
src = _mm_andnot_si128(alphaMask, _mm_or_si128(src, _mm_set1_epi32(BlendBlit::kAModMask)));
@@ -158,12 +170,15 @@ struct BinaryBlend : public BlendBlitImpl_Base::BinaryBlend<doscale, rgbmod, alp
}
};
-template<bool doscale, bool rgbmod, bool alphamod>
-struct AdditiveBlend : public BlendBlitImpl_Base::AdditiveBlend<doscale, rgbmod, alphamod> {
- static inline __m128i simd(__m128i src, __m128i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
+template<bool rgbmod, bool alphamod>
+struct AdditiveBlend : public BlendBlitImpl_Base::AdditiveBlend<rgbmod, alphamod> {
+public:
+ constexpr AdditiveBlend(const uint32 color) : BlendBlitImpl_Base::AdditiveBlend<rgbmod, alphamod>(color) {}
+
+ inline __m128i simd(__m128i src, __m128i dst) const {
__m128i ina;
if (alphamod)
- ina = _mm_srli_epi32(sse2_mul32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask)), _mm_set1_epi32(ca)), 8);
+ ina = _mm_srli_epi32(sse2_mul32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask)), _mm_set1_epi32(this->ca)), 8);
else
ina = _mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask));
__m128i alphaMask = _mm_cmpeq_epi32(ina, _mm_set1_epi32(0));
@@ -176,9 +191,9 @@ struct AdditiveBlend : public BlendBlitImpl_Base::AdditiveBlend<doscale, rgbmod,
__m128i dstg = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
__m128i dstr = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
- srcb = _mm_and_si128(_mm_add_epi32(dstb, _mm_srli_epi32(sse2_mul32(srcb, sse2_mul32(_mm_set1_epi32(cb), ina)), 16)), _mm_set1_epi32(BlendBlit::kBModMask));
- srcg = _mm_and_si128(_mm_add_epi32(dstg, sse2_mul32(srcg, sse2_mul32(_mm_set1_epi32(cg), ina))), _mm_set1_epi32(BlendBlit::kGModMask));
- srcr = _mm_and_si128(_mm_add_epi32(dstr, _mm_srli_epi32(sse2_mul32(srcr, sse2_mul32(_mm_set1_epi32(cr), ina)), BlendBlit::kRModShift - 16)), _mm_set1_epi32(BlendBlit::kRModMask));
+ srcb = _mm_and_si128(_mm_add_epi32(dstb, _mm_srli_epi32(sse2_mul32(srcb, sse2_mul32(_mm_set1_epi32(this->cb), ina)), 16)), _mm_set1_epi32(BlendBlit::kBModMask));
+ srcg = _mm_and_si128(_mm_add_epi32(dstg, sse2_mul32(srcg, sse2_mul32(_mm_set1_epi32(this->cg), ina))), _mm_set1_epi32(BlendBlit::kGModMask));
+ srcr = _mm_and_si128(_mm_add_epi32(dstr, _mm_srli_epi32(sse2_mul32(srcr, sse2_mul32(_mm_set1_epi32(this->cr), ina)), BlendBlit::kRModShift - 16)), _mm_set1_epi32(BlendBlit::kRModMask));
src = _mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kAModMask));
src = _mm_or_si128(src, _mm_or_si128(srcb, _mm_or_si128(srcg, srcr)));
@@ -212,9 +227,12 @@ struct AdditiveBlend : public BlendBlitImpl_Base::AdditiveBlend<doscale, rgbmod,
}
};
-template<bool doscale, bool rgbmod, bool alphamod>
-struct SubtractiveBlend : public BlendBlitImpl_Base::SubtractiveBlend<doscale, rgbmod, alphamod> {
- static inline __m128i simd(__m128i src, __m128i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
+template<bool rgbmod, bool alphamod>
+struct SubtractiveBlend : public BlendBlitImpl_Base::SubtractiveBlend<rgbmod, alphamod> {
+public:
+ constexpr SubtractiveBlend(const uint32 color) : BlendBlitImpl_Base::SubtractiveBlend<rgbmod, alphamod>(color) {}
+
+ inline __m128i simd(__m128i src, __m128i dst) const {
__m128i ina = _mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask));
__m128i srcb = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
__m128i srcg = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
@@ -223,27 +241,23 @@ struct SubtractiveBlend : public BlendBlitImpl_Base::SubtractiveBlend<doscale, r
__m128i dstg = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kGModMask)), BlendBlit::kGModShift);
__m128i dstr = _mm_srli_epi32(_mm_and_si128(dst, _mm_set1_epi32(BlendBlit::kRModMask)), BlendBlit::kRModShift);
- srcb = _mm_and_si128(_mm_slli_epi32(_mm_max_epi16(_mm_sub_epi32(dstb, _mm_srli_epi32(sse2_mul32(sse2_mul32(srcb, _mm_set1_epi32(cb)), sse2_mul32(dstb, ina)), 24)), _mm_set1_epi32(0)), BlendBlit::kBModShift), _mm_set1_epi32(BlendBlit::kBModMask));
- srcg = _mm_and_si128(_mm_slli_epi32(_mm_max_epi16(_mm_sub_epi32(dstg, _mm_srli_epi32(sse2_mul32(sse2_mul32(srcg, _mm_set1_epi32(cg)), sse2_mul32(dstg, ina)), 24)), _mm_set1_epi32(0)), BlendBlit::kGModShift), _mm_set1_epi32(BlendBlit::kGModMask));
- srcr = _mm_and_si128(_mm_slli_epi32(_mm_max_epi16(_mm_sub_epi32(dstr, _mm_srli_epi32(sse2_mul32(sse2_mul32(srcr, _mm_set1_epi32(cr)), sse2_mul32(dstr, ina)), 24)), _mm_set1_epi32(0)), BlendBlit::kRModShift), _mm_set1_epi32(BlendBlit::kRModMask));
+ srcb = _mm_and_si128(_mm_slli_epi32(_mm_max_epi16(_mm_sub_epi32(dstb, _mm_srli_epi32(sse2_mul32(sse2_mul32(srcb, _mm_set1_epi32(this->cb)), sse2_mul32(dstb, ina)), 24)), _mm_set1_epi32(0)), BlendBlit::kBModShift), _mm_set1_epi32(BlendBlit::kBModMask));
+ srcg = _mm_and_si128(_mm_slli_epi32(_mm_max_epi16(_mm_sub_epi32(dstg, _mm_srli_epi32(sse2_mul32(sse2_mul32(srcg, _mm_set1_epi32(this->cg)), sse2_mul32(dstg, ina)), 24)), _mm_set1_epi32(0)), BlendBlit::kGModShift), _mm_set1_epi32(BlendBlit::kGModMask));
+ srcr = _mm_and_si128(_mm_slli_epi32(_mm_max_epi16(_mm_sub_epi32(dstr, _mm_srli_epi32(sse2_mul32(sse2_mul32(srcr, _mm_set1_epi32(this->cr)), sse2_mul32(dstr, ina)), 24)), _mm_set1_epi32(0)), BlendBlit::kRModShift), _mm_set1_epi32(BlendBlit::kRModMask));
return _mm_or_si128(_mm_set1_epi32(BlendBlit::kAModMask), _mm_or_si128(srcb, _mm_or_si128(srcg, srcr)));
}
};
public:
-template<template <bool DOSCALE, bool RGBMOD, bool ALPHAMOD> class PixelFunc, bool doscale, bool rgbmod, bool alphamod, bool coloradd1, bool loaddst>
+template<template <bool RGBMOD, bool ALPHAMOD> class PixelFunc, bool doscale, bool rgbmod, bool alphamod>
static inline void blitInnerLoop(BlendBlit::Args &args) {
+ const bool loaddst = true; // TODO: Only set this when necessary
+
const byte *in;
byte *out;
- const byte rawcr = (args.color >> BlendBlit::kRModShift) & 0xFF;
- const byte rawcg = (args.color >> BlendBlit::kGModShift) & 0xFF;
- const byte rawcb = (args.color >> BlendBlit::kBModShift) & 0xFF;
- const byte ca = alphamod ? ((args.color >> BlendBlit::kAModShift) & 0xFF) : 255;
- const uint32 cr = coloradd1 ? (rgbmod ? (rawcr == 255 ? 256 : rawcr) : 256) : (rgbmod ? rawcr : 255);
- const uint32 cg = coloradd1 ? (rgbmod ? (rawcg == 255 ? 256 : rawcg) : 256) : (rgbmod ? rawcg : 255);
- const uint32 cb = coloradd1 ? (rgbmod ? (rawcb == 255 ? 256 : rawcb) : 256) : (rgbmod ? rawcb : 255);
+ PixelFunc<rgbmod, alphamod> pixelFunc(args.color);
int scaleXCtr, scaleYCtr = args.scaleYoff;
const byte *inBase;
@@ -278,7 +292,7 @@ static inline void blitInnerLoop(BlendBlit::Args &args) {
srcPixels = _mm_shuffle_epi32(srcPixels, _MM_SHUFFLE(0, 1, 2, 3));
}
{
- const __m128i res = PixelFunc<doscale, rgbmod, alphamod>::simd(srcPixels, dstPixels, args.flipping & FLIP_H, ca, cr, cg, cb);
+ const __m128i res = pixelFunc.simd(srcPixels, dstPixels);
_mm_storeu_si128((__m128i *)out, res);
}
if (!doscale) in += (ptrdiff_t)args.inStep * 4;
@@ -290,8 +304,8 @@ static inline void blitInnerLoop(BlendBlit::Args &args) {
in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
}
- PixelFunc<doscale, rgbmod, alphamod>::normal(in, out, ca, cr, cg, cb);
-
+ pixelFunc.normal(in, out);
+
if (doscale)
scaleXCtr += args.scaleX;
else
Commit: c41edfd2e5f771c4315cd1c3cb9bf761e75d1dab
https://github.com/scummvm/scummvm/commit/c41edfd2e5f771c4315cd1c3cb9bf761e75d1dab
Author: Cameron Cawley (ccawley2011 at gmail.com)
Date: 2024-09-29T02:19:42+03:00
Commit Message:
GRAPHICS: Some optimisations for the alpha blending routines
Changed paths:
graphics/blit/blit-alpha.h
diff --git a/graphics/blit/blit-alpha.h b/graphics/blit/blit-alpha.h
index 0fbe93bd1da..2ebe959c82d 100644
--- a/graphics/blit/blit-alpha.h
+++ b/graphics/blit/blit-alpha.h
@@ -46,9 +46,27 @@ public:
constexpr AlphaBlend(const uint32 color) : BaseBlend<rgbmod, alphamod>(color) {}
inline void normal(const byte *in, byte *out) const {
- uint32 ina = in[BlendBlit::kAIndex] * this->ca >> 8;
+ uint32 ina;
- if (ina != 0) {
+ if (alphamod) {
+ ina = in[BlendBlit::kAIndex] * this->ca >> 8;
+ } else {
+ ina = in[BlendBlit::kAIndex];
+ }
+
+ if (ina == 255) {
+ if (rgbmod) {
+ out[BlendBlit::kAIndex] = 255;
+ out[BlendBlit::kBIndex] = (in[BlendBlit::kBIndex] * this->cb >> 8);
+ out[BlendBlit::kGIndex] = (in[BlendBlit::kGIndex] * this->cg >> 8);
+ out[BlendBlit::kRIndex] = (in[BlendBlit::kRIndex] * this->cr >> 8);
+ } else {
+ out[BlendBlit::kAIndex] = 255;
+ out[BlendBlit::kBIndex] = in[BlendBlit::kBIndex];
+ out[BlendBlit::kGIndex] = in[BlendBlit::kGIndex];
+ out[BlendBlit::kRIndex] = in[BlendBlit::kRIndex];
+ }
+ } else if (ina != 0) {
if (rgbmod) {
const uint outb = (out[BlendBlit::kBIndex] * (255 - ina) >> 8);
const uint outg = (out[BlendBlit::kGIndex] * (255 - ina) >> 8);
@@ -75,12 +93,34 @@ public:
constexpr MultiplyBlend(const uint32 color) : BaseBlend<rgbmod, alphamod>(color) {}
inline void normal(const byte *in, byte *out) const {
- uint32 ina = in[BlendBlit::kAIndex] * this->ca >> 8;
+ uint32 ina;
- if (ina != 0) {
- out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] * ((in[BlendBlit::kBIndex] * this->cb * ina) >> 16) >> 8;
- out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] * ((in[BlendBlit::kGIndex] * this->cg * ina) >> 16) >> 8;
- out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] * ((in[BlendBlit::kRIndex] * this->cr * ina) >> 16) >> 8;
+ if (alphamod) {
+ ina = in[BlendBlit::kAIndex] * this->ca >> 8;
+ } else {
+ ina = in[BlendBlit::kAIndex];
+ }
+
+ if (ina == 255) {
+ if (rgbmod) {
+ out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] * ((in[BlendBlit::kBIndex] * this->cb) >> 8) >> 8;
+ out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] * ((in[BlendBlit::kGIndex] * this->cg) >> 8) >> 8;
+ out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] * ((in[BlendBlit::kRIndex] * this->cr) >> 8) >> 8;
+ } else {
+ out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] * in[BlendBlit::kBIndex] >> 8;
+ out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] * in[BlendBlit::kGIndex] >> 8;
+ out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] * in[BlendBlit::kRIndex] >> 8;
+ }
+ } else if (ina != 0) {
+ if (rgbmod) {
+ out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] * ((in[BlendBlit::kBIndex] * this->cb * ina) >> 16) >> 8;
+ out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] * ((in[BlendBlit::kGIndex] * this->cg * ina) >> 16) >> 8;
+ out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] * ((in[BlendBlit::kRIndex] * this->cr * ina) >> 16) >> 8;
+ } else {
+ out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] * ((in[BlendBlit::kBIndex] * ina) >> 8) >> 8;
+ out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] * ((in[BlendBlit::kGIndex] * ina) >> 8) >> 8;
+ out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] * ((in[BlendBlit::kRIndex] * ina) >> 8) >> 8;
+ }
}
}
};
@@ -102,11 +142,10 @@ public:
inline void normal(const byte *in, byte *out) const {
uint32 pix = *(const uint32 *)in;
- int a = in[BlendBlit::kAIndex];
+ uint32 a = pix & BlendBlit::kAModMask;
if (a != 0) { // Full opacity (Any value not exactly 0 is Opaque here)
- *(uint32 *)out = pix;
- out[BlendBlit::kAIndex] = 0xFF;
+ *(uint32 *)out = pix | BlendBlit::kAModMask;
}
}
};
@@ -117,12 +156,34 @@ public:
constexpr AdditiveBlend(const uint32 color) : BaseBlend<rgbmod, alphamod>(color) {}
inline void normal(const byte *in, byte *out) const {
- uint32 ina = in[BlendBlit::kAIndex] * this->ca >> 8;
+ uint32 ina;
- if (ina != 0) {
- out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] + ((in[BlendBlit::kBIndex] * this->cb * ina) >> 16);
- out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] + ((in[BlendBlit::kGIndex] * this->cg * ina) >> 16);
- out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] + ((in[BlendBlit::kRIndex] * this->cr * ina) >> 16);
+ if (alphamod) {
+ ina = in[BlendBlit::kAIndex] * this->ca >> 8;
+ } else {
+ ina = in[BlendBlit::kAIndex];
+ }
+
+ if (ina == 255) {
+ if (rgbmod) {
+ out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] + ((in[BlendBlit::kBIndex] * this->cb) >> 8);
+ out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] + ((in[BlendBlit::kGIndex] * this->cg) >> 8);
+ out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] + ((in[BlendBlit::kRIndex] * this->cr) >> 8);
+ } else {
+ out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] + in[BlendBlit::kBIndex];
+ out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] + in[BlendBlit::kGIndex];
+ out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] + in[BlendBlit::kRIndex];
+ }
+ } else if (ina != 0) {
+ if (rgbmod) {
+ out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] + ((in[BlendBlit::kBIndex] * this->cb * ina) >> 16);
+ out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] + ((in[BlendBlit::kGIndex] * this->cg * ina) >> 16);
+ out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] + ((in[BlendBlit::kRIndex] * this->cr * ina) >> 16);
+ } else {
+ out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] + ((in[BlendBlit::kBIndex] * ina) >> 8);
+ out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] + ((in[BlendBlit::kGIndex] * ina) >> 8);
+ out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] + ((in[BlendBlit::kRIndex] * ina) >> 8);
+ }
}
}
};
@@ -133,10 +194,30 @@ public:
constexpr SubtractiveBlend(const uint32 color) : BaseBlend<rgbmod, alphamod>(color) {}
inline void normal(const byte *in, byte *out) const {
+ uint32 ina = in[BlendBlit::kAIndex];
out[BlendBlit::kAIndex] = 255;
- out[BlendBlit::kBIndex] = MAX<int32>(out[BlendBlit::kBIndex] - ((in[BlendBlit::kBIndex] * this->cb * (out[BlendBlit::kBIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
- out[BlendBlit::kGIndex] = MAX<int32>(out[BlendBlit::kGIndex] - ((in[BlendBlit::kGIndex] * this->cg * (out[BlendBlit::kGIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
- out[BlendBlit::kRIndex] = MAX<int32>(out[BlendBlit::kRIndex] - ((in[BlendBlit::kRIndex] * this->cr * (out[BlendBlit::kRIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
+
+ if (ina == 255) {
+ if (rgbmod) {
+ out[BlendBlit::kBIndex] = MAX<int32>(out[BlendBlit::kBIndex] - ((in[BlendBlit::kBIndex] * this->cb * (out[BlendBlit::kBIndex])) >> 16), 0);
+ out[BlendBlit::kGIndex] = MAX<int32>(out[BlendBlit::kGIndex] - ((in[BlendBlit::kGIndex] * this->cg * (out[BlendBlit::kGIndex])) >> 16), 0);
+ out[BlendBlit::kRIndex] = MAX<int32>(out[BlendBlit::kRIndex] - ((in[BlendBlit::kRIndex] * this->cr * (out[BlendBlit::kRIndex])) >> 16), 0);
+ } else {
+ out[BlendBlit::kBIndex] = MAX<int32>(out[BlendBlit::kBIndex] - ((in[BlendBlit::kBIndex] * (out[BlendBlit::kBIndex])) >> 8), 0);
+ out[BlendBlit::kGIndex] = MAX<int32>(out[BlendBlit::kGIndex] - ((in[BlendBlit::kGIndex] * (out[BlendBlit::kGIndex])) >> 8), 0);
+ out[BlendBlit::kRIndex] = MAX<int32>(out[BlendBlit::kRIndex] - ((in[BlendBlit::kRIndex] * (out[BlendBlit::kRIndex])) >> 8), 0);
+ }
+ } else if (ina != 0) {
+ if (rgbmod) {
+ out[BlendBlit::kBIndex] = MAX<int32>(out[BlendBlit::kBIndex] - ((in[BlendBlit::kBIndex] * this->cb * (out[BlendBlit::kBIndex]) * ina) >> 24), 0);
+ out[BlendBlit::kGIndex] = MAX<int32>(out[BlendBlit::kGIndex] - ((in[BlendBlit::kGIndex] * this->cg * (out[BlendBlit::kGIndex]) * ina) >> 24), 0);
+ out[BlendBlit::kRIndex] = MAX<int32>(out[BlendBlit::kRIndex] - ((in[BlendBlit::kRIndex] * this->cr * (out[BlendBlit::kRIndex]) * ina) >> 24), 0);
+ } else {
+ out[BlendBlit::kBIndex] = MAX<int32>(out[BlendBlit::kBIndex] - ((in[BlendBlit::kBIndex] * (out[BlendBlit::kBIndex]) * ina) >> 16), 0);
+ out[BlendBlit::kGIndex] = MAX<int32>(out[BlendBlit::kGIndex] - ((in[BlendBlit::kGIndex] * (out[BlendBlit::kGIndex]) * ina) >> 16), 0);
+ out[BlendBlit::kRIndex] = MAX<int32>(out[BlendBlit::kRIndex] - ((in[BlendBlit::kRIndex] * (out[BlendBlit::kRIndex]) * ina) >> 16), 0);
+ }
+ }
}
};
Commit: ec53c5ea87c7350eded25e6333c77bbbd3519a8d
https://github.com/scummvm/scummvm/commit/ec53c5ea87c7350eded25e6333c77bbbd3519a8d
Author: Cameron Cawley (ccawley2011 at gmail.com)
Date: 2024-09-29T02:19:42+03:00
Commit Message:
GRAPHICS: Simplify the generic blending routines
Changed paths:
graphics/blit/blit-generic.cpp
diff --git a/graphics/blit/blit-generic.cpp b/graphics/blit/blit-generic.cpp
index b153851b4fb..bbdbc062f28 100644
--- a/graphics/blit/blit-generic.cpp
+++ b/graphics/blit/blit-generic.cpp
@@ -68,109 +68,8 @@ static inline void blitInnerLoop(BlendBlit::Args &args) {
}
}
-template<bool doscale>
-static void doBlitOpaqueBlendLogicGeneric(BlendBlit::Args &args) {
- const byte *in;
- byte *out;
-
- int scaleXCtr, scaleYCtr = args.scaleYoff;
- const byte *inBase;
-
- for (uint32 i = 0; i < args.height; i++) {
- if (doscale) {
- inBase = args.ino + (scaleYCtr + 1) / BlendBlit::SCALE_THRESHOLD * args.inoStep;
- scaleXCtr = args.scaleXoff;
- } else {
- in = args.ino;
- }
- out = args.outo;
-
- if (doscale) {
- for (uint32 j = 0; j < args.width; j++) {
- in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
- *(uint32 *)out = *(const uint32 *)in | BlendBlit::kAModMask;
- scaleXCtr += args.scaleX;
- out += 4;
- }
- } else {
- for (uint32 j = 0; j < args.width; j++) {
- *(uint32 *)out = *(const uint32 *)in | BlendBlit::kAModMask;
- in += args.inStep;
- out += 4;
- }
- }
-
- if (doscale)
- scaleYCtr += args.scaleY;
- else
- args.ino += args.inoStep;
- args.outo += args.dstPitch;
- }
-}
-
-template<bool doscale>
-static void doBlitBinaryBlendLogicGeneric(BlendBlit::Args &args) {
- const byte *in;
- byte *out;
-
- int scaleXCtr, scaleYCtr = args.scaleYoff;
- const byte *inBase;
-
- for (uint32 i = 0; i < args.height; i++) {
- if (doscale) {
- inBase = args.ino + scaleYCtr / BlendBlit::SCALE_THRESHOLD * args.inoStep;
- scaleXCtr = args.scaleXoff;
- } else {
- in = args.ino;
- }
- out = args.outo;
- for (uint32 j = 0; j < args.width; j++) {
- if (doscale) {
- in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
- }
-
- uint32 pix = *(const uint32 *)in, pixout = *(const uint32 *)out;
- uint32 mask = (pix & BlendBlit::kAModMask) ? 0xffffffff : 0;
- pixout &= ~mask;
- pix = (pix | BlendBlit::kAModMask) & mask;
- *(uint32 *)out = pixout | pix;
-
- if (doscale)
- scaleXCtr += args.scaleX;
- else
- in += args.inStep;
- out += 4;
- }
- if (doscale)
- scaleYCtr += args.scaleY;
- else
- args.ino += args.inoStep;
- args.outo += args.dstPitch;
- }
-}
-
}; // end of class BlendBlitImpl_Default
-template<>
-inline void BlendBlitImpl_Default::blitInnerLoop<BlendBlitImpl_Default::OpaqueBlend, true, false, false>(BlendBlit::Args &args) {
- doBlitOpaqueBlendLogicGeneric<true>(args);
-}
-
-template<>
-inline void BlendBlitImpl_Default::blitInnerLoop<BlendBlitImpl_Default::OpaqueBlend, false, false, false>(BlendBlit::Args &args) {
- doBlitOpaqueBlendLogicGeneric<false>(args);
-}
-
-template<>
-inline void BlendBlitImpl_Default::blitInnerLoop<BlendBlitImpl_Default::BinaryBlend, true, false, false>(BlendBlit::Args &args) {
- doBlitBinaryBlendLogicGeneric<true>(args);
-}
-
-template<>
-inline void BlendBlitImpl_Default::blitInnerLoop<BlendBlitImpl_Default::BinaryBlend, false, false, false>(BlendBlit::Args &args) {
- doBlitBinaryBlendLogicGeneric<false>(args);
-}
-
void BlendBlit::blitGeneric(Args &args, const TSpriteBlendMode &blendMode, const AlphaType &alphaType) {
blitT<BlendBlitImpl_Default>(args, blendMode, alphaType);
}
More information about the Scummvm-git-logs mailing list