[Scummvm-git-logs] scummvm master -> 94569762cd46122262021c498c1bed596cf7af61

sev- noreply at scummvm.org
Sat Oct 28 22:57:02 UTC 2023


This automated email contains information about 3 new commits which have been
pushed to the 'scummvm' repo located at https://github.com/scummvm/scummvm .

Summary:
9f7b9b4f3a GRAPHICS: Unify common SIMD code for alpha blending
b1863032d4 GRAPHICS: Split BlendBlitImpl_Default into a separate file
94569762cd GRAPHICS: Share code between the generic and SIMD blending functions


Commit: 9f7b9b4f3adc54a6601d51116651a55fc76ccc9f
    https://github.com/scummvm/scummvm/commit/9f7b9b4f3adc54a6601d51116651a55fc76ccc9f
Author: Cameron Cawley (ccawley2011 at gmail.com)
Date: 2023-10-29T00:56:58+02:00

Commit Message:
GRAPHICS: Unify common SIMD code for alpha blending

Changed paths:
  A graphics/blit/blit-alpha.h
    graphics/blit.h
    graphics/blit/blit-avx2.cpp
    graphics/blit/blit-neon.cpp
    graphics/blit/blit-sse2.cpp


diff --git a/graphics/blit.h b/graphics/blit.h
index eca242eb206..5d354f352e3 100644
--- a/graphics/blit.h
+++ b/graphics/blit.h
@@ -226,6 +226,8 @@ private:
 	static void blitAVX2(Args &args, const TSpriteBlendMode &blendMode, const AlphaType &alphaType);
 #endif
 	static void blitGeneric(Args &args, const TSpriteBlendMode &blendMode, const AlphaType &alphaType);
+	template<class T>
+	static void blitT(Args &args, const TSpriteBlendMode &blendMode, const AlphaType &alphaType);
 #undef LOGIC_FUNCS_EXT
 
 	typedef void(*BlitFunc)(Args &, const TSpriteBlendMode &, const AlphaType &);
diff --git a/graphics/blit/blit-alpha.h b/graphics/blit/blit-alpha.h
new file mode 100644
index 00000000000..c147a62cb7a
--- /dev/null
+++ b/graphics/blit/blit-alpha.h
@@ -0,0 +1,221 @@
+/* ScummVM - Graphic Adventure Engine
+ *
+ * ScummVM is the legal property of its developers, whose names
+ * are too numerous to list here. Please refer to the COPYRIGHT
+ * file distributed with this source distribution.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "graphics/blit.h"
+
+namespace Graphics {
+
+class BlendBlitImpl_Base {
+	friend class BlendBlit;
+protected:
+
+struct AlphaBlend {
+	static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
+		uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
+
+		if (ina != 0) {
+			uint outb = (out[BlendBlit::kBIndex] * (255 - ina) >> 8);
+			uint outg = (out[BlendBlit::kGIndex] * (255 - ina) >> 8);
+			uint outr = (out[BlendBlit::kRIndex] * (255 - ina) >> 8);
+
+			out[BlendBlit::kAIndex] = 255;
+			out[BlendBlit::kBIndex] = outb + (in[BlendBlit::kBIndex] * ina * cb >> 16);
+			out[BlendBlit::kGIndex] = outg + (in[BlendBlit::kGIndex] * ina * cg >> 16);
+			out[BlendBlit::kRIndex] = outr + (in[BlendBlit::kRIndex] * ina * cr >> 16);
+		}
+	}
+};
+
+struct MultiplyBlend {
+	static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
+		uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
+
+		if (ina != 0) {
+			out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] * ((in[BlendBlit::kBIndex] * cb * ina) >> 16) >> 8;
+			out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] * ((in[BlendBlit::kGIndex] * cg * ina) >> 16) >> 8;
+			out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] * ((in[BlendBlit::kRIndex] * cr * ina) >> 16) >> 8;
+		}
+	}
+};
+
+struct OpaqueBlend {
+	static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
+		*(uint32 *)out = *(const uint32 *)in | BlendBlit::kAModMask;
+	}
+};
+
+struct BinaryBlend {
+	static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
+		uint32 pix = *(const uint32 *)in;
+		int a = in[BlendBlit::kAIndex];
+
+		if (a != 0) {   // Full opacity (Any value not exactly 0 is Opaque here)
+			*(uint32 *)out = pix;
+			out[BlendBlit::kAIndex] = 0xFF;
+		}
+	}
+};
+
+struct AdditiveBlend {
+	static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
+		uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
+
+		if (ina != 0) {
+			out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] + ((in[BlendBlit::kBIndex] * cb * ina) >> 16);
+			out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] + ((in[BlendBlit::kGIndex] * cg * ina) >> 16);
+			out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] + ((in[BlendBlit::kRIndex] * cr * ina) >> 16);
+		}
+	}
+};
+
+struct SubtractiveBlend {
+	static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
+		out[BlendBlit::kAIndex] = 255;
+		out[BlendBlit::kBIndex] = MAX<int32>(out[BlendBlit::kBIndex] - ((in[BlendBlit::kBIndex] * cb  * (out[BlendBlit::kBIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
+		out[BlendBlit::kGIndex] = MAX<int32>(out[BlendBlit::kGIndex] - ((in[BlendBlit::kGIndex] * cg  * (out[BlendBlit::kGIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
+		out[BlendBlit::kRIndex] = MAX<int32>(out[BlendBlit::kRIndex] - ((in[BlendBlit::kRIndex] * cr *  (out[BlendBlit::kRIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
+	}
+};
+
+}; // End of class BlendBlitImpl_Base
+
+template<class T>
+void BlendBlit::blitT(Args &args, const TSpriteBlendMode &blendMode, const AlphaType &alphaType) {
+	bool rgbmod   = ((args.color & kRGBModMask) != kRGBModMask);
+	bool alphamod = ((args.color & kAModMask)   != kAModMask);
+	if (args.scaleX == SCALE_THRESHOLD && args.scaleY == SCALE_THRESHOLD) {
+		if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_OPAQUE) {
+			T::template blitInnerLoop<T::template OpaqueBlend, false, false, false, false, true>(args);
+		} else if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_BINARY) {
+			T::template blitInnerLoop<T::template BinaryBlend, false, false, false, false, true>(args);
+		} else {
+			if (blendMode == BLEND_ADDITIVE) {
+				if (rgbmod) {
+					if (alphamod) {
+						T::template blitInnerLoop<T::template AdditiveBlend, false, true, true, false, true>(args);
+					} else {
+						T::template blitInnerLoop<T::template AdditiveBlend, false, true, false, false, true>(args);
+					}
+				} else {
+					if (alphamod) {
+						T::template blitInnerLoop<T::template AdditiveBlend, false, false, true, false, true>(args);
+					} else {
+						T::template blitInnerLoop<T::template AdditiveBlend, false, false, false, false, true>(args);
+					}
+				}
+			} else if (blendMode == BLEND_SUBTRACTIVE) {
+				if (rgbmod) {
+					T::template blitInnerLoop<T::template SubtractiveBlend, false, true, false, false, true>(args);
+				} else {
+					T::template blitInnerLoop<T::template SubtractiveBlend, false, false, false, false, true>(args);
+				}
+			} else if (blendMode == BLEND_MULTIPLY) {
+				if (rgbmod) {
+					if (alphamod) {
+						T::template blitInnerLoop<T::template MultiplyBlend, false, true, true, false, true>(args);
+					} else {
+						T::template blitInnerLoop<T::template MultiplyBlend, false, true, false, false, true>(args);
+					}
+				} else {
+					if (alphamod) {
+						T::template blitInnerLoop<T::template MultiplyBlend, false, false, true, false, true>(args);
+					} else {
+						T::template blitInnerLoop<T::template MultiplyBlend, false, false, false, false, true>(args);
+					}
+				}
+			} else {
+				assert(blendMode == BLEND_NORMAL);
+				if (rgbmod) {
+					if (alphamod) {
+						T::template blitInnerLoop<T::template AlphaBlend, false, true, true, false, true>(args);
+					} else {
+						T::template blitInnerLoop<T::template AlphaBlend, false, true, false, false, true>(args);
+					}
+				} else {
+					if (alphamod) {
+						T::template blitInnerLoop<T::template AlphaBlend, false, false, true, false, true>(args);
+					} else {
+						T::template blitInnerLoop<T::template AlphaBlend, false, false, false, false, true>(args);
+					}
+				}
+			}
+		}
+	} else {
+		if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_OPAQUE) {
+			T::template blitInnerLoop<T::template OpaqueBlend, true, false, false, false, true>(args);
+		} else if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_BINARY) {
+			T::template blitInnerLoop<T::template BinaryBlend, true, false, false, false, true>(args);
+		} else {
+			if (blendMode == BLEND_ADDITIVE) {
+				if (rgbmod) {
+					if (alphamod) {
+						T::template blitInnerLoop<T::template AdditiveBlend, true, true, true, false, true>(args);
+					} else {
+						T::template blitInnerLoop<T::template AdditiveBlend, true, true, false, false, true>(args);
+					}
+				} else {
+					if (alphamod) {
+						T::template blitInnerLoop<T::template AdditiveBlend, true, false, true, false, true>(args);
+					} else {
+						T::template blitInnerLoop<T::template AdditiveBlend, true, false, false, false, true>(args);
+					}
+				}
+			} else if (blendMode == BLEND_SUBTRACTIVE) {
+				if (rgbmod) {
+					T::template blitInnerLoop<T::template SubtractiveBlend, true, true, false, false, true>(args);
+				} else {
+					T::template blitInnerLoop<T::template SubtractiveBlend, true, false, false, false, true>(args);
+				}
+			} else if (blendMode == BLEND_MULTIPLY) {
+				if (rgbmod) {
+					if (alphamod) {
+						T::template blitInnerLoop<T::template MultiplyBlend, true, true, true, false, true>(args);
+					} else {
+						T::template blitInnerLoop<T::template MultiplyBlend, true, true, false, false, true>(args);
+					}
+				} else {
+					if (alphamod) {
+						T::template blitInnerLoop<T::template MultiplyBlend, true, false, true, false, true>(args);
+					} else {
+						T::template blitInnerLoop<T::template MultiplyBlend, true, false, false, false, true>(args);
+					}
+				}
+			} else {
+				assert(blendMode == BLEND_NORMAL);
+				if (rgbmod) {
+					if (alphamod) {
+						T::template blitInnerLoop<T::template AlphaBlend, true, true, true, false, true>(args);
+					} else {
+						T::template blitInnerLoop<T::template AlphaBlend, true, true, false, false, true>(args);
+					}
+				} else {
+					if (alphamod) {
+						T::template blitInnerLoop<T::template AlphaBlend, true, false, true, false, true>(args);
+					} else {
+						T::template blitInnerLoop<T::template AlphaBlend, true, false, false, false, true>(args);
+					}
+				}
+			}
+		}
+	}
+}
+
+} // End of namespace Graphics
diff --git a/graphics/blit/blit-avx2.cpp b/graphics/blit/blit-avx2.cpp
index 85170df00c0..d62e9411af5 100644
--- a/graphics/blit/blit-avx2.cpp
+++ b/graphics/blit/blit-avx2.cpp
@@ -22,16 +22,16 @@
 #include "common/scummsys.h"
 #include <immintrin.h>
 
-#include "graphics/blit.h"
+#include "graphics/blit/blit-alpha.h"
 #include "graphics/pixelformat.h"
 
 namespace Graphics {
 
-class BlendBlitImpl_AVX2 {
+class BlendBlitImpl_AVX2 : public BlendBlitImpl_Base {
 	friend class BlendBlit;
 
 template<bool doscale, bool rgbmod, bool alphamod>
-struct AlphaBlend {
+struct AlphaBlend : public BlendBlitImpl_Base::AlphaBlend {
 	static inline __m256i simd(__m256i src, __m256i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
 		__m256i ina;
 		if (alphamod)
@@ -75,25 +75,10 @@ struct AlphaBlend {
 		src = _mm256_andnot_si256(alphaMask, src);
 		return _mm256_or_si256(dst, src);
 	}
-
-	static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
-		uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
-
-		if (ina != 0) {
-			uint outb = (out[BlendBlit::kBIndex] * (255 - ina) >> 8);
-			uint outg = (out[BlendBlit::kGIndex] * (255 - ina) >> 8);
-			uint outr = (out[BlendBlit::kRIndex] * (255 - ina) >> 8);
-
-			out[BlendBlit::kAIndex] = 255;
-			out[BlendBlit::kBIndex] = outb + (in[BlendBlit::kBIndex] * ina * cb >> 16);
-			out[BlendBlit::kGIndex] = outg + (in[BlendBlit::kGIndex] * ina * cg >> 16);
-			out[BlendBlit::kRIndex] = outr + (in[BlendBlit::kRIndex] * ina * cr >> 16);
-		}
-	}
 };
 
 template<bool doscale, bool rgbmod, bool alphamod>
-struct MultiplyBlend {
+struct MultiplyBlend : public BlendBlitImpl_Base::MultiplyBlend {
 	static inline __m256i simd(__m256i src, __m256i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
 		__m256i ina, alphaMask;
 		if (alphamod) {
@@ -136,51 +121,27 @@ struct MultiplyBlend {
 		src = _mm256_andnot_si256(alphaMask, src);
 		return _mm256_or_si256(dst, src);
 	}
-
-	static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
-		uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
-
-		if (ina != 0) {
-			out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] * ((in[BlendBlit::kBIndex] * cb * ina) >> 16) >> 8;
-			out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] * ((in[BlendBlit::kGIndex] * cg * ina) >> 16) >> 8;
-			out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] * ((in[BlendBlit::kRIndex] * cr * ina) >> 16) >> 8;
-		}
-	}
 };
 
 template<bool doscale, bool rgbmod, bool alphamod>
-struct OpaqueBlend {
+struct OpaqueBlend : public BlendBlitImpl_Base::OpaqueBlend {
 	static inline __m256i simd(__m256i src, __m256i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
 		return _mm256_or_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask));
 	}
-
-	static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
-		*(uint32 *)out = *(const uint32 *)in | BlendBlit::kAModMask;
-	}
 };
 
 template<bool doscale, bool rgbmod, bool alphamod>
-struct BinaryBlend {
+struct BinaryBlend : public BlendBlitImpl_Base::BinaryBlend {
 	static inline __m256i simd(__m256i src, __m256i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
 		__m256i alphaMask = _mm256_cmpeq_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask)), _mm256_setzero_si256());
 		dst = _mm256_and_si256(dst, alphaMask);
 		src = _mm256_andnot_si256(alphaMask, _mm256_or_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask)));
 		return _mm256_or_si256(src, dst);
 	}
-
-	static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
-		uint32 pix = *(const uint32 *)in;
-		int a = in[BlendBlit::kAIndex];
-
-		if (a != 0) {   // Full opacity (Any value not exactly 0 is Opaque here)
-			*(uint32 *)out = pix;
-			out[BlendBlit::kAIndex] = 0xFF;
-		}
-	}
 };
 
 template<bool doscale, bool rgbmod, bool alphamod>
-struct AdditiveBlend {
+struct AdditiveBlend : public BlendBlitImpl_Base::AdditiveBlend {
 	static inline __m256i simd(__m256i src, __m256i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
 		__m256i ina;
 		if (alphamod)
@@ -231,20 +192,10 @@ struct AdditiveBlend {
 		src = _mm256_andnot_si256(alphaMask, src);
 		return _mm256_or_si256(dst, src);
 	}
-
-	static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
-		uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
-
-		if (ina != 0) {
-			out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] + ((in[BlendBlit::kBIndex] * cb * ina) >> 16);
-			out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] + ((in[BlendBlit::kGIndex] * cg * ina) >> 16);
-			out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] + ((in[BlendBlit::kRIndex] * cr * ina) >> 16);
-		}
-	}
 };
 
 template<bool doscale, bool rgbmod, bool alphamod>
-struct SubtractiveBlend {
+struct SubtractiveBlend : public BlendBlitImpl_Base::SubtractiveBlend {
 	static inline __m256i simd(__m256i src, __m256i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
 		__m256i ina = _mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask));
 		__m256i srcb = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
@@ -260,13 +211,6 @@ struct SubtractiveBlend {
 
 		return _mm256_or_si256(_mm256_set1_epi32(BlendBlit::kAModMask), _mm256_or_si256(srcb, _mm256_or_si256(srcg, srcr)));
 	}
-
-	static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
-		out[BlendBlit::kAIndex] = 255;
-		out[BlendBlit::kBIndex] = MAX<int32>(out[BlendBlit::kBIndex] - ((in[BlendBlit::kBIndex] * cb  * (out[BlendBlit::kBIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
-		out[BlendBlit::kGIndex] = MAX<int32>(out[BlendBlit::kGIndex] - ((in[BlendBlit::kGIndex] * cg  * (out[BlendBlit::kGIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
-		out[BlendBlit::kRIndex] = MAX<int32>(out[BlendBlit::kRIndex] - ((in[BlendBlit::kRIndex] * cr *  (out[BlendBlit::kRIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
-	}
 };
 
 public:
@@ -352,123 +296,7 @@ static void blitInnerLoop(BlendBlit::Args &args) {
 }; // end of class BlendBlitImpl_AVX2
 
 void BlendBlit::blitAVX2(Args &args, const TSpriteBlendMode &blendMode, const AlphaType &alphaType) {
-	bool rgbmod   = ((args.color & kRGBModMask) != kRGBModMask);
-	bool alphamod = ((args.color & kAModMask)   != kAModMask);
-	if (args.scaleX == SCALE_THRESHOLD && args.scaleY == SCALE_THRESHOLD) {
-		if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_OPAQUE) {
-			BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::OpaqueBlend, false, false, false, false, true>(args);
-		} else if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_BINARY) {
-			BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::BinaryBlend, false, false, false, false, true>(args);
-		} else {
-			if (blendMode == BLEND_ADDITIVE) {
-				if (rgbmod) {
-					if (alphamod) {
-						BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::AdditiveBlend, false, true, true, false, true>(args);
-					} else {
-						BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::AdditiveBlend, false, true, false, false, true>(args);
-					}
-				} else {
-					if (alphamod) {
-						BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::AdditiveBlend, false, false, true, false, true>(args);
-					} else {
-						BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::AdditiveBlend, false, false, false, false, true>(args);
-					}
-				}
-			} else if (blendMode == BLEND_SUBTRACTIVE) {
-				if (rgbmod) {
-					BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::SubtractiveBlend, false, true, false, false, true>(args);
-				} else {
-					BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::SubtractiveBlend, false, false, false, false, true>(args);
-				}
-			} else if (blendMode == BLEND_MULTIPLY) {
-				if (rgbmod) {
-					if (alphamod) {
-						BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::MultiplyBlend, false, true, true, false, true>(args);
-					} else {
-						BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::MultiplyBlend, false, true, false, false, true>(args);
-					}
-				} else {
-					if (alphamod) {
-						BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::MultiplyBlend, false, false, true, false, true>(args);
-					} else {
-						BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::MultiplyBlend, false, false, false, false, true>(args);
-					}
-				}
-			} else {
-				assert(blendMode == BLEND_NORMAL);
-				if (rgbmod) {
-					if (alphamod) {
-						BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::AlphaBlend, false, true, true, false, true>(args);
-					} else {
-						BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::AlphaBlend, false, true, false, false, true>(args);
-					}
-				} else {
-					if (alphamod) {
-						BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::AlphaBlend, false, false, true, false, true>(args);
-					} else {
-						BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::AlphaBlend, false, false, false, false, true>(args);
-					}
-				}
-			}
-		}
-	} else {
-		if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_OPAQUE) {
-			BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::OpaqueBlend, true, false, false, false, true>(args);
-		} else if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_BINARY) {
-			BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::BinaryBlend, true, false, false, false, true>(args);
-		} else {
-			if (blendMode == BLEND_ADDITIVE) {
-				if (rgbmod) {
-					if (alphamod) {
-						BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::AdditiveBlend, true, true, true, false, true>(args);
-					} else {
-						BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::AdditiveBlend, true, true, false, false, true>(args);
-					}
-				} else {
-					if (alphamod) {
-						BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::AdditiveBlend, true, false, true, false, true>(args);
-					} else {
-						BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::AdditiveBlend, true, false, false, false, true>(args);
-					}
-				}
-			} else if (blendMode == BLEND_SUBTRACTIVE) {
-				if (rgbmod) {
-					BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::SubtractiveBlend, true, true, false, false, true>(args);
-				} else {
-					BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::SubtractiveBlend, true, false, false, false, true>(args);
-				}
-			} else if (blendMode == BLEND_MULTIPLY) {
-				if (rgbmod) {
-					if (alphamod) {
-						BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::MultiplyBlend, true, true, true, false, true>(args);
-					} else {
-						BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::MultiplyBlend, true, true, false, false, true>(args);
-					}
-				} else {
-					if (alphamod) {
-						BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::MultiplyBlend, true, false, true, false, true>(args);
-					} else {
-						BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::MultiplyBlend, true, false, false, false, true>(args);
-					}
-				}
-			} else {
-				assert(blendMode == BLEND_NORMAL);
-				if (rgbmod) {
-					if (alphamod) {
-						BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::AlphaBlend, true, true, true, false, true>(args);
-					} else {
-						BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::AlphaBlend, true, true, false, false, true>(args);
-					}
-				} else {
-					if (alphamod) {
-						BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::AlphaBlend, true, false, true, false, true>(args);
-					} else {
-						BlendBlitImpl_AVX2::blitInnerLoop<BlendBlitImpl_AVX2::AlphaBlend, true, false, false, false, true>(args);
-					}
-				}
-			}
-		}
-	}
+	blitT<BlendBlitImpl_AVX2>(args, blendMode, alphaType);
 }
 
 } // End of namespace Graphics
diff --git a/graphics/blit/blit-neon.cpp b/graphics/blit/blit-neon.cpp
index 58bc8a3747f..0e737d31438 100644
--- a/graphics/blit/blit-neon.cpp
+++ b/graphics/blit/blit-neon.cpp
@@ -24,16 +24,16 @@
 #ifdef SCUMMVM_NEON
 #include <arm_neon.h>
 
-#include "graphics/blit.h"
+#include "graphics/blit/blit-alpha.h"
 #include "graphics/pixelformat.h"
 
 namespace Graphics {
 
-class BlendBlitImpl_NEON {
+class BlendBlitImpl_NEON : public BlendBlitImpl_Base {
 	friend class BlendBlit;
 
 template<bool doscale, bool rgbmod, bool alphamod>
-struct AlphaBlend {
+struct AlphaBlend : public BlendBlitImpl_Base::AlphaBlend {
 	static inline uint32x4_t simd(uint32x4_t src, uint32x4_t dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
 		uint32x4_t ina;
 		if (alphamod)
@@ -77,25 +77,10 @@ struct AlphaBlend {
 		src = vandq_u32(vmvnq_u32(alphaMask), src);
 		return vorrq_u32(dst, src);
 	}
-
-	static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
-		uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
-
-		if (ina != 0) {
-			uint outb = (out[BlendBlit::kBIndex] * (255 - ina) >> 8);
-			uint outg = (out[BlendBlit::kGIndex] * (255 - ina) >> 8);
-			uint outr = (out[BlendBlit::kRIndex] * (255 - ina) >> 8);
-
-			out[BlendBlit::kAIndex] = 255;
-			out[BlendBlit::kBIndex] = outb + (in[BlendBlit::kBIndex] * ina * cb >> 16);
-			out[BlendBlit::kGIndex] = outg + (in[BlendBlit::kGIndex] * ina * cg >> 16);
-			out[BlendBlit::kRIndex] = outr + (in[BlendBlit::kRIndex] * ina * cr >> 16);
-		}
-	}
 };
 
 template<bool doscale, bool rgbmod, bool alphamod>
-struct MultiplyBlend {
+struct MultiplyBlend : public BlendBlitImpl_Base::MultiplyBlend {
 	static inline uint32x4_t simd(uint32x4_t src, uint32x4_t dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
 		uint32x4_t ina;
 		if (alphamod)
@@ -133,51 +118,27 @@ struct MultiplyBlend {
 		src = vandq_u32(vmvnq_u32(alphaMask), src);
 		return vorrq_u32(dst, src);
 	}
-
-	static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
-		uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
-
-		if (ina != 0) {
-			out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] * ((in[BlendBlit::kBIndex] * cb * ina) >> 16) >> 8;
-			out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] * ((in[BlendBlit::kGIndex] * cg * ina) >> 16) >> 8;
-			out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] * ((in[BlendBlit::kRIndex] * cr * ina) >> 16) >> 8;
-		}
-	}
 };
 
 template<bool doscale, bool rgbmod, bool alphamod>
-struct OpaqueBlend {
+struct OpaqueBlend : public BlendBlitImpl_Base::OpaqueBlend {
 	static inline uint32x4_t simd(uint32x4_t src, uint32x4_t dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
 		return vorrq_u32(src, vmovq_n_u32(BlendBlit::kAModMask));
 	}
-
-	static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
-		*(uint32 *)out = *(const uint32 *)in | BlendBlit::kAModMask;
-	}
 };
 
 template<bool doscale, bool rgbmod, bool alphamod>
-struct BinaryBlend {
+struct BinaryBlend : public BlendBlitImpl_Base::BinaryBlend {
 	static inline uint32x4_t simd(uint32x4_t src, uint32x4_t dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
 		uint32x4_t alphaMask = vceqq_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kAModMask)), vmovq_n_u32(0));
 		dst = vandq_u32(dst, alphaMask);
 		src = vandq_u32(vorrq_u32(src, vmovq_n_u32(BlendBlit::kAModMask)), vmvnq_u32(alphaMask));
 		return vorrq_u32(dst, src);
 	}
-
-	static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
-		uint32 pix = *(const uint32 *)in;
-		int a = in[BlendBlit::kAIndex];
-
-		if (a != 0) {   // Full opacity (Any value not exactly 0 is Opaque here)
-			*(uint32 *)out = pix;
-			out[BlendBlit::kAIndex] = 0xFF;
-		}
-	}
 };
 
 template<bool doscale, bool rgbmod, bool alphamod>
-struct AdditiveBlend {
+struct AdditiveBlend : public BlendBlitImpl_Base::AdditiveBlend {
 	static inline uint32x4_t simd(uint32x4_t src, uint32x4_t dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
 		uint32x4_t ina;
 		if (alphamod)
@@ -228,20 +189,10 @@ struct AdditiveBlend {
 		src = vandq_u32(vmvnq_u32(alphaMask), src);
 		return vorrq_u32(dst, src);
 	}
-
-	static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
-		uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
-
-		if (ina != 0) {
-			out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] + ((in[BlendBlit::kBIndex] * cb * ina) >> 16);
-			out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] + ((in[BlendBlit::kGIndex] * cg * ina) >> 16);
-			out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] + ((in[BlendBlit::kRIndex] * cr * ina) >> 16);
-		}
-	}
 };
 
 template<bool doscale, bool rgbmod, bool alphamod>
-struct SubtractiveBlend {
+struct SubtractiveBlend : public BlendBlitImpl_Base::SubtractiveBlend {
 	static inline uint32x4_t simd(uint32x4_t src, uint32x4_t dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
 		uint32x4_t ina = vandq_u32(src, vmovq_n_u32(BlendBlit::kAModMask));
 		uint32x4_t srcb = vshrq_n_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
@@ -257,13 +208,6 @@ struct SubtractiveBlend {
 
 		return vorrq_u32(vmovq_n_u32(BlendBlit::kAModMask), vorrq_u32(srcb, vorrq_u32(srcg, srcr)));
 	}
-
-	static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
-		out[BlendBlit::kAIndex] = 255;
-		out[BlendBlit::kBIndex] = MAX<int32>(out[BlendBlit::kBIndex] - ((in[BlendBlit::kBIndex] * cb  * (out[BlendBlit::kBIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
-		out[BlendBlit::kGIndex] = MAX<int32>(out[BlendBlit::kGIndex] - ((in[BlendBlit::kGIndex] * cg  * (out[BlendBlit::kGIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
-		out[BlendBlit::kRIndex] = MAX<int32>(out[BlendBlit::kRIndex] - ((in[BlendBlit::kRIndex] * cr *  (out[BlendBlit::kRIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
-	}
 };
 
 public:
@@ -346,123 +290,7 @@ static inline void blitInnerLoop(BlendBlit::Args &args) {
 }; // end of class BlendBlitImpl_NEON
 
 void BlendBlit::blitNEON(Args &args, const TSpriteBlendMode &blendMode, const AlphaType &alphaType) {
-	bool rgbmod   = ((args.color & kRGBModMask) != kRGBModMask);
-	bool alphamod = ((args.color & kAModMask)   != kAModMask);
-	if (args.scaleX == SCALE_THRESHOLD && args.scaleY == SCALE_THRESHOLD) {
-		if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_OPAQUE) {
-			BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::OpaqueBlend, false, false, false, false, true>(args);
-		} else if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_BINARY) {
-			BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::BinaryBlend, false, false, false, false, true>(args);
-		} else {
-			if (blendMode == BLEND_ADDITIVE) {
-				if (rgbmod) {
-					if (alphamod) {
-						BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::AdditiveBlend, false, true, true, false, true>(args);
-					} else {
-						BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::AdditiveBlend, false, true, false, false, true>(args);
-					}
-				} else {
-					if (alphamod) {
-						BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::AdditiveBlend, false, false, true, false, true>(args);
-					} else {
-						BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::AdditiveBlend, false, false, false, false, true>(args);
-					}
-				}
-			} else if (blendMode == BLEND_SUBTRACTIVE) {
-				if (rgbmod) {
-					BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::SubtractiveBlend, false, true, false, false, true>(args);
-				} else {
-					BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::SubtractiveBlend, false, false, false, false, true>(args);
-				}
-			} else if (blendMode == BLEND_MULTIPLY) {
-				if (rgbmod) {
-					if (alphamod) {
-						BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::MultiplyBlend, false, true, true, false, true>(args);
-					} else {
-						BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::MultiplyBlend, false, true, false, false, true>(args);
-					}
-				} else {
-					if (alphamod) {
-						BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::MultiplyBlend, false, false, true, false, true>(args);
-					} else {
-						BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::MultiplyBlend, false, false, false, false, true>(args);
-					}
-				}
-			} else {
-				assert(blendMode == BLEND_NORMAL);
-				if (rgbmod) {
-					if (alphamod) {
-						BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::AlphaBlend, false, true, true, false, true>(args);
-					} else {
-						BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::AlphaBlend, false, true, false, false, true>(args);
-					}
-				} else {
-					if (alphamod) {
-						BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::AlphaBlend, false, false, true, false, true>(args);
-					} else {
-						BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::AlphaBlend, false, false, false, false, true>(args);
-					}
-				}
-			}
-		}
-	} else {
-		if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_OPAQUE) {
-			BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::OpaqueBlend, true, false, false, false, true>(args);
-		} else if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_BINARY) {
-			BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::BinaryBlend, true, false, false, false, true>(args);
-		} else {
-			if (blendMode == BLEND_ADDITIVE) {
-				if (rgbmod) {
-					if (alphamod) {
-						BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::AdditiveBlend, true, true, true, false, true>(args);
-					} else {
-						BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::AdditiveBlend, true, true, false, false, true>(args);
-					}
-				} else {
-					if (alphamod) {
-						BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::AdditiveBlend, true, false, true, false, true>(args);
-					} else {
-						BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::AdditiveBlend, true, false, false, false, true>(args);
-					}
-				}
-			} else if (blendMode == BLEND_SUBTRACTIVE) {
-				if (rgbmod) {
-					BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::SubtractiveBlend, true, true, false, false, true>(args);
-				} else {
-					BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::SubtractiveBlend, true, false, false, false, true>(args);
-				}
-			} else if (blendMode == BLEND_MULTIPLY) {
-				if (rgbmod) {
-					if (alphamod) {
-						BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::MultiplyBlend, true, true, true, false, true>(args);
-					} else {
-						BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::MultiplyBlend, true, true, false, false, true>(args);
-					}
-				} else {
-					if (alphamod) {
-						BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::MultiplyBlend, true, false, true, false, true>(args);
-					} else {
-						BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::MultiplyBlend, true, false, false, false, true>(args);
-					}
-				}
-			} else {
-				assert(blendMode == BLEND_NORMAL);
-				if (rgbmod) {
-					if (alphamod) {
-						BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::AlphaBlend, true, true, true, false, true>(args);
-					} else {
-						BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::AlphaBlend, true, true, false, false, true>(args);
-					}
-				} else {
-					if (alphamod) {
-						BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::AlphaBlend, true, false, true, false, true>(args);
-					} else {
-						BlendBlitImpl_NEON::blitInnerLoop<BlendBlitImpl_NEON::AlphaBlend, true, false, false, false, true>(args);
-					}
-				}
-			}
-		}
-	}
+	blitT<BlendBlitImpl_NEON>(args, blendMode, alphaType);
 }
 
 } // end of namespace Graphics
diff --git a/graphics/blit/blit-sse2.cpp b/graphics/blit/blit-sse2.cpp
index 1962621c68c..8bb5db923af 100644
--- a/graphics/blit/blit-sse2.cpp
+++ b/graphics/blit/blit-sse2.cpp
@@ -22,7 +22,7 @@
 #include "common/scummsys.h"
 #include <immintrin.h>
 
-#include "graphics/blit.h"
+#include "graphics/blit/blit-alpha.h"
 #include "graphics/pixelformat.h"
 
 namespace Graphics {
@@ -33,11 +33,11 @@ static FORCEINLINE __m128i sse2_mul32(__m128i a, __m128i b) {
 	return _mm_unpacklo_epi32(even, odd);
 }
 
-class BlendBlitImpl_SSE2 {
+class BlendBlitImpl_SSE2 : public BlendBlitImpl_Base {
 	friend class BlendBlit;
 
 template<bool doscale, bool rgbmod, bool alphamod>
-struct AlphaBlend {
+struct AlphaBlend : public BlendBlitImpl_Base::AlphaBlend {
 	static inline __m128i simd(__m128i src, __m128i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
 		__m128i ina;
 		if (alphamod)
@@ -81,25 +81,10 @@ struct AlphaBlend {
 		src = _mm_andnot_si128(alphaMask, src);
 		return _mm_or_si128(dst, src);
 	}
-
-	static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
-		uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
-
-		if (ina != 0) {
-			uint outb = (out[BlendBlit::kBIndex] * (255 - ina) >> 8);
-			uint outg = (out[BlendBlit::kGIndex] * (255 - ina) >> 8);
-			uint outr = (out[BlendBlit::kRIndex] * (255 - ina) >> 8);
-
-			out[BlendBlit::kAIndex] = 255;
-			out[BlendBlit::kBIndex] = outb + (in[BlendBlit::kBIndex] * ina * cb >> 16);
-			out[BlendBlit::kGIndex] = outg + (in[BlendBlit::kGIndex] * ina * cg >> 16);
-			out[BlendBlit::kRIndex] = outr + (in[BlendBlit::kRIndex] * ina * cr >> 16);
-		}
-	}
 };
 
 template<bool doscale, bool rgbmod, bool alphamod>
-struct MultiplyBlend {
+struct MultiplyBlend : public BlendBlitImpl_Base::MultiplyBlend {
 	static inline __m128i simd(__m128i src, __m128i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
 		__m128i ina, alphaMask;
 		if (alphamod) {
@@ -142,51 +127,27 @@ struct MultiplyBlend {
 		src = _mm_andnot_si128(alphaMask, src);
 		return _mm_or_si128(dst, src);
 	}
-
-	static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
-		uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
-
-		if (ina != 0) {
-			out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] * ((in[BlendBlit::kBIndex] * cb * ina) >> 16) >> 8;
-			out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] * ((in[BlendBlit::kGIndex] * cg * ina) >> 16) >> 8;
-			out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] * ((in[BlendBlit::kRIndex] * cr * ina) >> 16) >> 8;
-		}
-	}
 };
 
 template<bool doscale, bool rgbmod, bool alphamod>
-struct OpaqueBlend {
+struct OpaqueBlend : public BlendBlitImpl_Base::OpaqueBlend {
 	static inline __m128i simd(__m128i src, __m128i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
 		return _mm_or_si128(src, _mm_set1_epi32(BlendBlit::kAModMask));
 	}
-
-	static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
-		*(uint32 *)out = *(const uint32 *)in | BlendBlit::kAModMask;
-	}
 };
 
 template<bool doscale, bool rgbmod, bool alphamod>
-struct BinaryBlend {
+struct BinaryBlend : public BlendBlitImpl_Base::BinaryBlend {
 	static inline __m128i simd(__m128i src, __m128i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
 		__m128i alphaMask = _mm_cmpeq_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask)), _mm_setzero_si128());
 		dst = _mm_and_si128(dst, alphaMask);
 		src = _mm_andnot_si128(alphaMask, _mm_or_si128(src, _mm_set1_epi32(BlendBlit::kAModMask)));
 		return _mm_or_si128(src, dst);
 	}
-
-	static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
-		uint32 pix = *(const uint32 *)in;
-		int a = in[BlendBlit::kAIndex];
-
-		if (a != 0) {   // Full opacity (Any value not exactly 0 is Opaque here)
-			*(uint32 *)out = pix;
-			out[BlendBlit::kAIndex] = 0xFF;
-		}
-	}
 };
 
 template<bool doscale, bool rgbmod, bool alphamod>
-struct AdditiveBlend {
+struct AdditiveBlend : public BlendBlitImpl_Base::AdditiveBlend {
 	static inline __m128i simd(__m128i src, __m128i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
 		__m128i ina;
 		if (alphamod)
@@ -237,20 +198,10 @@ struct AdditiveBlend {
 		src = _mm_andnot_si128(alphaMask, src);
 		return _mm_or_si128(dst, src);
 	}
-
-	static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
-		uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
-
-		if (ina != 0) {
-			out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] + ((in[BlendBlit::kBIndex] * cb * ina) >> 16);
-			out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] + ((in[BlendBlit::kGIndex] * cg * ina) >> 16);
-			out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] + ((in[BlendBlit::kRIndex] * cr * ina) >> 16);
-		}
-	}
 };
 
 template<bool doscale, bool rgbmod, bool alphamod>
-struct SubtractiveBlend {
+struct SubtractiveBlend : public BlendBlitImpl_Base::SubtractiveBlend {
 	static inline __m128i simd(__m128i src, __m128i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
 		__m128i ina = _mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask));
 		__m128i srcb = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
@@ -266,13 +217,6 @@ struct SubtractiveBlend {
 
 		return _mm_or_si128(_mm_set1_epi32(BlendBlit::kAModMask), _mm_or_si128(srcb, _mm_or_si128(srcg, srcr)));
 	}
-
-	static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
-		out[BlendBlit::kAIndex] = 255;
-		out[BlendBlit::kBIndex] = MAX<int32>(out[BlendBlit::kBIndex] - ((in[BlendBlit::kBIndex] * cb  * (out[BlendBlit::kBIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
-		out[BlendBlit::kGIndex] = MAX<int32>(out[BlendBlit::kGIndex] - ((in[BlendBlit::kGIndex] * cg  * (out[BlendBlit::kGIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
-		out[BlendBlit::kRIndex] = MAX<int32>(out[BlendBlit::kRIndex] - ((in[BlendBlit::kRIndex] * cr *  (out[BlendBlit::kRIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
-	}
 };
 
 public:
@@ -353,123 +297,7 @@ static inline void blitInnerLoop(BlendBlit::Args &args) {
 }; // End of class BlendBlitImpl_SSE2
 
 void BlendBlit::blitSSE2(Args &args, const TSpriteBlendMode &blendMode, const AlphaType &alphaType) {
-	bool rgbmod   = ((args.color & kRGBModMask) != kRGBModMask);
-	bool alphamod = ((args.color & kAModMask)   != kAModMask);
-	if (args.scaleX == SCALE_THRESHOLD && args.scaleY == SCALE_THRESHOLD) {
-		if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_OPAQUE) {
-			BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::OpaqueBlend, false, false, false, false, true>(args);
-		} else if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_BINARY) {
-			BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::BinaryBlend, false, false, false, false, true>(args);
-		} else {
-			if (blendMode == BLEND_ADDITIVE) {
-				if (rgbmod) {
-					if (alphamod) {
-						BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::AdditiveBlend, false, true, true, false, true>(args);
-					} else {
-						BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::AdditiveBlend, false, true, false, false, true>(args);
-					}
-				} else {
-					if (alphamod) {
-						BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::AdditiveBlend, false, false, true, false, true>(args);
-					} else {
-						BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::AdditiveBlend, false, false, false, false, true>(args);
-					}
-				}
-			} else if (blendMode == BLEND_SUBTRACTIVE) {
-				if (rgbmod) {
-					BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::SubtractiveBlend, false, true, false, false, true>(args);
-				} else {
-					BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::SubtractiveBlend, false, false, false, false, true>(args);
-				}
-			} else if (blendMode == BLEND_MULTIPLY) {
-				if (rgbmod) {
-					if (alphamod) {
-						BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::MultiplyBlend, false, true, true, false, true>(args);
-					} else {
-						BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::MultiplyBlend, false, true, false, false, true>(args);
-					}
-				} else {
-					if (alphamod) {
-						BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::MultiplyBlend, false, false, true, false, true>(args);
-					} else {
-						BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::MultiplyBlend, false, false, false, false, true>(args);
-					}
-				}
-			} else {
-				assert(blendMode == BLEND_NORMAL);
-				if (rgbmod) {
-					if (alphamod) {
-						BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::AlphaBlend, false, true, true, false, true>(args);
-					} else {
-						BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::AlphaBlend, false, true, false, false, true>(args);
-					}
-				} else {
-					if (alphamod) {
-						BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::AlphaBlend, false, false, true, false, true>(args);
-					} else {
-						BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::AlphaBlend, false, false, false, false, true>(args);
-					}
-				}
-			}
-		}
-	} else {
-		if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_OPAQUE) {
-			BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::OpaqueBlend, true, false, false, false, true>(args);
-		} else if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_BINARY) {
-			BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::BinaryBlend, true, false, false, false, true>(args);
-		} else {
-			if (blendMode == BLEND_ADDITIVE) {
-				if (rgbmod) {
-					if (alphamod) {
-						BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::AdditiveBlend, true, true, true, false, true>(args);
-					} else {
-						BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::AdditiveBlend, true, true, false, false, true>(args);
-					}
-				} else {
-					if (alphamod) {
-						BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::AdditiveBlend, true, false, true, false, true>(args);
-					} else {
-						BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::AdditiveBlend, true, false, false, false, true>(args);
-					}
-				}
-			} else if (blendMode == BLEND_SUBTRACTIVE) {
-				if (rgbmod) {
-					BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::SubtractiveBlend, true, true, false, false, true>(args);
-				} else {
-					BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::SubtractiveBlend, true, false, false, false, true>(args);
-				}
-			} else if (blendMode == BLEND_MULTIPLY) {
-				if (rgbmod) {
-					if (alphamod) {
-						BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::MultiplyBlend, true, true, true, false, true>(args);
-					} else {
-						BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::MultiplyBlend, true, true, false, false, true>(args);
-					}
-				} else {
-					if (alphamod) {
-						BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::MultiplyBlend, true, false, true, false, true>(args);
-					} else {
-						BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::MultiplyBlend, true, false, false, false, true>(args);
-					}
-				}
-			} else {
-				assert(blendMode == BLEND_NORMAL);
-				if (rgbmod) {
-					if (alphamod) {
-						BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::AlphaBlend, true, true, true, false, true>(args);
-					} else {
-						BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::AlphaBlend, true, true, false, false, true>(args);
-					}
-				} else {
-					if (alphamod) {
-						BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::AlphaBlend, true, false, true, false, true>(args);
-					} else {
-						BlendBlitImpl_SSE2::blitInnerLoop<BlendBlitImpl_SSE2::AlphaBlend, true, false, false, false, true>(args);
-					}
-				}
-			}
-		}
-	}
+	blitT<BlendBlitImpl_SSE2>(args, blendMode, alphaType);
 }
 
 } // End of namespace Graphics


Commit: b1863032d47f7b9bd0d370f43cf1c5dcf4c05fbf
    https://github.com/scummvm/scummvm/commit/b1863032d47f7b9bd0d370f43cf1c5dcf4c05fbf
Author: Cameron Cawley (ccawley2011 at gmail.com)
Date: 2023-10-29T00:56:58+02:00

Commit Message:
GRAPHICS: Split BlendBlitImpl_Default into a separate file

Changed paths:
  A graphics/blit/blit-generic.cpp
    graphics/blit/blit-alpha.cpp
    graphics/module.mk


diff --git a/graphics/blit/blit-alpha.cpp b/graphics/blit/blit-alpha.cpp
index 5975410c69a..dc4c23ea24e 100644
--- a/graphics/blit/blit-alpha.cpp
+++ b/graphics/blit/blit-alpha.cpp
@@ -238,431 +238,4 @@ void BlendBlit::blit(byte *dst, const byte *src,
 	blitFunc(args, blendMode, alphaType);
 }
 
-class BlendBlitImpl_Default {
-public:
-/**
- * Optimized version of doBlit to be used with multiply blended blitting
- */
-template<bool doscale, bool rgbmod, bool alphamod>
-static void doBlitMultiplyBlendLogicGeneric(BlendBlit::Args &args) {
-	const byte *in;
-	byte *out;
-
-	int scaleXCtr, scaleYCtr = args.scaleYoff;
-	const byte *inBase;
-
-	const byte rawcr = (args.color >> BlendBlit::kRModShift) & 0xFF;
-	const byte rawcg = (args.color >> BlendBlit::kGModShift) & 0xFF;
-	const byte rawcb = (args.color >> BlendBlit::kBModShift) & 0xFF;
-	const byte ca = alphamod ? ((args.color >> BlendBlit::kAModShift) & 0xFF) : 255;
-	const uint32 cr = rgbmod   ? (rawcr == 255 ? 256 : rawcr) : 256;
-	const uint32 cg = rgbmod   ? (rawcg == 255 ? 256 : rawcg) : 256;
-	const uint32 cb = rgbmod   ? (rawcb == 255 ? 256 : rawcb) : 256;
-
-	for (uint32 i = 0; i < args.height; i++) {
-		if (doscale) {
-			inBase = args.ino + scaleYCtr / BlendBlit::SCALE_THRESHOLD * args.inoStep;
-			scaleXCtr = args.scaleXoff;
-		} else {
-			in = args.ino;
-		}
-		out = args.outo;
-		for (uint32 j = 0; j < args.width; j++) {
-			if (doscale) {
-				in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
-			}
-
-			uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
-
-			if (ina != 0) {
-				out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] * ((in[BlendBlit::kBIndex] * cb * ina) >> 16) >> 8;
-				out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] * ((in[BlendBlit::kGIndex] * cg * ina) >> 16) >> 8;
-				out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] * ((in[BlendBlit::kRIndex] * cr * ina) >> 16) >> 8;
-			}
-
-			if (doscale)
-				scaleXCtr += args.scaleX;
-			else
-				in += args.inStep;
-			out += 4;
-		}
-		if (doscale)
-			scaleYCtr += args.scaleY;
-		else
-			args.ino += args.inoStep;
-		args.outo += args.dstPitch;
-	}
-
-}
-
-template<bool doscale, bool rgbmod, bool alphamod>
-static void doBlitAlphaBlendLogicGeneric(BlendBlit::Args &args) {
-	const byte *in;
-	byte *out;
-
-	int scaleXCtr, scaleYCtr = args.scaleYoff;
-	const byte *inBase;
-
-	const byte ca = alphamod ? ((args.color >> BlendBlit::kAModShift) & 0xFF) : 255;
-	const byte cr = rgbmod   ? ((args.color >> BlendBlit::kRModShift) & 0xFF) : 255;
-	const byte cg = rgbmod   ? ((args.color >> BlendBlit::kGModShift) & 0xFF) : 255;
-	const byte cb = rgbmod   ? ((args.color >> BlendBlit::kBModShift) & 0xFF) : 255;
-
-	for (uint32 i = 0; i < args.height; i++) {
-		if (doscale) {
-			inBase = args.ino + scaleYCtr / BlendBlit::SCALE_THRESHOLD * args.inoStep;
-			scaleXCtr = args.scaleXoff;
-		} else {
-			in = args.ino;
-		}
-		out = args.outo;
-		for (uint32 j = 0; j < args.width; j++) {
-			if (doscale) {
-				in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
-			}
-
-			uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
-
-			if (ina != 0) {
-				if (rgbmod) {
-					const uint outb = (out[BlendBlit::kBIndex] * (255 - ina) >> 8);
-					const uint outg = (out[BlendBlit::kGIndex] * (255 - ina) >> 8);
-					const uint outr = (out[BlendBlit::kRIndex] * (255 - ina) >> 8);
-
-					out[BlendBlit::kAIndex] = 255;
-					out[BlendBlit::kBIndex] = outb + (in[BlendBlit::kBIndex] * ina * cb >> 16);
-					out[BlendBlit::kGIndex] = outg + (in[BlendBlit::kGIndex] * ina * cg >> 16);
-					out[BlendBlit::kRIndex] = outr + (in[BlendBlit::kRIndex] * ina * cr >> 16);
-				} else {
-					out[BlendBlit::kAIndex] = 255;
-					out[BlendBlit::kBIndex] = (out[BlendBlit::kBIndex] * (255 - ina) + in[BlendBlit::kBIndex] * ina) >> 8;
-					out[BlendBlit::kGIndex] = (out[BlendBlit::kGIndex] * (255 - ina) + in[BlendBlit::kGIndex] * ina) >> 8;
-					out[BlendBlit::kRIndex] = (out[BlendBlit::kRIndex] * (255 - ina) + in[BlendBlit::kRIndex] * ina) >> 8;
-					
-				}
-			}
-
-			if (doscale)
-				scaleXCtr += args.scaleX;
-			else
-				in += args.inStep;
-			out += 4;
-		}
-
-		if (doscale)
-			scaleYCtr += args.scaleY;
-		else
-			args.ino += args.inoStep;
-		args.outo += args.dstPitch;
-	}
-}
-
-/**
- * Optimized version of doBlit to be used with subtractive blended blitting
- */
-template<bool doscale, bool rgbmod>
-static void doBlitSubtractiveBlendLogicGeneric(BlendBlit::Args &args) {
-	const byte *in;
-	byte *out;
-
-	int scaleXCtr, scaleYCtr = args.scaleYoff;
-	const byte *inBase;
-
-	const byte rawcr = (args.color >> BlendBlit::kRModShift) & 0xFF;
-	const byte rawcg = (args.color >> BlendBlit::kGModShift) & 0xFF;
-	const byte rawcb = (args.color >> BlendBlit::kBModShift) & 0xFF;
-	const uint32 cr = rgbmod   ? (rawcr == 255 ? 256 : rawcr) : 256;
-	const uint32 cg = rgbmod   ? (rawcg == 255 ? 256 : rawcg) : 256;
-	const uint32 cb = rgbmod   ? (rawcb == 255 ? 256 : rawcb) : 256;
-
-	for (uint32 i = 0; i < args.height; i++) {
-		if (doscale) {
-			inBase = args.ino + scaleYCtr / BlendBlit::SCALE_THRESHOLD * args.inoStep;
-			scaleXCtr = args.scaleXoff;
-		} else {
-			in = args.ino;
-		}
-		out = args.outo;
-		for (uint32 j = 0; j < args.width; j++) {
-			if (doscale) {
-				in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
-			}
-
-			out[BlendBlit::kAIndex] = 255;
-			out[BlendBlit::kBIndex] = MAX<int32>(out[BlendBlit::kBIndex] - ((in[BlendBlit::kBIndex] * cb  * (out[BlendBlit::kBIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
-			out[BlendBlit::kGIndex] = MAX<int32>(out[BlendBlit::kGIndex] - ((in[BlendBlit::kGIndex] * cg  * (out[BlendBlit::kGIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
-			out[BlendBlit::kRIndex] = MAX<int32>(out[BlendBlit::kRIndex] - ((in[BlendBlit::kRIndex] * cr * (out[BlendBlit::kRIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
-
-			if (doscale)
-				scaleXCtr += args.scaleX;
-			else
-				in += args.inStep;
-			out += 4;
-		}
-		if (doscale)
-			scaleYCtr += args.scaleY;
-		else
-			args.ino += args.inoStep;
-		args.outo += args.dstPitch;
-	}
-}
-
-/**
- * Optimized version of doBlit to be used with additive blended blitting
- */
-template<bool doscale, bool rgbmod, bool alphamod>
-static void doBlitAdditiveBlendLogicGeneric(BlendBlit::Args &args) {
-	const byte *in;
-	byte *out;
-
-	int scaleXCtr, scaleYCtr = args.scaleYoff;
-	const byte *inBase;
-
-	const byte rawcr = (args.color >> BlendBlit::kRModShift) & 0xFF;
-	const byte rawcg = (args.color >> BlendBlit::kGModShift) & 0xFF;
-	const byte rawcb = (args.color >> BlendBlit::kBModShift) & 0xFF;
-	const byte ca = alphamod ? ((args.color >> BlendBlit::kAModShift) & 0xFF) : 255;
-	const uint32 cr = rgbmod   ? (rawcr == 255 ? 256 : rawcr) : 256;
-	const uint32 cg = rgbmod   ? (rawcg == 255 ? 256 : rawcg) : 256;
-	const uint32 cb = rgbmod   ? (rawcb == 255 ? 256 : rawcb) : 256;
-
-	for (uint32 i = 0; i < args.height; i++) {
-		if (doscale) {
-			inBase = args.ino + scaleYCtr / BlendBlit::SCALE_THRESHOLD * args.inoStep;
-			scaleXCtr = args.scaleXoff;
-		} else {
-			in = args.ino;
-		}
-		out = args.outo;
-		for (uint32 j = 0; j < args.width; j++) {
-			if (doscale) {
-				in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
-			}
-
-			uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
-
-			if (ina != 0) {
-				out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] + ((in[BlendBlit::kBIndex] * cb * ina) >> 16);
-				out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] + ((in[BlendBlit::kGIndex] * cg * ina) >> 16);
-				out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] + ((in[BlendBlit::kRIndex] * cr * ina) >> 16);
-			}
-
-			if (doscale)
-				scaleXCtr += args.scaleX;
-			else
-				in += args.inStep;
-			out += 4;
-		}
-
-		if (doscale)
-			scaleYCtr += args.scaleY;
-		else
-			args.ino += args.inoStep;
-		args.outo += args.dstPitch;
-	}
-}
-
-template<bool doscale>
-static void doBlitOpaqueBlendLogicGeneric(BlendBlit::Args &args) {
-	const byte *in;
-	byte *out;
-
-	int scaleXCtr, scaleYCtr = args.scaleYoff;
-	const byte *inBase;
-
-	for (uint32 i = 0; i < args.height; i++) {
-		if (doscale) {
-			inBase = args.ino + (scaleYCtr + 1) / BlendBlit::SCALE_THRESHOLD * args.inoStep;
-			scaleXCtr = args.scaleXoff;
-		} else {
-			in = args.ino;
-		}
-		out = args.outo;
-
-		if (doscale) {
-			for (uint32 j = 0; j < args.width; j++) {
-				in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
-				*(uint32 *)out = *(const uint32 *)in | BlendBlit::kAModMask;
-				scaleXCtr += args.scaleX;
-				out += 4;
-			}
-		} else {
-			for (uint32 j = 0; j < args.width; j++) {
-				*(uint32 *)out = *(const uint32 *)in | BlendBlit::kAModMask;
-				in += args.inStep;
-				out += 4;
-			}
-		}
-
-		if (doscale)
-			scaleYCtr += args.scaleY;
-		else
-			args.ino += args.inoStep;
-		args.outo += args.dstPitch;
-	}
-}
-
-template<bool doscale>
-static void doBlitBinaryBlendLogicGeneric(BlendBlit::Args &args) {
-	const byte *in;
-	byte *out;
-
-	int scaleXCtr, scaleYCtr = args.scaleYoff;
-	const byte *inBase;
-
-	for (uint32 i = 0; i < args.height; i++) {
-		if (doscale) {
-			inBase = args.ino + scaleYCtr / BlendBlit::SCALE_THRESHOLD * args.inoStep;
-			scaleXCtr = args.scaleXoff;
-		} else {
-			in = args.ino;
-		}
-		out = args.outo;
-		for (uint32 j = 0; j < args.width; j++) {
-			if (doscale) {
-				in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
-			}
-
-			uint32 pix = *(const uint32 *)in, pixout = *(const uint32 *)out;
-			uint32 mask = (pix & BlendBlit::kAModMask) ? 0xffffffff : 0;
-			pixout &= ~mask;
-			pix = (pix | BlendBlit::kAModMask) & mask;
-			*(uint32 *)out = pixout | pix;
-			
-			if (doscale)
-				scaleXCtr += args.scaleX;
-			else
-				in += args.inStep;
-			out += 4;
-		}
-		if (doscale)
-			scaleYCtr += args.scaleY;
-		else
-			args.ino += args.inoStep;
-		args.outo += args.dstPitch;
-	}
-}
-
-}; // end of class BlendBlitImpl_Default
-
-void BlendBlit::blitGeneric(Args &args, const TSpriteBlendMode &blendMode, const AlphaType &alphaType) {
-	bool rgbmod   = ((args.color & kRGBModMask) != kRGBModMask);
-	bool alphamod = ((args.color & kAModMask)   != kAModMask);
-	if (args.scaleX == BlendBlit::SCALE_THRESHOLD && args.scaleY == BlendBlit::SCALE_THRESHOLD) {
-		if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_OPAQUE) {
-			BlendBlitImpl_Default::doBlitOpaqueBlendLogicGeneric<false>(args);
-		} else if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_BINARY) {
-			BlendBlitImpl_Default::doBlitBinaryBlendLogicGeneric<false>(args);
-		} else {
-			if (blendMode == BLEND_ADDITIVE) {
-				if (rgbmod) {
-					if (alphamod) {
-						BlendBlitImpl_Default::doBlitAdditiveBlendLogicGeneric<false, true, true>(args);
-					} else {
-						BlendBlitImpl_Default::doBlitAdditiveBlendLogicGeneric<false, true, false>(args);
-					}
-				} else {
-					if (alphamod) {
-						BlendBlitImpl_Default::doBlitAdditiveBlendLogicGeneric<false, false, true>(args);
-					} else {
-						BlendBlitImpl_Default::doBlitAdditiveBlendLogicGeneric<false, false, false>(args);
-					}
-				}
-			} else if (blendMode == BLEND_SUBTRACTIVE) {
-				if (rgbmod) {
-					BlendBlitImpl_Default::doBlitSubtractiveBlendLogicGeneric<false, true>(args);
-				} else {
-					BlendBlitImpl_Default::doBlitSubtractiveBlendLogicGeneric<false, false>(args);
-				}
-			} else if (blendMode == BLEND_MULTIPLY) {
-				if (rgbmod) {
-					if (alphamod) {
-						BlendBlitImpl_Default::doBlitMultiplyBlendLogicGeneric<false, true, true>(args);
-					} else {
-						BlendBlitImpl_Default::doBlitMultiplyBlendLogicGeneric<false, true, false>(args);
-					}
-				} else {
-					if (alphamod) {
-						BlendBlitImpl_Default::doBlitMultiplyBlendLogicGeneric<false, false, true>(args);
-					} else {
-						BlendBlitImpl_Default::doBlitMultiplyBlendLogicGeneric<false, false, false>(args);
-					}
-				}
-			} else {
-				assert(blendMode == BLEND_NORMAL);
-				if (rgbmod) {
-					if (alphamod) {
-						BlendBlitImpl_Default::doBlitAlphaBlendLogicGeneric<false, true, true>(args);
-					} else {
-						BlendBlitImpl_Default::doBlitAlphaBlendLogicGeneric<false, true, false>(args);
-					}
-				} else {
-					if (alphamod) {
-						BlendBlitImpl_Default::doBlitAlphaBlendLogicGeneric<false, false, true>(args);
-					} else {
-						BlendBlitImpl_Default::doBlitAlphaBlendLogicGeneric<false, false, false>(args);
-					}
-				}
-			}
-		}
-	} else {
-		if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_OPAQUE) {
-			BlendBlitImpl_Default::doBlitOpaqueBlendLogicGeneric<true>(args);
-		} else if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_BINARY) {
-			BlendBlitImpl_Default::doBlitBinaryBlendLogicGeneric<true>(args);
-		} else {
-			if (blendMode == BLEND_ADDITIVE) {
-				if (rgbmod) {
-					if (alphamod) {
-						BlendBlitImpl_Default::doBlitAdditiveBlendLogicGeneric<true, true, true>(args);
-					} else {
-						BlendBlitImpl_Default::doBlitAdditiveBlendLogicGeneric<true, true, false>(args);
-					}
-				} else {
-					if (alphamod) {
-						BlendBlitImpl_Default::doBlitAdditiveBlendLogicGeneric<true, false, true>(args);
-					} else {
-						BlendBlitImpl_Default::doBlitAdditiveBlendLogicGeneric<true, false, false>(args);
-					}
-				}
-			} else if (blendMode == BLEND_SUBTRACTIVE) {
-				if (rgbmod) {
-					BlendBlitImpl_Default::doBlitSubtractiveBlendLogicGeneric<true, true>(args);
-				} else {
-					BlendBlitImpl_Default::doBlitSubtractiveBlendLogicGeneric<true, false>(args);
-				}
-			} else if (blendMode == BLEND_MULTIPLY) {
-				if (rgbmod) {
-					if (alphamod) {
-						BlendBlitImpl_Default::doBlitMultiplyBlendLogicGeneric<true, true, true>(args);
-					} else {
-						BlendBlitImpl_Default::doBlitMultiplyBlendLogicGeneric<true, true, false>(args);
-					}
-				} else {
-					if (alphamod) {
-						BlendBlitImpl_Default::doBlitMultiplyBlendLogicGeneric<true, false, true>(args);
-					} else {
-						BlendBlitImpl_Default::doBlitMultiplyBlendLogicGeneric<true, false, false>(args);
-					}
-				}
-			} else {
-				assert(blendMode == BLEND_NORMAL);
-				if (rgbmod) {
-					if (alphamod) {
-						BlendBlitImpl_Default::doBlitAlphaBlendLogicGeneric<true, true, true>(args);
-					} else {
-						BlendBlitImpl_Default::doBlitAlphaBlendLogicGeneric<true, true, false>(args);
-					}
-				} else {
-					if (alphamod) {
-						BlendBlitImpl_Default::doBlitAlphaBlendLogicGeneric<true, false, true>(args);
-					} else {
-						BlendBlitImpl_Default::doBlitAlphaBlendLogicGeneric<true, false, false>(args);
-					}
-				}
-			}
-		}
-	}
-}
-
 } // End of namespace Graphics
diff --git a/graphics/blit/blit-generic.cpp b/graphics/blit/blit-generic.cpp
new file mode 100644
index 00000000000..4d25769c035
--- /dev/null
+++ b/graphics/blit/blit-generic.cpp
@@ -0,0 +1,455 @@
+/* ScummVM - Graphic Adventure Engine
+ *
+ * ScummVM is the legal property of its developers, whose names
+ * are too numerous to list here. Please refer to the COPYRIGHT
+ * file distributed with this source distribution.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "common/system.h"
+#include "graphics/blit.h"
+#include "graphics/pixelformat.h"
+
+namespace Graphics {
+
+class BlendBlitImpl_Default {
+public:
+/**
+ * Optimized version of doBlit to be used with multiply blended blitting
+ */
+template<bool doscale, bool rgbmod, bool alphamod>
+static void doBlitMultiplyBlendLogicGeneric(BlendBlit::Args &args) {
+	const byte *in;
+	byte *out;
+
+	int scaleXCtr, scaleYCtr = args.scaleYoff;
+	const byte *inBase;
+
+	const byte rawcr = (args.color >> BlendBlit::kRModShift) & 0xFF;
+	const byte rawcg = (args.color >> BlendBlit::kGModShift) & 0xFF;
+	const byte rawcb = (args.color >> BlendBlit::kBModShift) & 0xFF;
+	const byte ca = alphamod ? ((args.color >> BlendBlit::kAModShift) & 0xFF) : 255;
+	const uint32 cr = rgbmod   ? (rawcr == 255 ? 256 : rawcr) : 256;
+	const uint32 cg = rgbmod   ? (rawcg == 255 ? 256 : rawcg) : 256;
+	const uint32 cb = rgbmod   ? (rawcb == 255 ? 256 : rawcb) : 256;
+
+	for (uint32 i = 0; i < args.height; i++) {
+		if (doscale) {
+			inBase = args.ino + scaleYCtr / BlendBlit::SCALE_THRESHOLD * args.inoStep;
+			scaleXCtr = args.scaleXoff;
+		} else {
+			in = args.ino;
+		}
+		out = args.outo;
+		for (uint32 j = 0; j < args.width; j++) {
+			if (doscale) {
+				in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
+			}
+
+			uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
+
+			if (ina != 0) {
+				out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] * ((in[BlendBlit::kBIndex] * cb * ina) >> 16) >> 8;
+				out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] * ((in[BlendBlit::kGIndex] * cg * ina) >> 16) >> 8;
+				out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] * ((in[BlendBlit::kRIndex] * cr * ina) >> 16) >> 8;
+			}
+
+			if (doscale)
+				scaleXCtr += args.scaleX;
+			else
+				in += args.inStep;
+			out += 4;
+		}
+		if (doscale)
+			scaleYCtr += args.scaleY;
+		else
+			args.ino += args.inoStep;
+		args.outo += args.dstPitch;
+	}
+
+}
+
+template<bool doscale, bool rgbmod, bool alphamod>
+static void doBlitAlphaBlendLogicGeneric(BlendBlit::Args &args) {
+	const byte *in;
+	byte *out;
+
+	int scaleXCtr, scaleYCtr = args.scaleYoff;
+	const byte *inBase;
+
+	const byte ca = alphamod ? ((args.color >> BlendBlit::kAModShift) & 0xFF) : 255;
+	const byte cr = rgbmod   ? ((args.color >> BlendBlit::kRModShift) & 0xFF) : 255;
+	const byte cg = rgbmod   ? ((args.color >> BlendBlit::kGModShift) & 0xFF) : 255;
+	const byte cb = rgbmod   ? ((args.color >> BlendBlit::kBModShift) & 0xFF) : 255;
+
+	for (uint32 i = 0; i < args.height; i++) {
+		if (doscale) {
+			inBase = args.ino + scaleYCtr / BlendBlit::SCALE_THRESHOLD * args.inoStep;
+			scaleXCtr = args.scaleXoff;
+		} else {
+			in = args.ino;
+		}
+		out = args.outo;
+		for (uint32 j = 0; j < args.width; j++) {
+			if (doscale) {
+				in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
+			}
+
+			uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
+
+			if (ina != 0) {
+				if (rgbmod) {
+					const uint outb = (out[BlendBlit::kBIndex] * (255 - ina) >> 8);
+					const uint outg = (out[BlendBlit::kGIndex] * (255 - ina) >> 8);
+					const uint outr = (out[BlendBlit::kRIndex] * (255 - ina) >> 8);
+
+					out[BlendBlit::kAIndex] = 255;
+					out[BlendBlit::kBIndex] = outb + (in[BlendBlit::kBIndex] * ina * cb >> 16);
+					out[BlendBlit::kGIndex] = outg + (in[BlendBlit::kGIndex] * ina * cg >> 16);
+					out[BlendBlit::kRIndex] = outr + (in[BlendBlit::kRIndex] * ina * cr >> 16);
+				} else {
+					out[BlendBlit::kAIndex] = 255;
+					out[BlendBlit::kBIndex] = (out[BlendBlit::kBIndex] * (255 - ina) + in[BlendBlit::kBIndex] * ina) >> 8;
+					out[BlendBlit::kGIndex] = (out[BlendBlit::kGIndex] * (255 - ina) + in[BlendBlit::kGIndex] * ina) >> 8;
+					out[BlendBlit::kRIndex] = (out[BlendBlit::kRIndex] * (255 - ina) + in[BlendBlit::kRIndex] * ina) >> 8;
+					
+				}
+			}
+
+			if (doscale)
+				scaleXCtr += args.scaleX;
+			else
+				in += args.inStep;
+			out += 4;
+		}
+
+		if (doscale)
+			scaleYCtr += args.scaleY;
+		else
+			args.ino += args.inoStep;
+		args.outo += args.dstPitch;
+	}
+}
+
+/**
+ * Optimized version of doBlit to be used with subtractive blended blitting
+ */
+template<bool doscale, bool rgbmod>
+static void doBlitSubtractiveBlendLogicGeneric(BlendBlit::Args &args) {
+	const byte *in;
+	byte *out;
+
+	int scaleXCtr, scaleYCtr = args.scaleYoff;
+	const byte *inBase;
+
+	const byte rawcr = (args.color >> BlendBlit::kRModShift) & 0xFF;
+	const byte rawcg = (args.color >> BlendBlit::kGModShift) & 0xFF;
+	const byte rawcb = (args.color >> BlendBlit::kBModShift) & 0xFF;
+	const uint32 cr = rgbmod   ? (rawcr == 255 ? 256 : rawcr) : 256;
+	const uint32 cg = rgbmod   ? (rawcg == 255 ? 256 : rawcg) : 256;
+	const uint32 cb = rgbmod   ? (rawcb == 255 ? 256 : rawcb) : 256;
+
+	for (uint32 i = 0; i < args.height; i++) {
+		if (doscale) {
+			inBase = args.ino + scaleYCtr / BlendBlit::SCALE_THRESHOLD * args.inoStep;
+			scaleXCtr = args.scaleXoff;
+		} else {
+			in = args.ino;
+		}
+		out = args.outo;
+		for (uint32 j = 0; j < args.width; j++) {
+			if (doscale) {
+				in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
+			}
+
+			out[BlendBlit::kAIndex] = 255;
+			out[BlendBlit::kBIndex] = MAX<int32>(out[BlendBlit::kBIndex] - ((in[BlendBlit::kBIndex] * cb  * (out[BlendBlit::kBIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
+			out[BlendBlit::kGIndex] = MAX<int32>(out[BlendBlit::kGIndex] - ((in[BlendBlit::kGIndex] * cg  * (out[BlendBlit::kGIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
+			out[BlendBlit::kRIndex] = MAX<int32>(out[BlendBlit::kRIndex] - ((in[BlendBlit::kRIndex] * cr * (out[BlendBlit::kRIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
+
+			if (doscale)
+				scaleXCtr += args.scaleX;
+			else
+				in += args.inStep;
+			out += 4;
+		}
+		if (doscale)
+			scaleYCtr += args.scaleY;
+		else
+			args.ino += args.inoStep;
+		args.outo += args.dstPitch;
+	}
+}
+
+/**
+ * Optimized version of doBlit to be used with additive blended blitting
+ */
+template<bool doscale, bool rgbmod, bool alphamod>
+static void doBlitAdditiveBlendLogicGeneric(BlendBlit::Args &args) {
+	const byte *in;
+	byte *out;
+
+	int scaleXCtr, scaleYCtr = args.scaleYoff;
+	const byte *inBase;
+
+	const byte rawcr = (args.color >> BlendBlit::kRModShift) & 0xFF;
+	const byte rawcg = (args.color >> BlendBlit::kGModShift) & 0xFF;
+	const byte rawcb = (args.color >> BlendBlit::kBModShift) & 0xFF;
+	const byte ca = alphamod ? ((args.color >> BlendBlit::kAModShift) & 0xFF) : 255;
+	const uint32 cr = rgbmod   ? (rawcr == 255 ? 256 : rawcr) : 256;
+	const uint32 cg = rgbmod   ? (rawcg == 255 ? 256 : rawcg) : 256;
+	const uint32 cb = rgbmod   ? (rawcb == 255 ? 256 : rawcb) : 256;
+
+	for (uint32 i = 0; i < args.height; i++) {
+		if (doscale) {
+			inBase = args.ino + scaleYCtr / BlendBlit::SCALE_THRESHOLD * args.inoStep;
+			scaleXCtr = args.scaleXoff;
+		} else {
+			in = args.ino;
+		}
+		out = args.outo;
+		for (uint32 j = 0; j < args.width; j++) {
+			if (doscale) {
+				in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
+			}
+
+			uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
+
+			if (ina != 0) {
+				out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] + ((in[BlendBlit::kBIndex] * cb * ina) >> 16);
+				out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] + ((in[BlendBlit::kGIndex] * cg * ina) >> 16);
+				out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] + ((in[BlendBlit::kRIndex] * cr * ina) >> 16);
+			}
+
+			if (doscale)
+				scaleXCtr += args.scaleX;
+			else
+				in += args.inStep;
+			out += 4;
+		}
+
+		if (doscale)
+			scaleYCtr += args.scaleY;
+		else
+			args.ino += args.inoStep;
+		args.outo += args.dstPitch;
+	}
+}
+
+template<bool doscale>
+static void doBlitOpaqueBlendLogicGeneric(BlendBlit::Args &args) {
+	const byte *in;
+	byte *out;
+
+	int scaleXCtr, scaleYCtr = args.scaleYoff;
+	const byte *inBase;
+
+	for (uint32 i = 0; i < args.height; i++) {
+		if (doscale) {
+			inBase = args.ino + (scaleYCtr + 1) / BlendBlit::SCALE_THRESHOLD * args.inoStep;
+			scaleXCtr = args.scaleXoff;
+		} else {
+			in = args.ino;
+		}
+		out = args.outo;
+
+		if (doscale) {
+			for (uint32 j = 0; j < args.width; j++) {
+				in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
+				*(uint32 *)out = *(const uint32 *)in | BlendBlit::kAModMask;
+				scaleXCtr += args.scaleX;
+				out += 4;
+			}
+		} else {
+			for (uint32 j = 0; j < args.width; j++) {
+				*(uint32 *)out = *(const uint32 *)in | BlendBlit::kAModMask;
+				in += args.inStep;
+				out += 4;
+			}
+		}
+
+		if (doscale)
+			scaleYCtr += args.scaleY;
+		else
+			args.ino += args.inoStep;
+		args.outo += args.dstPitch;
+	}
+}
+
+template<bool doscale>
+static void doBlitBinaryBlendLogicGeneric(BlendBlit::Args &args) {
+	const byte *in;
+	byte *out;
+
+	int scaleXCtr, scaleYCtr = args.scaleYoff;
+	const byte *inBase;
+
+	for (uint32 i = 0; i < args.height; i++) {
+		if (doscale) {
+			inBase = args.ino + scaleYCtr / BlendBlit::SCALE_THRESHOLD * args.inoStep;
+			scaleXCtr = args.scaleXoff;
+		} else {
+			in = args.ino;
+		}
+		out = args.outo;
+		for (uint32 j = 0; j < args.width; j++) {
+			if (doscale) {
+				in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
+			}
+
+			uint32 pix = *(const uint32 *)in, pixout = *(const uint32 *)out;
+			uint32 mask = (pix & BlendBlit::kAModMask) ? 0xffffffff : 0;
+			pixout &= ~mask;
+			pix = (pix | BlendBlit::kAModMask) & mask;
+			*(uint32 *)out = pixout | pix;
+			
+			if (doscale)
+				scaleXCtr += args.scaleX;
+			else
+				in += args.inStep;
+			out += 4;
+		}
+		if (doscale)
+			scaleYCtr += args.scaleY;
+		else
+			args.ino += args.inoStep;
+		args.outo += args.dstPitch;
+	}
+}
+
+}; // end of class BlendBlitImpl_Default
+
+void BlendBlit::blitGeneric(Args &args, const TSpriteBlendMode &blendMode, const AlphaType &alphaType) {
+	bool rgbmod   = ((args.color & kRGBModMask) != kRGBModMask);
+	bool alphamod = ((args.color & kAModMask)   != kAModMask);
+	if (args.scaleX == BlendBlit::SCALE_THRESHOLD && args.scaleY == BlendBlit::SCALE_THRESHOLD) {
+		if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_OPAQUE) {
+			BlendBlitImpl_Default::doBlitOpaqueBlendLogicGeneric<false>(args);
+		} else if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_BINARY) {
+			BlendBlitImpl_Default::doBlitBinaryBlendLogicGeneric<false>(args);
+		} else {
+			if (blendMode == BLEND_ADDITIVE) {
+				if (rgbmod) {
+					if (alphamod) {
+						BlendBlitImpl_Default::doBlitAdditiveBlendLogicGeneric<false, true, true>(args);
+					} else {
+						BlendBlitImpl_Default::doBlitAdditiveBlendLogicGeneric<false, true, false>(args);
+					}
+				} else {
+					if (alphamod) {
+						BlendBlitImpl_Default::doBlitAdditiveBlendLogicGeneric<false, false, true>(args);
+					} else {
+						BlendBlitImpl_Default::doBlitAdditiveBlendLogicGeneric<false, false, false>(args);
+					}
+				}
+			} else if (blendMode == BLEND_SUBTRACTIVE) {
+				if (rgbmod) {
+					BlendBlitImpl_Default::doBlitSubtractiveBlendLogicGeneric<false, true>(args);
+				} else {
+					BlendBlitImpl_Default::doBlitSubtractiveBlendLogicGeneric<false, false>(args);
+				}
+			} else if (blendMode == BLEND_MULTIPLY) {
+				if (rgbmod) {
+					if (alphamod) {
+						BlendBlitImpl_Default::doBlitMultiplyBlendLogicGeneric<false, true, true>(args);
+					} else {
+						BlendBlitImpl_Default::doBlitMultiplyBlendLogicGeneric<false, true, false>(args);
+					}
+				} else {
+					if (alphamod) {
+						BlendBlitImpl_Default::doBlitMultiplyBlendLogicGeneric<false, false, true>(args);
+					} else {
+						BlendBlitImpl_Default::doBlitMultiplyBlendLogicGeneric<false, false, false>(args);
+					}
+				}
+			} else {
+				assert(blendMode == BLEND_NORMAL);
+				if (rgbmod) {
+					if (alphamod) {
+						BlendBlitImpl_Default::doBlitAlphaBlendLogicGeneric<false, true, true>(args);
+					} else {
+						BlendBlitImpl_Default::doBlitAlphaBlendLogicGeneric<false, true, false>(args);
+					}
+				} else {
+					if (alphamod) {
+						BlendBlitImpl_Default::doBlitAlphaBlendLogicGeneric<false, false, true>(args);
+					} else {
+						BlendBlitImpl_Default::doBlitAlphaBlendLogicGeneric<false, false, false>(args);
+					}
+				}
+			}
+		}
+	} else {
+		if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_OPAQUE) {
+			BlendBlitImpl_Default::doBlitOpaqueBlendLogicGeneric<true>(args);
+		} else if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_BINARY) {
+			BlendBlitImpl_Default::doBlitBinaryBlendLogicGeneric<true>(args);
+		} else {
+			if (blendMode == BLEND_ADDITIVE) {
+				if (rgbmod) {
+					if (alphamod) {
+						BlendBlitImpl_Default::doBlitAdditiveBlendLogicGeneric<true, true, true>(args);
+					} else {
+						BlendBlitImpl_Default::doBlitAdditiveBlendLogicGeneric<true, true, false>(args);
+					}
+				} else {
+					if (alphamod) {
+						BlendBlitImpl_Default::doBlitAdditiveBlendLogicGeneric<true, false, true>(args);
+					} else {
+						BlendBlitImpl_Default::doBlitAdditiveBlendLogicGeneric<true, false, false>(args);
+					}
+				}
+			} else if (blendMode == BLEND_SUBTRACTIVE) {
+				if (rgbmod) {
+					BlendBlitImpl_Default::doBlitSubtractiveBlendLogicGeneric<true, true>(args);
+				} else {
+					BlendBlitImpl_Default::doBlitSubtractiveBlendLogicGeneric<true, false>(args);
+				}
+			} else if (blendMode == BLEND_MULTIPLY) {
+				if (rgbmod) {
+					if (alphamod) {
+						BlendBlitImpl_Default::doBlitMultiplyBlendLogicGeneric<true, true, true>(args);
+					} else {
+						BlendBlitImpl_Default::doBlitMultiplyBlendLogicGeneric<true, true, false>(args);
+					}
+				} else {
+					if (alphamod) {
+						BlendBlitImpl_Default::doBlitMultiplyBlendLogicGeneric<true, false, true>(args);
+					} else {
+						BlendBlitImpl_Default::doBlitMultiplyBlendLogicGeneric<true, false, false>(args);
+					}
+				}
+			} else {
+				assert(blendMode == BLEND_NORMAL);
+				if (rgbmod) {
+					if (alphamod) {
+						BlendBlitImpl_Default::doBlitAlphaBlendLogicGeneric<true, true, true>(args);
+					} else {
+						BlendBlitImpl_Default::doBlitAlphaBlendLogicGeneric<true, true, false>(args);
+					}
+				} else {
+					if (alphamod) {
+						BlendBlitImpl_Default::doBlitAlphaBlendLogicGeneric<true, false, true>(args);
+					} else {
+						BlendBlitImpl_Default::doBlitAlphaBlendLogicGeneric<true, false, false>(args);
+					}
+				}
+			}
+		}
+	}
+}
+
+} // End of namespace Graphics
diff --git a/graphics/module.mk b/graphics/module.mk
index 1cedbcd6e01..8bcca735735 100644
--- a/graphics/module.mk
+++ b/graphics/module.mk
@@ -4,6 +4,7 @@ MODULE_OBJS := \
 	big5.o \
 	blit/blit.o \
 	blit/blit-alpha.o \
+	blit/blit-generic.o \
 	blit/blit-scale.o \
 	cursorman.o \
 	font.o \


Commit: 94569762cd46122262021c498c1bed596cf7af61
    https://github.com/scummvm/scummvm/commit/94569762cd46122262021c498c1bed596cf7af61
Author: Cameron Cawley (ccawley2011 at gmail.com)
Date: 2023-10-29T00:56:58+02:00

Commit Message:
GRAPHICS: Share code between the generic and SIMD blending functions

Changed paths:
    graphics/blit/blit-alpha.h
    graphics/blit/blit-avx2.cpp
    graphics/blit/blit-generic.cpp
    graphics/blit/blit-neon.cpp
    graphics/blit/blit-sse2.cpp


diff --git a/graphics/blit/blit-alpha.h b/graphics/blit/blit-alpha.h
index c147a62cb7a..0abc3094895 100644
--- a/graphics/blit/blit-alpha.h
+++ b/graphics/blit/blit-alpha.h
@@ -27,23 +27,33 @@ class BlendBlitImpl_Base {
 	friend class BlendBlit;
 protected:
 
+template<bool doscale, bool rgbmod, bool alphamod>
 struct AlphaBlend {
 	static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
 		uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
 
 		if (ina != 0) {
-			uint outb = (out[BlendBlit::kBIndex] * (255 - ina) >> 8);
-			uint outg = (out[BlendBlit::kGIndex] * (255 - ina) >> 8);
-			uint outr = (out[BlendBlit::kRIndex] * (255 - ina) >> 8);
-
-			out[BlendBlit::kAIndex] = 255;
-			out[BlendBlit::kBIndex] = outb + (in[BlendBlit::kBIndex] * ina * cb >> 16);
-			out[BlendBlit::kGIndex] = outg + (in[BlendBlit::kGIndex] * ina * cg >> 16);
-			out[BlendBlit::kRIndex] = outr + (in[BlendBlit::kRIndex] * ina * cr >> 16);
+			if (rgbmod) {
+				const uint outb = (out[BlendBlit::kBIndex] * (255 - ina) >> 8);
+				const uint outg = (out[BlendBlit::kGIndex] * (255 - ina) >> 8);
+				const uint outr = (out[BlendBlit::kRIndex] * (255 - ina) >> 8);
+
+				out[BlendBlit::kAIndex] = 255;
+				out[BlendBlit::kBIndex] = outb + (in[BlendBlit::kBIndex] * ina * cb >> 16);
+				out[BlendBlit::kGIndex] = outg + (in[BlendBlit::kGIndex] * ina * cg >> 16);
+				out[BlendBlit::kRIndex] = outr + (in[BlendBlit::kRIndex] * ina * cr >> 16);
+			} else {
+				out[BlendBlit::kAIndex] = 255;
+				out[BlendBlit::kBIndex] = (out[BlendBlit::kBIndex] * (255 - ina) + in[BlendBlit::kBIndex] * ina) >> 8;
+				out[BlendBlit::kGIndex] = (out[BlendBlit::kGIndex] * (255 - ina) + in[BlendBlit::kGIndex] * ina) >> 8;
+				out[BlendBlit::kRIndex] = (out[BlendBlit::kRIndex] * (255 - ina) + in[BlendBlit::kRIndex] * ina) >> 8;
+			}
 		}
+
 	}
 };
 
+template<bool doscale, bool rgbmod, bool alphamod>
 struct MultiplyBlend {
 	static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
 		uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
@@ -56,12 +66,14 @@ struct MultiplyBlend {
 	}
 };
 
+template<bool doscale, bool rgbmod, bool alphamod>
 struct OpaqueBlend {
 	static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
 		*(uint32 *)out = *(const uint32 *)in | BlendBlit::kAModMask;
 	}
 };
 
+template<bool doscale, bool rgbmod, bool alphamod>
 struct BinaryBlend {
 	static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
 		uint32 pix = *(const uint32 *)in;
@@ -74,6 +86,7 @@ struct BinaryBlend {
 	}
 };
 
+template<bool doscale, bool rgbmod, bool alphamod>
 struct AdditiveBlend {
 	static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
 		uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
@@ -86,6 +99,7 @@ struct AdditiveBlend {
 	}
 };
 
+template<bool doscale, bool rgbmod, bool alphamod>
 struct SubtractiveBlend {
 	static inline void normal(const byte *in, byte *out, const byte ca, const byte cr, const byte cg, const byte cb) {
 		out[BlendBlit::kAIndex] = 255;
diff --git a/graphics/blit/blit-avx2.cpp b/graphics/blit/blit-avx2.cpp
index d62e9411af5..8fd5a8f038f 100644
--- a/graphics/blit/blit-avx2.cpp
+++ b/graphics/blit/blit-avx2.cpp
@@ -31,7 +31,7 @@ class BlendBlitImpl_AVX2 : public BlendBlitImpl_Base {
 	friend class BlendBlit;
 
 template<bool doscale, bool rgbmod, bool alphamod>
-struct AlphaBlend : public BlendBlitImpl_Base::AlphaBlend {
+struct AlphaBlend : public BlendBlitImpl_Base::AlphaBlend<doscale, rgbmod, alphamod> {
 	static inline __m256i simd(__m256i src, __m256i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
 		__m256i ina;
 		if (alphamod)
@@ -78,7 +78,7 @@ struct AlphaBlend : public BlendBlitImpl_Base::AlphaBlend {
 };
 
 template<bool doscale, bool rgbmod, bool alphamod>
-struct MultiplyBlend : public BlendBlitImpl_Base::MultiplyBlend {
+struct MultiplyBlend : public BlendBlitImpl_Base::MultiplyBlend<doscale, rgbmod, alphamod> {
 	static inline __m256i simd(__m256i src, __m256i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
 		__m256i ina, alphaMask;
 		if (alphamod) {
@@ -124,14 +124,14 @@ struct MultiplyBlend : public BlendBlitImpl_Base::MultiplyBlend {
 };
 
 template<bool doscale, bool rgbmod, bool alphamod>
-struct OpaqueBlend : public BlendBlitImpl_Base::OpaqueBlend {
+struct OpaqueBlend : public BlendBlitImpl_Base::OpaqueBlend<doscale, rgbmod, alphamod> {
 	static inline __m256i simd(__m256i src, __m256i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
 		return _mm256_or_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask));
 	}
 };
 
 template<bool doscale, bool rgbmod, bool alphamod>
-struct BinaryBlend : public BlendBlitImpl_Base::BinaryBlend {
+struct BinaryBlend : public BlendBlitImpl_Base::BinaryBlend<doscale, rgbmod, alphamod> {
 	static inline __m256i simd(__m256i src, __m256i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
 		__m256i alphaMask = _mm256_cmpeq_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask)), _mm256_setzero_si256());
 		dst = _mm256_and_si256(dst, alphaMask);
@@ -141,7 +141,7 @@ struct BinaryBlend : public BlendBlitImpl_Base::BinaryBlend {
 };
 
 template<bool doscale, bool rgbmod, bool alphamod>
-struct AdditiveBlend : public BlendBlitImpl_Base::AdditiveBlend {
+struct AdditiveBlend : public BlendBlitImpl_Base::AdditiveBlend<doscale, rgbmod, alphamod> {
 	static inline __m256i simd(__m256i src, __m256i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
 		__m256i ina;
 		if (alphamod)
@@ -195,7 +195,7 @@ struct AdditiveBlend : public BlendBlitImpl_Base::AdditiveBlend {
 };
 
 template<bool doscale, bool rgbmod, bool alphamod>
-struct SubtractiveBlend : public BlendBlitImpl_Base::SubtractiveBlend {
+struct SubtractiveBlend : public BlendBlitImpl_Base::SubtractiveBlend<doscale, rgbmod, alphamod> {
 	static inline __m256i simd(__m256i src, __m256i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
 		__m256i ina = _mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kAModMask));
 		__m256i srcb = _mm256_srli_epi32(_mm256_and_si256(src, _mm256_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
diff --git a/graphics/blit/blit-generic.cpp b/graphics/blit/blit-generic.cpp
index 4d25769c035..25033ffcea1 100644
--- a/graphics/blit/blit-generic.cpp
+++ b/graphics/blit/blit-generic.cpp
@@ -19,82 +19,31 @@
  *
  */
 
-#include "common/system.h"
-#include "graphics/blit.h"
+#include "graphics/blit/blit-alpha.h"
 #include "graphics/pixelformat.h"
 
 namespace Graphics {
 
-class BlendBlitImpl_Default {
+class BlendBlitImpl_Default : public BlendBlitImpl_Base {
+	friend class BlendBlit;
 public:
-/**
- * Optimized version of doBlit to be used with multiply blended blitting
- */
-template<bool doscale, bool rgbmod, bool alphamod>
-static void doBlitMultiplyBlendLogicGeneric(BlendBlit::Args &args) {
+
+template<template <bool DOSCALE, bool RGBMOD, bool ALPHAMOD> class PixelFunc, bool doscale, bool rgbmod, bool alphamod, bool coloradd1, bool loaddst>
+static inline void blitInnerLoop(BlendBlit::Args &args) {
 	const byte *in;
 	byte *out;
 
-	int scaleXCtr, scaleYCtr = args.scaleYoff;
-	const byte *inBase;
-
 	const byte rawcr = (args.color >> BlendBlit::kRModShift) & 0xFF;
 	const byte rawcg = (args.color >> BlendBlit::kGModShift) & 0xFF;
 	const byte rawcb = (args.color >> BlendBlit::kBModShift) & 0xFF;
 	const byte ca = alphamod ? ((args.color >> BlendBlit::kAModShift) & 0xFF) : 255;
-	const uint32 cr = rgbmod   ? (rawcr == 255 ? 256 : rawcr) : 256;
-	const uint32 cg = rgbmod   ? (rawcg == 255 ? 256 : rawcg) : 256;
-	const uint32 cb = rgbmod   ? (rawcb == 255 ? 256 : rawcb) : 256;
-
-	for (uint32 i = 0; i < args.height; i++) {
-		if (doscale) {
-			inBase = args.ino + scaleYCtr / BlendBlit::SCALE_THRESHOLD * args.inoStep;
-			scaleXCtr = args.scaleXoff;
-		} else {
-			in = args.ino;
-		}
-		out = args.outo;
-		for (uint32 j = 0; j < args.width; j++) {
-			if (doscale) {
-				in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
-			}
-
-			uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
-
-			if (ina != 0) {
-				out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] * ((in[BlendBlit::kBIndex] * cb * ina) >> 16) >> 8;
-				out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] * ((in[BlendBlit::kGIndex] * cg * ina) >> 16) >> 8;
-				out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] * ((in[BlendBlit::kRIndex] * cr * ina) >> 16) >> 8;
-			}
-
-			if (doscale)
-				scaleXCtr += args.scaleX;
-			else
-				in += args.inStep;
-			out += 4;
-		}
-		if (doscale)
-			scaleYCtr += args.scaleY;
-		else
-			args.ino += args.inoStep;
-		args.outo += args.dstPitch;
-	}
-
-}
-
-template<bool doscale, bool rgbmod, bool alphamod>
-static void doBlitAlphaBlendLogicGeneric(BlendBlit::Args &args) {
-	const byte *in;
-	byte *out;
+	const uint32 cr = coloradd1 ? (rgbmod   ? (rawcr == 255 ? 256 : rawcr) : 256) : (rgbmod   ? rawcr : 255);
+	const uint32 cg = coloradd1 ? (rgbmod   ? (rawcg == 255 ? 256 : rawcg) : 256) : (rgbmod   ? rawcg : 255);
+	const uint32 cb = coloradd1 ? (rgbmod   ? (rawcb == 255 ? 256 : rawcb) : 256) : (rgbmod   ? rawcb : 255);
 
 	int scaleXCtr, scaleYCtr = args.scaleYoff;
 	const byte *inBase;
 
-	const byte ca = alphamod ? ((args.color >> BlendBlit::kAModShift) & 0xFF) : 255;
-	const byte cr = rgbmod   ? ((args.color >> BlendBlit::kRModShift) & 0xFF) : 255;
-	const byte cg = rgbmod   ? ((args.color >> BlendBlit::kGModShift) & 0xFF) : 255;
-	const byte cb = rgbmod   ? ((args.color >> BlendBlit::kBModShift) & 0xFF) : 255;
-
 	for (uint32 i = 0; i < args.height; i++) {
 		if (doscale) {
 			inBase = args.ino + scaleYCtr / BlendBlit::SCALE_THRESHOLD * args.inoStep;
@@ -103,136 +52,13 @@ static void doBlitAlphaBlendLogicGeneric(BlendBlit::Args &args) {
 			in = args.ino;
 		}
 		out = args.outo;
-		for (uint32 j = 0; j < args.width; j++) {
-			if (doscale) {
-				in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
-			}
-
-			uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
-
-			if (ina != 0) {
-				if (rgbmod) {
-					const uint outb = (out[BlendBlit::kBIndex] * (255 - ina) >> 8);
-					const uint outg = (out[BlendBlit::kGIndex] * (255 - ina) >> 8);
-					const uint outr = (out[BlendBlit::kRIndex] * (255 - ina) >> 8);
-
-					out[BlendBlit::kAIndex] = 255;
-					out[BlendBlit::kBIndex] = outb + (in[BlendBlit::kBIndex] * ina * cb >> 16);
-					out[BlendBlit::kGIndex] = outg + (in[BlendBlit::kGIndex] * ina * cg >> 16);
-					out[BlendBlit::kRIndex] = outr + (in[BlendBlit::kRIndex] * ina * cr >> 16);
-				} else {
-					out[BlendBlit::kAIndex] = 255;
-					out[BlendBlit::kBIndex] = (out[BlendBlit::kBIndex] * (255 - ina) + in[BlendBlit::kBIndex] * ina) >> 8;
-					out[BlendBlit::kGIndex] = (out[BlendBlit::kGIndex] * (255 - ina) + in[BlendBlit::kGIndex] * ina) >> 8;
-					out[BlendBlit::kRIndex] = (out[BlendBlit::kRIndex] * (255 - ina) + in[BlendBlit::kRIndex] * ina) >> 8;
-					
-				}
-			}
-
-			if (doscale)
-				scaleXCtr += args.scaleX;
-			else
-				in += args.inStep;
-			out += 4;
-		}
-
-		if (doscale)
-			scaleYCtr += args.scaleY;
-		else
-			args.ino += args.inoStep;
-		args.outo += args.dstPitch;
-	}
-}
-
-/**
- * Optimized version of doBlit to be used with subtractive blended blitting
- */
-template<bool doscale, bool rgbmod>
-static void doBlitSubtractiveBlendLogicGeneric(BlendBlit::Args &args) {
-	const byte *in;
-	byte *out;
-
-	int scaleXCtr, scaleYCtr = args.scaleYoff;
-	const byte *inBase;
 
-	const byte rawcr = (args.color >> BlendBlit::kRModShift) & 0xFF;
-	const byte rawcg = (args.color >> BlendBlit::kGModShift) & 0xFF;
-	const byte rawcb = (args.color >> BlendBlit::kBModShift) & 0xFF;
-	const uint32 cr = rgbmod   ? (rawcr == 255 ? 256 : rawcr) : 256;
-	const uint32 cg = rgbmod   ? (rawcg == 255 ? 256 : rawcg) : 256;
-	const uint32 cb = rgbmod   ? (rawcb == 255 ? 256 : rawcb) : 256;
-
-	for (uint32 i = 0; i < args.height; i++) {
-		if (doscale) {
-			inBase = args.ino + scaleYCtr / BlendBlit::SCALE_THRESHOLD * args.inoStep;
-			scaleXCtr = args.scaleXoff;
-		} else {
-			in = args.ino;
-		}
-		out = args.outo;
-		for (uint32 j = 0; j < args.width; j++) {
-			if (doscale) {
-				in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
-			}
-
-			out[BlendBlit::kAIndex] = 255;
-			out[BlendBlit::kBIndex] = MAX<int32>(out[BlendBlit::kBIndex] - ((in[BlendBlit::kBIndex] * cb  * (out[BlendBlit::kBIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
-			out[BlendBlit::kGIndex] = MAX<int32>(out[BlendBlit::kGIndex] - ((in[BlendBlit::kGIndex] * cg  * (out[BlendBlit::kGIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
-			out[BlendBlit::kRIndex] = MAX<int32>(out[BlendBlit::kRIndex] - ((in[BlendBlit::kRIndex] * cr * (out[BlendBlit::kRIndex]) * in[BlendBlit::kAIndex]) >> 24), 0);
-
-			if (doscale)
-				scaleXCtr += args.scaleX;
-			else
-				in += args.inStep;
-			out += 4;
-		}
-		if (doscale)
-			scaleYCtr += args.scaleY;
-		else
-			args.ino += args.inoStep;
-		args.outo += args.dstPitch;
-	}
-}
-
-/**
- * Optimized version of doBlit to be used with additive blended blitting
- */
-template<bool doscale, bool rgbmod, bool alphamod>
-static void doBlitAdditiveBlendLogicGeneric(BlendBlit::Args &args) {
-	const byte *in;
-	byte *out;
-
-	int scaleXCtr, scaleYCtr = args.scaleYoff;
-	const byte *inBase;
-
-	const byte rawcr = (args.color >> BlendBlit::kRModShift) & 0xFF;
-	const byte rawcg = (args.color >> BlendBlit::kGModShift) & 0xFF;
-	const byte rawcb = (args.color >> BlendBlit::kBModShift) & 0xFF;
-	const byte ca = alphamod ? ((args.color >> BlendBlit::kAModShift) & 0xFF) : 255;
-	const uint32 cr = rgbmod   ? (rawcr == 255 ? 256 : rawcr) : 256;
-	const uint32 cg = rgbmod   ? (rawcg == 255 ? 256 : rawcg) : 256;
-	const uint32 cb = rgbmod   ? (rawcb == 255 ? 256 : rawcb) : 256;
-
-	for (uint32 i = 0; i < args.height; i++) {
-		if (doscale) {
-			inBase = args.ino + scaleYCtr / BlendBlit::SCALE_THRESHOLD * args.inoStep;
-			scaleXCtr = args.scaleXoff;
-		} else {
-			in = args.ino;
-		}
-		out = args.outo;
 		for (uint32 j = 0; j < args.width; j++) {
 			if (doscale) {
 				in = inBase + scaleXCtr / BlendBlit::SCALE_THRESHOLD * args.inStep;
 			}
 
-			uint32 ina = in[BlendBlit::kAIndex] * ca >> 8;
-
-			if (ina != 0) {
-				out[BlendBlit::kBIndex] = out[BlendBlit::kBIndex] + ((in[BlendBlit::kBIndex] * cb * ina) >> 16);
-				out[BlendBlit::kGIndex] = out[BlendBlit::kGIndex] + ((in[BlendBlit::kGIndex] * cg * ina) >> 16);
-				out[BlendBlit::kRIndex] = out[BlendBlit::kRIndex] + ((in[BlendBlit::kRIndex] * cr * ina) >> 16);
-			}
+			PixelFunc<doscale, rgbmod, alphamod>::normal(in, out, ca, cr, cg, cb);
 
 			if (doscale)
 				scaleXCtr += args.scaleX;
@@ -240,7 +66,6 @@ static void doBlitAdditiveBlendLogicGeneric(BlendBlit::Args &args) {
 				in += args.inStep;
 			out += 4;
 		}
-
 		if (doscale)
 			scaleYCtr += args.scaleY;
 		else
@@ -332,124 +157,28 @@ static void doBlitBinaryBlendLogicGeneric(BlendBlit::Args &args) {
 
 }; // end of class BlendBlitImpl_Default
 
+template<>
+inline void BlendBlitImpl_Default::blitInnerLoop<BlendBlitImpl_Default::OpaqueBlend, true, false, false, false, true>(BlendBlit::Args &args) {
+	doBlitOpaqueBlendLogicGeneric<true>(args);
+}
+
+template<>
+inline void BlendBlitImpl_Default::blitInnerLoop<BlendBlitImpl_Default::OpaqueBlend, false, false, false, false, true>(BlendBlit::Args &args) {
+	doBlitOpaqueBlendLogicGeneric<false>(args);
+}
+
+template<>
+inline void BlendBlitImpl_Default::blitInnerLoop<BlendBlitImpl_Default::BinaryBlend, true, false, false, false, true>(BlendBlit::Args &args) {
+	doBlitBinaryBlendLogicGeneric<true>(args);
+}
+
+template<>
+inline void BlendBlitImpl_Default::blitInnerLoop<BlendBlitImpl_Default::BinaryBlend, false, false, false, false, true>(BlendBlit::Args &args) {
+	doBlitBinaryBlendLogicGeneric<false>(args);
+}
+
 void BlendBlit::blitGeneric(Args &args, const TSpriteBlendMode &blendMode, const AlphaType &alphaType) {
-	bool rgbmod   = ((args.color & kRGBModMask) != kRGBModMask);
-	bool alphamod = ((args.color & kAModMask)   != kAModMask);
-	if (args.scaleX == BlendBlit::SCALE_THRESHOLD && args.scaleY == BlendBlit::SCALE_THRESHOLD) {
-		if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_OPAQUE) {
-			BlendBlitImpl_Default::doBlitOpaqueBlendLogicGeneric<false>(args);
-		} else if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_BINARY) {
-			BlendBlitImpl_Default::doBlitBinaryBlendLogicGeneric<false>(args);
-		} else {
-			if (blendMode == BLEND_ADDITIVE) {
-				if (rgbmod) {
-					if (alphamod) {
-						BlendBlitImpl_Default::doBlitAdditiveBlendLogicGeneric<false, true, true>(args);
-					} else {
-						BlendBlitImpl_Default::doBlitAdditiveBlendLogicGeneric<false, true, false>(args);
-					}
-				} else {
-					if (alphamod) {
-						BlendBlitImpl_Default::doBlitAdditiveBlendLogicGeneric<false, false, true>(args);
-					} else {
-						BlendBlitImpl_Default::doBlitAdditiveBlendLogicGeneric<false, false, false>(args);
-					}
-				}
-			} else if (blendMode == BLEND_SUBTRACTIVE) {
-				if (rgbmod) {
-					BlendBlitImpl_Default::doBlitSubtractiveBlendLogicGeneric<false, true>(args);
-				} else {
-					BlendBlitImpl_Default::doBlitSubtractiveBlendLogicGeneric<false, false>(args);
-				}
-			} else if (blendMode == BLEND_MULTIPLY) {
-				if (rgbmod) {
-					if (alphamod) {
-						BlendBlitImpl_Default::doBlitMultiplyBlendLogicGeneric<false, true, true>(args);
-					} else {
-						BlendBlitImpl_Default::doBlitMultiplyBlendLogicGeneric<false, true, false>(args);
-					}
-				} else {
-					if (alphamod) {
-						BlendBlitImpl_Default::doBlitMultiplyBlendLogicGeneric<false, false, true>(args);
-					} else {
-						BlendBlitImpl_Default::doBlitMultiplyBlendLogicGeneric<false, false, false>(args);
-					}
-				}
-			} else {
-				assert(blendMode == BLEND_NORMAL);
-				if (rgbmod) {
-					if (alphamod) {
-						BlendBlitImpl_Default::doBlitAlphaBlendLogicGeneric<false, true, true>(args);
-					} else {
-						BlendBlitImpl_Default::doBlitAlphaBlendLogicGeneric<false, true, false>(args);
-					}
-				} else {
-					if (alphamod) {
-						BlendBlitImpl_Default::doBlitAlphaBlendLogicGeneric<false, false, true>(args);
-					} else {
-						BlendBlitImpl_Default::doBlitAlphaBlendLogicGeneric<false, false, false>(args);
-					}
-				}
-			}
-		}
-	} else {
-		if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_OPAQUE) {
-			BlendBlitImpl_Default::doBlitOpaqueBlendLogicGeneric<true>(args);
-		} else if (args.color == 0xffffffff && blendMode == BLEND_NORMAL && alphaType == ALPHA_BINARY) {
-			BlendBlitImpl_Default::doBlitBinaryBlendLogicGeneric<true>(args);
-		} else {
-			if (blendMode == BLEND_ADDITIVE) {
-				if (rgbmod) {
-					if (alphamod) {
-						BlendBlitImpl_Default::doBlitAdditiveBlendLogicGeneric<true, true, true>(args);
-					} else {
-						BlendBlitImpl_Default::doBlitAdditiveBlendLogicGeneric<true, true, false>(args);
-					}
-				} else {
-					if (alphamod) {
-						BlendBlitImpl_Default::doBlitAdditiveBlendLogicGeneric<true, false, true>(args);
-					} else {
-						BlendBlitImpl_Default::doBlitAdditiveBlendLogicGeneric<true, false, false>(args);
-					}
-				}
-			} else if (blendMode == BLEND_SUBTRACTIVE) {
-				if (rgbmod) {
-					BlendBlitImpl_Default::doBlitSubtractiveBlendLogicGeneric<true, true>(args);
-				} else {
-					BlendBlitImpl_Default::doBlitSubtractiveBlendLogicGeneric<true, false>(args);
-				}
-			} else if (blendMode == BLEND_MULTIPLY) {
-				if (rgbmod) {
-					if (alphamod) {
-						BlendBlitImpl_Default::doBlitMultiplyBlendLogicGeneric<true, true, true>(args);
-					} else {
-						BlendBlitImpl_Default::doBlitMultiplyBlendLogicGeneric<true, true, false>(args);
-					}
-				} else {
-					if (alphamod) {
-						BlendBlitImpl_Default::doBlitMultiplyBlendLogicGeneric<true, false, true>(args);
-					} else {
-						BlendBlitImpl_Default::doBlitMultiplyBlendLogicGeneric<true, false, false>(args);
-					}
-				}
-			} else {
-				assert(blendMode == BLEND_NORMAL);
-				if (rgbmod) {
-					if (alphamod) {
-						BlendBlitImpl_Default::doBlitAlphaBlendLogicGeneric<true, true, true>(args);
-					} else {
-						BlendBlitImpl_Default::doBlitAlphaBlendLogicGeneric<true, true, false>(args);
-					}
-				} else {
-					if (alphamod) {
-						BlendBlitImpl_Default::doBlitAlphaBlendLogicGeneric<true, false, true>(args);
-					} else {
-						BlendBlitImpl_Default::doBlitAlphaBlendLogicGeneric<true, false, false>(args);
-					}
-				}
-			}
-		}
-	}
+	blitT<BlendBlitImpl_Default>(args, blendMode, alphaType);
 }
 
 } // End of namespace Graphics
diff --git a/graphics/blit/blit-neon.cpp b/graphics/blit/blit-neon.cpp
index 0e737d31438..85a38bddbb3 100644
--- a/graphics/blit/blit-neon.cpp
+++ b/graphics/blit/blit-neon.cpp
@@ -33,7 +33,7 @@ class BlendBlitImpl_NEON : public BlendBlitImpl_Base {
 	friend class BlendBlit;
 
 template<bool doscale, bool rgbmod, bool alphamod>
-struct AlphaBlend : public BlendBlitImpl_Base::AlphaBlend {
+struct AlphaBlend : public BlendBlitImpl_Base::AlphaBlend<doscale, rgbmod, alphamod> {
 	static inline uint32x4_t simd(uint32x4_t src, uint32x4_t dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
 		uint32x4_t ina;
 		if (alphamod)
@@ -80,7 +80,7 @@ struct AlphaBlend : public BlendBlitImpl_Base::AlphaBlend {
 };
 
 template<bool doscale, bool rgbmod, bool alphamod>
-struct MultiplyBlend : public BlendBlitImpl_Base::MultiplyBlend {
+struct MultiplyBlend : public BlendBlitImpl_Base::MultiplyBlend<doscale, rgbmod, alphamod> {
 	static inline uint32x4_t simd(uint32x4_t src, uint32x4_t dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
 		uint32x4_t ina;
 		if (alphamod)
@@ -121,14 +121,14 @@ struct MultiplyBlend : public BlendBlitImpl_Base::MultiplyBlend {
 };
 
 template<bool doscale, bool rgbmod, bool alphamod>
-struct OpaqueBlend : public BlendBlitImpl_Base::OpaqueBlend {
+struct OpaqueBlend : public BlendBlitImpl_Base::OpaqueBlend<doscale, rgbmod, alphamod> {
 	static inline uint32x4_t simd(uint32x4_t src, uint32x4_t dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
 		return vorrq_u32(src, vmovq_n_u32(BlendBlit::kAModMask));
 	}
 };
 
 template<bool doscale, bool rgbmod, bool alphamod>
-struct BinaryBlend : public BlendBlitImpl_Base::BinaryBlend {
+struct BinaryBlend : public BlendBlitImpl_Base::BinaryBlend<doscale, rgbmod, alphamod> {
 	static inline uint32x4_t simd(uint32x4_t src, uint32x4_t dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
 		uint32x4_t alphaMask = vceqq_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kAModMask)), vmovq_n_u32(0));
 		dst = vandq_u32(dst, alphaMask);
@@ -138,7 +138,7 @@ struct BinaryBlend : public BlendBlitImpl_Base::BinaryBlend {
 };
 
 template<bool doscale, bool rgbmod, bool alphamod>
-struct AdditiveBlend : public BlendBlitImpl_Base::AdditiveBlend {
+struct AdditiveBlend : public BlendBlitImpl_Base::AdditiveBlend<doscale, rgbmod, alphamod> {
 	static inline uint32x4_t simd(uint32x4_t src, uint32x4_t dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
 		uint32x4_t ina;
 		if (alphamod)
@@ -192,7 +192,7 @@ struct AdditiveBlend : public BlendBlitImpl_Base::AdditiveBlend {
 };
 
 template<bool doscale, bool rgbmod, bool alphamod>
-struct SubtractiveBlend : public BlendBlitImpl_Base::SubtractiveBlend {
+struct SubtractiveBlend : public BlendBlitImpl_Base::SubtractiveBlend<doscale, rgbmod, alphamod> {
 	static inline uint32x4_t simd(uint32x4_t src, uint32x4_t dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
 		uint32x4_t ina = vandq_u32(src, vmovq_n_u32(BlendBlit::kAModMask));
 		uint32x4_t srcb = vshrq_n_u32(vandq_u32(src, vmovq_n_u32(BlendBlit::kBModMask)), BlendBlit::kBModShift);
diff --git a/graphics/blit/blit-sse2.cpp b/graphics/blit/blit-sse2.cpp
index 8bb5db923af..5fbe44b25f4 100644
--- a/graphics/blit/blit-sse2.cpp
+++ b/graphics/blit/blit-sse2.cpp
@@ -37,7 +37,7 @@ class BlendBlitImpl_SSE2 : public BlendBlitImpl_Base {
 	friend class BlendBlit;
 
 template<bool doscale, bool rgbmod, bool alphamod>
-struct AlphaBlend : public BlendBlitImpl_Base::AlphaBlend {
+struct AlphaBlend : public BlendBlitImpl_Base::AlphaBlend<doscale, rgbmod, alphamod> {
 	static inline __m128i simd(__m128i src, __m128i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
 		__m128i ina;
 		if (alphamod)
@@ -84,7 +84,7 @@ struct AlphaBlend : public BlendBlitImpl_Base::AlphaBlend {
 };
 
 template<bool doscale, bool rgbmod, bool alphamod>
-struct MultiplyBlend : public BlendBlitImpl_Base::MultiplyBlend {
+struct MultiplyBlend : public BlendBlitImpl_Base::MultiplyBlend<doscale, rgbmod, alphamod> {
 	static inline __m128i simd(__m128i src, __m128i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
 		__m128i ina, alphaMask;
 		if (alphamod) {
@@ -130,14 +130,14 @@ struct MultiplyBlend : public BlendBlitImpl_Base::MultiplyBlend {
 };
 
 template<bool doscale, bool rgbmod, bool alphamod>
-struct OpaqueBlend : public BlendBlitImpl_Base::OpaqueBlend {
+struct OpaqueBlend : public BlendBlitImpl_Base::OpaqueBlend<doscale, rgbmod, alphamod> {
 	static inline __m128i simd(__m128i src, __m128i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
 		return _mm_or_si128(src, _mm_set1_epi32(BlendBlit::kAModMask));
 	}
 };
 
 template<bool doscale, bool rgbmod, bool alphamod>
-struct BinaryBlend : public BlendBlitImpl_Base::BinaryBlend {
+struct BinaryBlend : public BlendBlitImpl_Base::BinaryBlend<doscale, rgbmod, alphamod> {
 	static inline __m128i simd(__m128i src, __m128i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
 		__m128i alphaMask = _mm_cmpeq_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask)), _mm_setzero_si128());
 		dst = _mm_and_si128(dst, alphaMask);
@@ -147,7 +147,7 @@ struct BinaryBlend : public BlendBlitImpl_Base::BinaryBlend {
 };
 
 template<bool doscale, bool rgbmod, bool alphamod>
-struct AdditiveBlend : public BlendBlitImpl_Base::AdditiveBlend {
+struct AdditiveBlend : public BlendBlitImpl_Base::AdditiveBlend<doscale, rgbmod, alphamod> {
 	static inline __m128i simd(__m128i src, __m128i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
 		__m128i ina;
 		if (alphamod)
@@ -201,7 +201,7 @@ struct AdditiveBlend : public BlendBlitImpl_Base::AdditiveBlend {
 };
 
 template<bool doscale, bool rgbmod, bool alphamod>
-struct SubtractiveBlend : public BlendBlitImpl_Base::SubtractiveBlend {
+struct SubtractiveBlend : public BlendBlitImpl_Base::SubtractiveBlend<doscale, rgbmod, alphamod> {
 	static inline __m128i simd(__m128i src, __m128i dst, const bool flip, const byte ca, const byte cr, const byte cg, const byte cb) {
 		__m128i ina = _mm_and_si128(src, _mm_set1_epi32(BlendBlit::kAModMask));
 		__m128i srcb = _mm_srli_epi32(_mm_and_si128(src, _mm_set1_epi32(BlendBlit::kBModMask)), BlendBlit::kBModShift);




More information about the Scummvm-git-logs mailing list