[Scummvm-git-logs] scummvm master -> 5f86c391ec5dca27b5acef6d63f91189336f4e4b

lephilousophe noreply at scummvm.org
Mon Jan 3 09:52:15 UTC 2022


This automated email contains information about 2 new commits which have been
pushed to the 'scummvm' repo located at https://github.com/scummvm/scummvm .

Summary:
2b549a763a GRAPHICS: Make HQx ASM scalers relocations free
5f86c391ec GRAPHICS: Remove global variables of HQ scaler


Commit: 2b549a763aa7f24f34e085cc0da9ab15f5276d5b
    https://github.com/scummvm/scummvm/commit/2b549a763aa7f24f34e085cc0da9ab15f5276d5b
Author: Le Philousophe (lephilousophe at users.noreply.github.com)
Date: 2022-01-03T10:52:11+01:00

Commit Message:
GRAPHICS: Make HQx ASM scalers relocations free

It is considered good practice not to have relocations in the text
segment.
The binary is now completely PIC and can easily be relocated at any
address (ASLR).

Changed paths:
    graphics/scaler/hq.cpp
    graphics/scaler/hq2x_i386.asm
    graphics/scaler/hq3x_i386.asm


diff --git a/graphics/scaler/hq.cpp b/graphics/scaler/hq.cpp
index 8360f0cc319..a4be89203f0 100644
--- a/graphics/scaler/hq.cpp
+++ b/graphics/scaler/hq.cpp
@@ -23,31 +23,6 @@
 #include "graphics/scaler/intern.h"
 
 // RGB-to-YUV lookup table
-extern "C" {
-
-#ifdef USE_NASM
-// NOTE: if your compiler uses different mangled names, add another
-//       condition here
-
-#if !defined(_WIN32) && !defined(MACOSX) && !defined(__OS2__)
-#define RGBtoYUV _RGBtoYUV
-#define hqx_highbits _hqx_highbits
-#define hqx_lowbits _hqx_lowbits
-#define hqx_low2bits _hqx_low2bits
-#define hqx_low3bits _hqx_low3bits
-#define hqx_greenMask _hqx_greenMask
-#define hqx_redBlueMask _hqx_redBlueMask
-#define hqx_green_redBlue_Mask _hqx_green_redBlue_Mask
-#endif
-
-uint32 hqx_highbits = 0xF7DEF7DE;
-uint32 hqx_lowbits = 0x0821;
-uint32 hqx_low2bits = 0x0C63;
-uint32 hqx_low3bits = 0x1CE7;
-uint32 hqx_greenMask = 0;
-uint32 hqx_redBlueMask = 0;
-uint32 hqx_green_redBlue_Mask = 0;
-#endif
 
 /**
  * 16bit RGB to YUV conversion table. This table is setup by InitLUT().
@@ -70,8 +45,24 @@ uint32 hqx_green_redBlue_Mask = 0;
  * differences are likely to vary a lot between different architectures and
  * CPUs.
  */
-uint32 *RGBtoYUV = 0;
-}
+static uint32 *RGBtoYUV = 0;
+
+#ifdef USE_NASM
+// NOTE: if your compiler uses different mangled names, add another
+//       condition here
+typedef struct {
+	uint32 *RGBtoYUV;
+	uint32 highbits;
+	uint32 lowbits;
+	uint32 low2bits;
+	uint32 low3bits;
+	uint32 greenMask;
+	uint32 redBlueMask;
+	uint32 green_redBlue_Mask;
+} hqx_parameters;
+
+static hqx_parameters hqx_params;
+#endif
 
 void InitLUT(Graphics::PixelFormat format) {
 	uint8 r, g, b;
@@ -95,18 +86,20 @@ void InitLUT(Graphics::PixelFormat format) {
 	}
 
 #ifdef USE_NASM
-	hqx_lowbits  = (1 << format.rShift) | (1 << format.gShift) | (1 << format.bShift),
-	hqx_low2bits = (3 << format.rShift) | (3 << format.gShift) | (3 << format.bShift),
-	hqx_low3bits = (7 << format.rShift) | (7 << format.gShift) | (7 << format.bShift),
+	hqx_params.lowbits  = (1 << format.rShift) | (1 << format.gShift) | (1 << format.bShift),
+	hqx_params.low2bits = (3 << format.rShift) | (3 << format.gShift) | (3 << format.bShift),
+	hqx_params.low3bits = (7 << format.rShift) | (7 << format.gShift) | (7 << format.bShift),
 
-	hqx_highbits = format.RGBToColor(255,255,255) ^ hqx_lowbits;
+	hqx_params.highbits = format.RGBToColor(255,255,255) ^ hqx_params.lowbits;
 
 	// FIXME: The following code only does the right thing
 	// if the color order is RGB or BGR, i.e., green is in the middle.
-	hqx_greenMask = format.RGBToColor(0,255,0);
-	hqx_redBlueMask = format.RGBToColor(255,0,255);
+	hqx_params.greenMask = format.RGBToColor(0,255,0);
+	hqx_params.redBlueMask = format.RGBToColor(255,0,255);
+
+	hqx_params.green_redBlue_Mask = (hqx_params.greenMask << 16) | hqx_params.redBlueMask;
 
-	hqx_green_redBlue_Mask = (hqx_greenMask << 16) | hqx_redBlueMask;
+	hqx_params.RGBtoYUV = RGBtoYUV;
 #endif
 }
 
@@ -119,12 +112,12 @@ extern "C" {
 #define hq2x_16 _hq2x_16
 #endif
 
-void hq2x_16(const byte *, byte *, uint32, uint32, uint32, uint32);
+void hq2x_16(const byte *, byte *, uint32, uint32, uint32, uint32, hqx_parameters *);
 
 }
 
 void HQ2x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
-	hq2x_16(srcPtr, dstPtr, width, height, srcPitch, dstPitch);
+	hq2x_16(srcPtr, dstPtr, width, height, srcPitch, dstPitch, &hqx_params);
 }
 
 #endif
@@ -192,7 +185,6 @@ void HQ2x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch,
 #define PIXEL11_90	*(q+1+nextlineDst) = interpolate_2_3_3(w5, w6, w8);
 #define PIXEL11_100	*(q+1+nextlineDst) = interpolate_14_1_1(w5, w6, w8);
 
-extern "C" uint32   *RGBtoYUV;
 #define YUV(x)	(sizeof(Pixel) == 2 ? RGBtoYUV[w ## x] : ConvertYUV<ColorMask>(w ## x))
 
 /**
@@ -2086,12 +2078,12 @@ extern "C" {
 #endif
 
 
-void hq3x_16(const byte *, byte *, uint32, uint32, uint32, uint32);
+void hq3x_16(const byte *, byte *, uint32, uint32, uint32, uint32, hqx_parameters *);
 
 }
 
 void HQ3x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
-	hq3x_16(srcPtr, dstPtr, width, height, srcPitch, dstPitch);
+	hq3x_16(srcPtr, dstPtr, width, height, srcPitch, dstPitch, &hqx_params);
 }
 
 #endif
diff --git a/graphics/scaler/hq2x_i386.asm b/graphics/scaler/hq2x_i386.asm
index 4f8e0713b76..93de0ebcf9e 100644
--- a/graphics/scaler/hq2x_i386.asm
+++ b/graphics/scaler/hq2x_i386.asm
@@ -20,50 +20,60 @@
 
 GLOBAL _hq2x_16
 
-EXTERN _RGBtoYUV
-EXTERN _hqx_highbits
-EXTERN _hqx_lowbits
-EXTERN _hqx_low2bits
-EXTERN _hqx_low3bits
-EXTERN _hqx_greenMask
-EXTERN _hqx_redBlueMask
-EXTERN _hqx_green_redBlue_Mask
-
-SECTION .bss
-linesleft resd 1
-xcounter  resd 1
-cross     resd 1
-nextline  resd 1
-prevline  resd 1
-w1        resd 1
-w2        resd 1
-w3        resd 1
-w4        resd 1
-w5        resd 1
-w6        resd 1
-w7        resd 1
-w8        resd 1
-w9        resd 1
-
-SECTION .data
-
-reg_blank    dd  0,0
-const3       dd  0x00030003,0x00000003
-const5       dd  0x00050005,0x00000005
-const6       dd  0x00060006,0x00000006
-const14      dd  0x000E000E,0x0000000E
-threshold    dd  0x00300706,0x00000000
-moduloSrc    dd  0
-moduloDst    dd  0
-
 SECTION .text
 
+thresholdVal0        equ 0x00300706
+thresholdVal1        equ 0x00000000
+
+; Function variables space
+
+w1                   equ -112
+w2                   equ -108
+w3                   equ -104
+w4                   equ -100
+w5                   equ -96
+w6                   equ -92
+w7                   equ -88
+w8                   equ -84
+w9                   equ -80
+linesleft            equ -76
+xcounter             equ -72
+cross                equ -68
+nextline             equ -64
+prevline             equ -60
+moduloSrc            equ -56
+moduloDst            equ -52
+threshold            equ -48
+; threshold is 64bits
+pFuncTable2          equ -40
+pFuncTable           equ -36
+
+; the following variables must match hqx_parameters structure
+; Address of the whole structure
+hqx_params_          equ -32
+RGBtoYUV             equ -32
+highbits             equ -28
+lowbits              equ -24
+low2bits             equ -20
+low3bits             equ -16
+greenMask            equ -12
+redBlueMask          equ -8
+green_redBlue_Mask   equ -4
+
+inbuffer     equ 8
+outbuffer    equ 12
+Xres         equ 16
+Yres         equ 20
+srcPitch     equ 24
+dstPitch     equ 28
+hqx_params   equ 32
+
 %macro TestDiff 2
     xor     ecx,ecx
     mov     edx,[%1]
     cmp     edx,[%2]
     je      %%fin
-    mov     ecx,[_RGBtoYUV]
+    mov     ecx,[ebp+RGBtoYUV]
     movd    mm1,[ecx+edx*4]
     movq    mm5,mm1
     mov     edx,[%2]
@@ -71,7 +81,7 @@ SECTION .text
     psubusb mm1,mm2
     psubusb mm2,mm5
     por     mm1,mm2
-    psubusb mm1,[threshold]
+    psubusb mm1,[ebp+threshold]
     movd    ecx,mm1
 %%fin:
 %endmacro
@@ -139,12 +149,12 @@ SECTION .text
 %macro Interp1 3
     mov edx,%2
     mov ecx,%3
-    and edx,[_hqx_highbits]
-    and ecx,[_hqx_highbits]
+    and edx,[ebp+highbits]
+    and ecx,[ebp+highbits]
     add ecx,edx
     shr ecx,1
-    add ecx,[_hqx_lowbits]
-    and ecx,[_hqx_highbits]
+    add ecx,[ebp+lowbits]
+    and ecx,[ebp+highbits]
     add edx,ecx
     shr edx,1
     mov %1,dx
@@ -157,14 +167,14 @@ SECTION .text
 %macro Interp2 4
     mov edx,%3
     mov ecx,%4
-    and edx,[_hqx_highbits]
-    and ecx,[_hqx_highbits]
+    and edx,[ebp+highbits]
+    and ecx,[ebp+highbits]
     add ecx,edx
     shr ecx,1
-    add ecx,[_hqx_lowbits]
+    add ecx,[ebp+lowbits]
     mov edx,%2
-    and ecx,[_hqx_highbits]
-    and edx,[_hqx_highbits]
+    and ecx,[ebp+highbits]
+    and edx,[ebp+highbits]
     add edx,ecx
     shr edx,1
     mov %1,dx
@@ -177,7 +187,7 @@ SECTION .text
 	mov ecx, eax
 	shl ecx, 16
 	or  ecx, eax
-	and ecx, [_hqx_green_redBlue_Mask]
+	and ecx, [ebp+green_redBlue_Mask]
 
 	; multiply c1 by 5
 	lea ecx, [5*ecx]
@@ -186,7 +196,7 @@ SECTION .text
 	mov edx, %2
 	shl edx, 16
 	or  edx, %2
-	and edx, [_hqx_green_redBlue_Mask]
+	and edx, [ebp+green_redBlue_Mask]
 
 	; add 2*c2 to 5*c1
 	lea ecx, [ecx + 2*edx]
@@ -195,12 +205,12 @@ SECTION .text
 	mov edx, %3
 	shl edx, 16
 	or  edx, %3
-	and edx, [_hqx_green_redBlue_Mask]
+	and edx, [ebp+green_redBlue_Mask]
 
 	; add c3 and 2*c2+c1*5, divide by 8, mask the result
 	add edx, ecx
 	shr edx, 3
-	and edx, [_hqx_green_redBlue_Mask]
+	and edx, [ebp+green_redBlue_Mask]
 
 	; finally, repack the mixed pixel
 	mov ecx, edx
@@ -217,7 +227,7 @@ SECTION .text
 	mov ecx, eax
 	shl ecx, 16
 	or  ecx, eax
-	and ecx, [_hqx_green_redBlue_Mask]
+	and ecx, [ebp+green_redBlue_Mask]
 
 	; multiply c1 by 6
 	lea ecx, [3*ecx]		; mul by 3
@@ -227,7 +237,7 @@ SECTION .text
 	mov edx, %2
 	shl edx, 16
 	or  edx, %2
-	and edx, [_hqx_green_redBlue_Mask]
+	and edx, [ebp+green_redBlue_Mask]
 
 	; add c2 to c1*6
 	add ecx, edx
@@ -236,12 +246,12 @@ SECTION .text
 	mov edx, %3
 	shl edx, 16
 	or  edx, %3
-	and edx, [_hqx_green_redBlue_Mask]
+	and edx, [ebp+green_redBlue_Mask]
 
 	; add c3 and c2+c1*3, divide by 8, mask the result
 	add edx, ecx
 	shr edx, 3
-	and edx, [_hqx_green_redBlue_Mask]
+	and edx, [ebp+green_redBlue_Mask]
 
 	; finally, repack the mixed pixel
 	mov ecx, edx
@@ -258,13 +268,13 @@ SECTION .text
 	mov edx, %2
 	shl edx, 16
 	or  edx, %2
-	and edx, [_hqx_green_redBlue_Mask]
+	and edx, [ebp+green_redBlue_Mask]
 
 	; unpack c3 to ecx
 	mov ecx, %3
 	shl ecx, 16
 	or  ecx, %3
-	and ecx, [_hqx_green_redBlue_Mask]
+	and ecx, [ebp+green_redBlue_Mask]
 
 	; set edx to 3*(c2+c3)
 	add edx, ecx
@@ -274,12 +284,12 @@ SECTION .text
 	mov ecx, eax
 	shl ecx, 16
 	or  ecx, eax
-	and ecx, [_hqx_green_redBlue_Mask]
+	and ecx, [ebp+green_redBlue_Mask]
 
 	; sum 2*c1 + 3*(c2+c3), divide by 8, mask the result
 	lea edx, [edx + 2*ecx]
 	shr edx, 3
-	and edx, [_hqx_green_redBlue_Mask]
+	and edx, [ebp+green_redBlue_Mask]
 
 	; finally, repack the mixed pixel
 	mov ecx, edx
@@ -296,7 +306,7 @@ SECTION .text
 	mov edx, eax
 	shl edx, 16
 	or  edx, eax
-	and edx, [_hqx_green_redBlue_Mask]
+	and edx, [ebp+green_redBlue_Mask]
 
 	; multiply c1 by 7 -> store in ecx
 	lea ecx, [8*edx]
@@ -306,7 +316,7 @@ SECTION .text
 	mov edx, %2
 	shl edx, 16
 	or  edx, %2
-	and edx, [_hqx_green_redBlue_Mask]
+	and edx, [ebp+green_redBlue_Mask]
 
 	; add c2 to 2*ecx=2*(c1*7)=c1*14 -> store in ecx
 	lea ecx, [edx + 2*ecx]
@@ -315,12 +325,12 @@ SECTION .text
 	mov edx, %3
 	shl edx, 16
 	or  edx, %3
-	and edx, [_hqx_green_redBlue_Mask]
+	and edx, [ebp+green_redBlue_Mask]
 
 	; add c3 and c2+c1*14, divide by 16, mask the result
 	add edx, ecx
 	shr edx, 4
-	and edx, [_hqx_green_redBlue_Mask]
+	and edx, [ebp+green_redBlue_Mask]
 
 	; finally, repack the mixed pixel
 	mov ecx, edx
@@ -335,47 +345,47 @@ SECTION .text
 %endmacro
 
 %macro PIXEL00_10 0
-    Interp1 [edi],eax,[w1]
+    Interp1 [edi],eax,[ebp+w1]
 %endmacro
 
 %macro PIXEL00_11 0
-    Interp1 [edi],eax,[w4]
+    Interp1 [edi],eax,[ebp+w4]
 %endmacro
 
 %macro PIXEL00_12 0
-    Interp1 [edi],eax,[w2]
+    Interp1 [edi],eax,[ebp+w2]
 %endmacro
 
 %macro PIXEL00_20 0
-    Interp2 [edi],eax,[w4],[w2]
+    Interp2 [edi],eax,[ebp+w4],[ebp+w2]
 %endmacro
 
 %macro PIXEL00_21 0
-    Interp2 [edi],eax,[w1],[w2]
+    Interp2 [edi],eax,[ebp+w1],[ebp+w2]
 %endmacro
 
 %macro PIXEL00_22 0
-    Interp2 [edi],eax,[w1],[w4]
+    Interp2 [edi],eax,[ebp+w1],[ebp+w4]
 %endmacro
 
 %macro PIXEL00_60 0
-    Interp6 [edi],[w2],[w4]
+    Interp6 [edi],[ebp+w2],[ebp+w4]
 %endmacro
 
 %macro PIXEL00_61 0
-    Interp6 [edi],[w4],[w2]
+    Interp6 [edi],[ebp+w4],[ebp+w2]
 %endmacro
 
 %macro PIXEL00_70 0
-    Interp7 [edi],[w4],[w2]
+    Interp7 [edi],[ebp+w4],[ebp+w2]
 %endmacro
 
 %macro PIXEL00_90 0
-    Interp9 [edi],[w4],[w2]
+    Interp9 [edi],[ebp+w4],[ebp+w2]
 %endmacro
 
 %macro PIXEL00_100 0
-    Interp10 [edi],[w4],[w2]
+    Interp10 [edi],[ebp+w4],[ebp+w2]
 %endmacro
 
 %macro PIXEL01_0 0
@@ -383,47 +393,47 @@ SECTION .text
 %endmacro
 
 %macro PIXEL01_10 0
-    Interp1 [edi+2],eax,[w3]
+    Interp1 [edi+2],eax,[ebp+w3]
 %endmacro
 
 %macro PIXEL01_11 0
-    Interp1 [edi+2],eax,[w2]
+    Interp1 [edi+2],eax,[ebp+w2]
 %endmacro
 
 %macro PIXEL01_12 0
-    Interp1 [edi+2],eax,[w6]
+    Interp1 [edi+2],eax,[ebp+w6]
 %endmacro
 
 %macro PIXEL01_20 0
-    Interp2 [edi+2],eax,[w2],[w6]
+    Interp2 [edi+2],eax,[ebp+w2],[ebp+w6]
 %endmacro
 
 %macro PIXEL01_21 0
-    Interp2 [edi+2],eax,[w3],[w6]
+    Interp2 [edi+2],eax,[ebp+w3],[ebp+w6]
 %endmacro
 
 %macro PIXEL01_22 0
-    Interp2 [edi+2],eax,[w3],[w2]
+    Interp2 [edi+2],eax,[ebp+w3],[ebp+w2]
 %endmacro
 
 %macro PIXEL01_60 0
-    Interp6 [edi+2],[w6],[w2]
+    Interp6 [edi+2],[ebp+w6],[ebp+w2]
 %endmacro
 
 %macro PIXEL01_61 0
-    Interp6 [edi+2],[w2],[w6]
+    Interp6 [edi+2],[ebp+w2],[ebp+w6]
 %endmacro
 
 %macro PIXEL01_70 0
-    Interp7 [edi+2],[w2],[w6]
+    Interp7 [edi+2],[ebp+w2],[ebp+w6]
 %endmacro
 
 %macro PIXEL01_90 0
-    Interp9 [edi+2],[w2],[w6]
+    Interp9 [edi+2],[ebp+w2],[ebp+w6]
 %endmacro
 
 %macro PIXEL01_100 0
-    Interp10 [edi+2],[w2],[w6]
+    Interp10 [edi+2],[ebp+w2],[ebp+w6]
 %endmacro
 
 %macro PIXEL10_0 0
@@ -431,47 +441,47 @@ SECTION .text
 %endmacro
 
 %macro PIXEL10_10 0
-    Interp1 [edi+ebx],eax,[w7]
+    Interp1 [edi+ebx],eax,[ebp+w7]
 %endmacro
 
 %macro PIXEL10_11 0
-    Interp1 [edi+ebx],eax,[w8]
+    Interp1 [edi+ebx],eax,[ebp+w8]
 %endmacro
 
 %macro PIXEL10_12 0
-    Interp1 [edi+ebx],eax,[w4]
+    Interp1 [edi+ebx],eax,[ebp+w4]
 %endmacro
 
 %macro PIXEL10_20 0
-    Interp2 [edi+ebx],eax,[w8],[w4]
+    Interp2 [edi+ebx],eax,[ebp+w8],[ebp+w4]
 %endmacro
 
 %macro PIXEL10_21 0
-    Interp2 [edi+ebx],eax,[w7],[w4]
+    Interp2 [edi+ebx],eax,[ebp+w7],[ebp+w4]
 %endmacro
 
 %macro PIXEL10_22 0
-    Interp2 [edi+ebx],eax,[w7],[w8]
+    Interp2 [edi+ebx],eax,[ebp+w7],[ebp+w8]
 %endmacro
 
 %macro PIXEL10_60 0
-    Interp6 [edi+ebx],[w4],[w8]
+    Interp6 [edi+ebx],[ebp+w4],[ebp+w8]
 %endmacro
 
 %macro PIXEL10_61 0
-    Interp6 [edi+ebx],[w8],[w4]
+    Interp6 [edi+ebx],[ebp+w8],[ebp+w4]
 %endmacro
 
 %macro PIXEL10_70 0
-    Interp7 [edi+ebx],[w8],[w4]
+    Interp7 [edi+ebx],[ebp+w8],[ebp+w4]
 %endmacro
 
 %macro PIXEL10_90 0
-    Interp9 [edi+ebx],[w8],[w4]
+    Interp9 [edi+ebx],[ebp+w8],[ebp+w4]
 %endmacro
 
 %macro PIXEL10_100 0
-    Interp10 [edi+ebx],[w8],[w4]
+    Interp10 [edi+ebx],[ebp+w8],[ebp+w4]
 %endmacro
 
 %macro PIXEL11_0 0
@@ -479,175 +489,184 @@ SECTION .text
 %endmacro
 
 %macro PIXEL11_10 0
-    Interp1 [edi+ebx+2],eax,[w9]
+    Interp1 [edi+ebx+2],eax,[ebp+w9]
 %endmacro
 
 %macro PIXEL11_11 0
-    Interp1 [edi+ebx+2],eax,[w6]
+    Interp1 [edi+ebx+2],eax,[ebp+w6]
 %endmacro
 
 %macro PIXEL11_12 0
-    Interp1 [edi+ebx+2],eax,[w8]
+    Interp1 [edi+ebx+2],eax,[ebp+w8]
 %endmacro
 
 %macro PIXEL11_20 0
-    Interp2 [edi+ebx+2],eax,[w6],[w8]
+    Interp2 [edi+ebx+2],eax,[ebp+w6],[ebp+w8]
 %endmacro
 
 %macro PIXEL11_21 0
-    Interp2 [edi+ebx+2],eax,[w9],[w8]
+    Interp2 [edi+ebx+2],eax,[ebp+w9],[ebp+w8]
 %endmacro
 
 %macro PIXEL11_22 0
-    Interp2 [edi+ebx+2],eax,[w9],[w6]
+    Interp2 [edi+ebx+2],eax,[ebp+w9],[ebp+w6]
 %endmacro
 
 %macro PIXEL11_60 0
-    Interp6 [edi+ebx+2],[w8],[w6]
+    Interp6 [edi+ebx+2],[ebp+w8],[ebp+w6]
 %endmacro
 
 %macro PIXEL11_61 0
-    Interp6 [edi+ebx+2],[w6],[w8]
+    Interp6 [edi+ebx+2],[ebp+w6],[ebp+w8]
 %endmacro
 
 %macro PIXEL11_70 0
-    Interp7 [edi+ebx+2],[w6],[w8]
+    Interp7 [edi+ebx+2],[ebp+w6],[ebp+w8]
 %endmacro
 
 %macro PIXEL11_90 0
-    Interp9 [edi+ebx+2],[w6],[w8]
+    Interp9 [edi+ebx+2],[ebp+w6],[ebp+w8]
 %endmacro
 
 %macro PIXEL11_100 0
-    Interp10 [edi+ebx+2],[w6],[w8]
+    Interp10 [edi+ebx+2],[ebp+w6],[ebp+w8]
 %endmacro
 
-inbuffer     equ 8
-outbuffer    equ 12
-Xres         equ 16
-Yres         equ 20
-srcPitch     equ 24
-dstPitch     equ 28
-
 _hq2x_16:
     push ebp
     mov ebp,esp
+    sub esp,112
     pushad
 
+    call    _get_pc
+.pc_anchor:
+    mov     ecx,eax
+    add     eax,(FuncTable-.pc_anchor)
+    mov     dword[ebp+pFuncTable],eax
+    add     ecx,(FuncTable2-.pc_anchor)
+    mov     dword[ebp+pFuncTable2],ecx
+    mov     dword[ebp+threshold],thresholdVal0
+    mov     dword[ebp+threshold+4], thresholdVal1
+
+    mov     esi,[ebp+hqx_params]
+    lea     edi,[ebp+hqx_params_]
+    mov     ecx,(green_redBlue_Mask+4-hqx_params_)/4
+    rep     movsd
+
     mov     esi,[ebp+inbuffer]
     mov     edi,[ebp+outbuffer]
     mov     edx,[ebp+Yres]
-    mov     [linesleft],edx
+    mov     [ebp+linesleft],edx
     mov     ecx,[ebp+Xres]
     shl     ecx,1
     mov     ebx,[ebp+dstPitch]
-    mov     dword[moduloDst],ebx
-    sub     dword[moduloDst],ecx
-    shl     dword[moduloDst],1
+    mov     dword[ebp+moduloDst],ebx
+    sub     dword[ebp+moduloDst],ecx
+    shl     dword[ebp+moduloDst],1
     mov     ebx,[ebp+srcPitch]
-    mov     dword[nextline],ebx
-    mov     dword[moduloSrc],ebx
-    sub     dword[moduloSrc],ecx
+    mov     dword[ebp+nextline],ebx
+    mov     dword[ebp+moduloSrc],ebx
+    sub     dword[ebp+moduloSrc],ecx
     neg     ebx
-    mov     dword[prevline],ebx
+    mov     dword[ebp+prevline],ebx
 .loopy:
     mov     ecx,[ebp+Xres]
-    mov     dword[xcounter],ecx
+    mov     dword[ebp+xcounter],ecx
 .loopx:
-    mov     ebx,[prevline]
+    mov     ebx,[ebp+prevline]
     movq    mm5,[esi+ebx-2]
     movq    mm6,[esi-2]
-    mov     ebx,[nextline]
+    mov     ebx,[ebp+nextline]
     movq    mm7,[esi+ebx-2]
     movd    eax,mm5
     movzx   edx,ax
-    mov     [w1],edx
+    mov     [ebp+w1],edx
     shr     eax,16
-    mov     [w2],eax
+    mov     [ebp+w2],eax
     psrlq   mm5,32
     movd    eax,mm5
     movzx   edx,ax
-    mov     [w3],edx
+    mov     [ebp+w3],edx
     movd    eax,mm6
     movzx   edx,ax
-    mov     [w4],edx
+    mov     [ebp+w4],edx
     shr     eax,16
-    mov     [w5],eax
+    mov     [ebp+w5],eax
     psrlq   mm6,32
     movd    eax,mm6
     movzx   edx,ax
-    mov     [w6],edx
+    mov     [ebp+w6],edx
     movd    eax,mm7
     movzx   edx,ax
-    mov     [w7],edx
+    mov     [ebp+w7],edx
     shr     eax,16
-    mov     [w8],eax
+    mov     [ebp+w8],eax
     psrlq   mm7,32
     movd    eax,mm7
     movzx   edx,ax
-    mov     [w9],edx
+    mov     [ebp+w9],edx
 .flags:
-    mov     ebx,[_RGBtoYUV]
-    mov     eax,[w5]
+    mov     ebx,[ebp+RGBtoYUV]
+    mov     eax,[ebp+w5]
     xor     ecx,ecx
     movd    mm5,[ebx+eax*4]
-    mov     dword[cross],0
+    mov     dword[ebp+cross],0
 
-    mov     edx,[w2]
+    mov     edx,[ebp+w2]
     cmp     eax,edx
     je      .noflag2
-    or      dword[cross],1
+    or      dword[ebp+cross],1
     movq    mm1,mm5
     movd    mm2,[ebx+edx*4]
     psubusb mm1,mm2
     psubusb mm2,mm5
     por     mm1,mm2
-    psubusb mm1,[threshold]
+    psubusb mm1,[ebp+threshold]
     movd    edx,mm1
     test    edx,edx
     jz      .noflag2
     or      ecx,2
 .noflag2:
-    mov     edx,[w4]
+    mov     edx,[ebp+w4]
     cmp     eax,edx
     je      .noflag4
-    or      dword[cross],2
+    or      dword[ebp+cross],2
     movq    mm1,mm5
     movd    mm2,[ebx+edx*4]
     psubusb mm1,mm2
     psubusb mm2,mm5
     por     mm1,mm2
-    psubusb mm1,[threshold]
+    psubusb mm1,[ebp+threshold]
     movd    edx,mm1
     test    edx,edx
     jz      .noflag4
     or      ecx,8
 .noflag4:
-    mov     edx,[w6]
+    mov     edx,[ebp+w6]
     cmp     eax,edx
     je      .noflag6
-    or      dword[cross],4
+    or      dword[ebp+cross],4
     movq    mm1,mm5
     movd    mm2,[ebx+edx*4]
     psubusb mm1,mm2
     psubusb mm2,mm5
     por     mm1,mm2
-    psubusb mm1,[threshold]
+    psubusb mm1,[ebp+threshold]
     movd    edx,mm1
     test    edx,edx
     jz      .noflag6
     or      ecx,16
 .noflag6:
-    mov     edx,[w8]
+    mov     edx,[ebp+w8]
     cmp     eax,edx
     je      .noflag8
-    or      dword[cross],8
+    or      dword[ebp+cross],8
     movq    mm1,mm5
     movd    mm2,[ebx+edx*4]
     psubusb mm1,mm2
     psubusb mm2,mm5
     por     mm1,mm2
-    psubusb mm1,[threshold]
+    psubusb mm1,[ebp+threshold]
     movd    edx,mm1
     test    edx,edx
     jz      .noflag8
@@ -655,11 +674,14 @@ _hq2x_16:
 .noflag8:
     test    ecx,ecx
     jnz     .testflag1
-    mov     ecx,[cross]
+    mov     ecx,[ebp+cross]
+    mov     ebx, [ebp+pFuncTable2]
+    mov     ecx, [ebx+ecx*4]
+    add     ecx, ebx
     mov     ebx,[ebp+dstPitch]
-    jmp     [FuncTable2+ecx*4]
+    jmp     ecx
 .testflag1:
-    mov     edx,[w1]
+    mov     edx,[ebp+w1]
     cmp     eax,edx
     je      .noflag1
     movq    mm1,mm5
@@ -667,13 +689,13 @@ _hq2x_16:
     psubusb mm1,mm2
     psubusb mm2,mm5
     por     mm1,mm2
-    psubusb mm1,[threshold]
+    psubusb mm1,[ebp+threshold]
     movd    edx,mm1
     test    edx,edx
     jz      .noflag1
     or      ecx,1
 .noflag1:
-    mov     edx,[w3]
+    mov     edx,[ebp+w3]
     cmp     eax,edx
     je      .noflag3
     movq    mm1,mm5
@@ -681,13 +703,13 @@ _hq2x_16:
     psubusb mm1,mm2
     psubusb mm2,mm5
     por     mm1,mm2
-    psubusb mm1,[threshold]
+    psubusb mm1,[ebp+threshold]
     movd    edx,mm1
     test    edx,edx
     jz      .noflag3
     or      ecx,4
 .noflag3:
-    mov     edx,[w7]
+    mov     edx,[ebp+w7]
     cmp     eax,edx
     je      .noflag7
     movq    mm1,mm5
@@ -695,13 +717,13 @@ _hq2x_16:
     psubusb mm1,mm2
     psubusb mm2,mm5
     por     mm1,mm2
-    psubusb mm1,[threshold]
+    psubusb mm1,[ebp+threshold]
     movd    edx,mm1
     test    edx,edx
     jz      .noflag7
     or      ecx,32
 .noflag7:
-    mov     edx,[w9]
+    mov     edx,[ebp+w9]
     cmp     eax,edx
     je      .noflag9
     movq    mm1,mm5
@@ -709,14 +731,17 @@ _hq2x_16:
     psubusb mm1,mm2
     psubusb mm2,mm5
     por     mm1,mm2
-    psubusb mm1,[threshold]
+    psubusb mm1,[ebp+threshold]
     movd    edx,mm1
     test    edx,edx
     jz      .noflag9
     or      ecx,128
 .noflag9:
+    mov  ebx, [ebp+pFuncTable]
+    mov  ecx, [ebx+ecx*4]
+    add  ecx, ebx
     mov  ebx,[ebp+dstPitch]
-    jmp  [FuncTable+ecx*4]
+    jmp  ecx
 
 ..@flag0:
 ..@flag1:
@@ -850,7 +875,7 @@ _hq2x_16:
 ..@flag18:
 ..@flag50:
     PIXEL00_22
-    DiffOrNot w2,w6,PIXEL01_10,PIXEL01_20
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_10,PIXEL01_20
     PIXEL10_20
     PIXEL11_21
     jmp .loopx_end
@@ -859,18 +884,18 @@ _hq2x_16:
     PIXEL00_20
     PIXEL01_22
     PIXEL10_21
-    DiffOrNot w6,w8,PIXEL11_10,PIXEL11_20
+    DiffOrNot ebp+w6,ebp+w8,PIXEL11_10,PIXEL11_20
     jmp .loopx_end
 ..@flag72:
 ..@flag76:
     PIXEL00_21
     PIXEL01_20
-    DiffOrNot w8,w4,PIXEL10_10,PIXEL10_20
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_10,PIXEL10_20
     PIXEL11_22
     jmp .loopx_end
 ..@flag10:
 ..@flag138:
-    DiffOrNot w4,w2,PIXEL00_10,PIXEL00_20
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_10,PIXEL00_20
     PIXEL01_21
     PIXEL10_22
     PIXEL11_20
@@ -922,7 +947,7 @@ _hq2x_16:
 ..@flag22:
 ..@flag54:
     PIXEL00_22
-    DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_0,PIXEL01_20
     PIXEL10_20
     PIXEL11_21
     jmp .loopx_end
@@ -931,67 +956,67 @@ _hq2x_16:
     PIXEL00_20
     PIXEL01_22
     PIXEL10_21
-    DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+    DiffOrNot ebp+w6,ebp+w8,PIXEL11_0,PIXEL11_20
     jmp .loopx_end
 ..@flag104:
 ..@flag108:
     PIXEL00_21
     PIXEL01_20
-    DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_0,PIXEL10_20
     PIXEL11_22
     jmp .loopx_end
 ..@flag11:
 ..@flag139:
-    DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_0,PIXEL00_20
     PIXEL01_21
     PIXEL10_22
     PIXEL11_20
     jmp .loopx_end
 ..@flag19:
 ..@flag51:
-    DiffOrNot w2,w6,PIXEL00_11,PIXEL01_10,PIXEL00_60,PIXEL01_90
+    DiffOrNot ebp+w2,ebp+w6,PIXEL00_11,PIXEL01_10,PIXEL00_60,PIXEL01_90
     PIXEL10_20
     PIXEL11_21
     jmp .loopx_end
 ..@flag146:
 ..@flag178:
     PIXEL00_22
-    DiffOrNot w2,w6,PIXEL01_10,PIXEL11_12,PIXEL01_90,PIXEL11_61
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_10,PIXEL11_12,PIXEL01_90,PIXEL11_61
     PIXEL10_20
     jmp .loopx_end
 ..@flag84:
 ..@flag85:
     PIXEL00_20
-    DiffOrNot w6,w8,PIXEL01_11,PIXEL11_10,PIXEL01_60,PIXEL11_90
+    DiffOrNot ebp+w6,ebp+w8,PIXEL01_11,PIXEL11_10,PIXEL01_60,PIXEL11_90
     PIXEL10_21
     jmp .loopx_end
 ..@flag112:
 ..@flag113:
     PIXEL00_20
     PIXEL01_22
-    DiffOrNot w6,w8,PIXEL10_12,PIXEL11_10,PIXEL10_61,PIXEL11_90
+    DiffOrNot ebp+w6,ebp+w8,PIXEL10_12,PIXEL11_10,PIXEL10_61,PIXEL11_90
     jmp .loopx_end
 ..@flag200:
 ..@flag204:
     PIXEL00_21
     PIXEL01_20
-    DiffOrNot w8,w4,PIXEL10_10,PIXEL11_11,PIXEL10_90,PIXEL11_60
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_10,PIXEL11_11,PIXEL10_90,PIXEL11_60
     jmp .loopx_end
 ..@flag73:
 ..@flag77:
-    DiffOrNot w8,w4,PIXEL00_12,PIXEL10_10,PIXEL00_61,PIXEL10_90
+    DiffOrNot ebp+w8,ebp+w4,PIXEL00_12,PIXEL10_10,PIXEL00_61,PIXEL10_90
     PIXEL01_20
     PIXEL11_22
     jmp .loopx_end
 ..@flag42:
 ..@flag170:
-    DiffOrNot w4,w2,PIXEL00_10,PIXEL10_11,PIXEL00_90,PIXEL10_60
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_10,PIXEL10_11,PIXEL00_90,PIXEL10_60
     PIXEL01_21
     PIXEL11_20
     jmp .loopx_end
 ..@flag14:
 ..@flag142:
-    DiffOrNot w4,w2,PIXEL00_10,PIXEL01_12,PIXEL00_90,PIXEL01_61
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_10,PIXEL01_12,PIXEL00_90,PIXEL01_61
     PIXEL10_22
     PIXEL11_20
     jmp .loopx_end
@@ -1045,41 +1070,41 @@ _hq2x_16:
     jmp .loopx_end
 ..@flag26:
 ..@flag31:
-    DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
-    DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_0,PIXEL00_20
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_0,PIXEL01_20
     PIXEL10_22
     PIXEL11_21
     jmp .loopx_end
 ..@flag82:
 ..@flag214:
     PIXEL00_22
-    DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_0,PIXEL01_20
     PIXEL10_21
-    DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+    DiffOrNot ebp+w6,ebp+w8,PIXEL11_0,PIXEL11_20
     jmp .loopx_end
 ..@flag88:
 ..@flag248:
     PIXEL00_21
     PIXEL01_22
-    DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
-    DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_0,PIXEL10_20
+    DiffOrNot ebp+w6,ebp+w8,PIXEL11_0,PIXEL11_20
     jmp .loopx_end
 ..@flag74:
 ..@flag107:
-    DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_0,PIXEL00_20
     PIXEL01_21
-    DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_0,PIXEL10_20
     PIXEL11_22
     jmp .loopx_end
 ..@flag27:
-    DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_0,PIXEL00_20
     PIXEL01_10
     PIXEL10_22
     PIXEL11_21
     jmp .loopx_end
 ..@flag86:
     PIXEL00_22
-    DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_0,PIXEL01_20
     PIXEL10_21
     PIXEL11_10
     jmp .loopx_end
@@ -1087,17 +1112,17 @@ _hq2x_16:
     PIXEL00_21
     PIXEL01_22
     PIXEL10_10
-    DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+    DiffOrNot ebp+w6,ebp+w8,PIXEL11_0,PIXEL11_20
     jmp .loopx_end
 ..@flag106:
     PIXEL00_10
     PIXEL01_21
-    DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_0,PIXEL10_20
     PIXEL11_22
     jmp .loopx_end
 ..@flag30:
     PIXEL00_10
-    DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_0,PIXEL01_20
     PIXEL10_22
     PIXEL11_21
     jmp .loopx_end
@@ -1105,16 +1130,16 @@ _hq2x_16:
     PIXEL00_22
     PIXEL01_10
     PIXEL10_21
-    DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+    DiffOrNot ebp+w6,ebp+w8,PIXEL11_0,PIXEL11_20
     jmp .loopx_end
 ..@flag120:
     PIXEL00_21
     PIXEL01_22
-    DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_0,PIXEL10_20
     PIXEL11_10
     jmp .loopx_end
 ..@flag75:
-    DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_0,PIXEL00_20
     PIXEL01_21
     PIXEL10_10
     PIXEL11_22
@@ -1192,122 +1217,122 @@ _hq2x_16:
     PIXEL11_12
     jmp .loopx_end
 ..@flag58:
-    DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70
-    DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_10,PIXEL00_70
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_10,PIXEL01_70
     PIXEL10_11
     PIXEL11_21
     jmp .loopx_end
 ..@flag83:
     PIXEL00_11
-    DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_10,PIXEL01_70
     PIXEL10_21
-    DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70
+    DiffOrNot ebp+w6,ebp+w8,PIXEL11_10,PIXEL11_70
     jmp .loopx_end
 ..@flag92:
     PIXEL00_21
     PIXEL01_11
-    DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70
-    DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_10,PIXEL10_70
+    DiffOrNot ebp+w6,ebp+w8,PIXEL11_10,PIXEL11_70
     jmp .loopx_end
 ..@flag202:
-    DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_10,PIXEL00_70
     PIXEL01_21
-    DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_10,PIXEL10_70
     PIXEL11_11
     jmp .loopx_end
 ..@flag78:
-    DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_10,PIXEL00_70
     PIXEL01_12
-    DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_10,PIXEL10_70
     PIXEL11_22
     jmp .loopx_end
 ..@flag154:
-    DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70
-    DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_10,PIXEL00_70
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_10,PIXEL01_70
     PIXEL10_22
     PIXEL11_12
     jmp .loopx_end
 ..@flag114:
     PIXEL00_22
-    DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_10,PIXEL01_70
     PIXEL10_12
-    DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70
+    DiffOrNot ebp+w6,ebp+w8,PIXEL11_10,PIXEL11_70
     jmp .loopx_end
 ..@flag89:
     PIXEL00_12
     PIXEL01_22
-    DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70
-    DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_10,PIXEL10_70
+    DiffOrNot ebp+w6,ebp+w8,PIXEL11_10,PIXEL11_70
     jmp .loopx_end
 ..@flag90:
-    DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70
-    DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70
-    DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70
-    DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_10,PIXEL00_70
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_10,PIXEL01_70
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_10,PIXEL10_70
+    DiffOrNot ebp+w6,ebp+w8,PIXEL11_10,PIXEL11_70
     jmp .loopx_end
 ..@flag55:
 ..@flag23:
-    DiffOrNot w2,w6,PIXEL00_11,PIXEL01_0,PIXEL00_60,PIXEL01_90
+    DiffOrNot ebp+w2,ebp+w6,PIXEL00_11,PIXEL01_0,PIXEL00_60,PIXEL01_90
     PIXEL10_20
     PIXEL11_21
     jmp .loopx_end
 ..@flag182:
 ..@flag150:
     PIXEL00_22
-    DiffOrNot w2,w6,PIXEL01_0,PIXEL11_12,PIXEL01_90,PIXEL11_61
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_0,PIXEL11_12,PIXEL01_90,PIXEL11_61
     PIXEL10_20
     jmp .loopx_end
 ..@flag213:
 ..@flag212:
     PIXEL00_20
-    DiffOrNot w6,w8,PIXEL01_11,PIXEL11_0,PIXEL01_60,PIXEL11_90
+    DiffOrNot ebp+w6,ebp+w8,PIXEL01_11,PIXEL11_0,PIXEL01_60,PIXEL11_90
     PIXEL10_21
     jmp .loopx_end
 ..@flag241:
 ..@flag240:
     PIXEL00_20
     PIXEL01_22
-    DiffOrNot w6,w8,PIXEL10_12,PIXEL11_0,PIXEL10_61,PIXEL11_90
+    DiffOrNot ebp+w6,ebp+w8,PIXEL10_12,PIXEL11_0,PIXEL10_61,PIXEL11_90
     jmp .loopx_end
 ..@flag236:
 ..@flag232:
     PIXEL00_21
     PIXEL01_20
-    DiffOrNot w8,w4,PIXEL10_0,PIXEL11_11,PIXEL10_90,PIXEL11_60
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_0,PIXEL11_11,PIXEL10_90,PIXEL11_60
     jmp .loopx_end
 ..@flag109:
 ..@flag105:
-    DiffOrNot w8,w4,PIXEL00_12,PIXEL10_0,PIXEL00_61,PIXEL10_90
+    DiffOrNot ebp+w8,ebp+w4,PIXEL00_12,PIXEL10_0,PIXEL00_61,PIXEL10_90
     PIXEL01_20
     PIXEL11_22
     jmp .loopx_end
 ..@flag171:
 ..@flag43:
-    DiffOrNot w4,w2,PIXEL00_0,PIXEL10_11,PIXEL00_90,PIXEL10_60
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_0,PIXEL10_11,PIXEL00_90,PIXEL10_60
     PIXEL01_21
     PIXEL11_20
     jmp .loopx_end
 ..@flag143:
 ..@flag15:
-    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_12,PIXEL00_90,PIXEL01_61
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_0,PIXEL01_12,PIXEL00_90,PIXEL01_61
     PIXEL10_22
     PIXEL11_20
     jmp .loopx_end
 ..@flag124:
     PIXEL00_21
     PIXEL01_11
-    DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_0,PIXEL10_20
     PIXEL11_10
     jmp .loopx_end
 ..@flag203:
-    DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_0,PIXEL00_20
     PIXEL01_21
     PIXEL10_10
     PIXEL11_11
     jmp .loopx_end
 ..@flag62:
     PIXEL00_10
-    DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_0,PIXEL01_20
     PIXEL10_11
     PIXEL11_21
     jmp .loopx_end
@@ -1315,11 +1340,11 @@ _hq2x_16:
     PIXEL00_11
     PIXEL01_10
     PIXEL10_21
-    DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+    DiffOrNot ebp+w6,ebp+w8,PIXEL11_0,PIXEL11_20
     jmp .loopx_end
 ..@flag118:
     PIXEL00_22
-    DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_0,PIXEL01_20
     PIXEL10_12
     PIXEL11_10
     jmp .loopx_end
@@ -1327,16 +1352,16 @@ _hq2x_16:
     PIXEL00_12
     PIXEL01_22
     PIXEL10_10
-    DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+    DiffOrNot ebp+w6,ebp+w8,PIXEL11_0,PIXEL11_20
     jmp .loopx_end
 ..@flag110:
     PIXEL00_10
     PIXEL01_12
-    DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_0,PIXEL10_20
     PIXEL11_22
     jmp .loopx_end
 ..@flag155:
-    DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_0,PIXEL00_20
     PIXEL01_10
     PIXEL10_22
     PIXEL11_12
@@ -1392,74 +1417,74 @@ _hq2x_16:
 ..@flag220:
     PIXEL00_21
     PIXEL01_11
-    DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70
-    DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_10,PIXEL10_70
+    DiffOrNot ebp+w6,ebp+w8,PIXEL11_0,PIXEL11_20
     jmp .loopx_end
 ..@flag158:
-    DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70
-    DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_10,PIXEL00_70
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_0,PIXEL01_20
     PIXEL10_22
     PIXEL11_12
     jmp .loopx_end
 ..@flag234:
-    DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_10,PIXEL00_70
     PIXEL01_21
-    DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_0,PIXEL10_20
     PIXEL11_11
     jmp .loopx_end
 ..@flag242:
     PIXEL00_22
-    DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_10,PIXEL01_70
     PIXEL10_12
-    DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+    DiffOrNot ebp+w6,ebp+w8,PIXEL11_0,PIXEL11_20
     jmp .loopx_end
 ..@flag59:
-    DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
-    DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_0,PIXEL00_20
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_10,PIXEL01_70
     PIXEL10_11
     PIXEL11_21
     jmp .loopx_end
 ..@flag121:
     PIXEL00_12
     PIXEL01_22
-    DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
-    DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_0,PIXEL10_20
+    DiffOrNot ebp+w6,ebp+w8,PIXEL11_10,PIXEL11_70
     jmp .loopx_end
 ..@flag87:
     PIXEL00_11
-    DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_0,PIXEL01_20
     PIXEL10_21
-    DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70
+    DiffOrNot ebp+w6,ebp+w8,PIXEL11_10,PIXEL11_70
     jmp .loopx_end
 ..@flag79:
-    DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_0,PIXEL00_20
     PIXEL01_12
-    DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_10,PIXEL10_70
     PIXEL11_22
     jmp .loopx_end
 ..@flag122:
-    DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70
-    DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70
-    DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
-    DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_10,PIXEL00_70
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_10,PIXEL01_70
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_0,PIXEL10_20
+    DiffOrNot ebp+w6,ebp+w8,PIXEL11_10,PIXEL11_70
     jmp .loopx_end
 ..@flag94:
-    DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70
-    DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
-    DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70
-    DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_10,PIXEL00_70
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_0,PIXEL01_20
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_10,PIXEL10_70
+    DiffOrNot ebp+w6,ebp+w8,PIXEL11_10,PIXEL11_70
     jmp .loopx_end
 ..@flag218:
-    DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70
-    DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70
-    DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70
-    DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_10,PIXEL00_70
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_10,PIXEL01_70
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_10,PIXEL10_70
+    DiffOrNot ebp+w6,ebp+w8,PIXEL11_0,PIXEL11_20
     jmp .loopx_end
 ..@flag91:
-    DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
-    DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70
-    DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70
-    DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_0,PIXEL00_20
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_10,PIXEL01_70
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_10,PIXEL10_70
+    DiffOrNot ebp+w6,ebp+w8,PIXEL11_10,PIXEL11_70
     jmp .loopx_end
 ..@flag229:
     PIXEL00_20
@@ -1486,39 +1511,39 @@ _hq2x_16:
     PIXEL11_12
     jmp .loopx_end
 ..@flag186:
-    DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70
-    DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_10,PIXEL00_70
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_10,PIXEL01_70
     PIXEL10_11
     PIXEL11_12
     jmp .loopx_end
 ..@flag115:
     PIXEL00_11
-    DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_10,PIXEL01_70
     PIXEL10_12
-    DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70
+    DiffOrNot ebp+w6,ebp+w8,PIXEL11_10,PIXEL11_70
     jmp .loopx_end
 ..@flag93:
     PIXEL00_12
     PIXEL01_11
-    DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70
-    DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_10,PIXEL10_70
+    DiffOrNot ebp+w6,ebp+w8,PIXEL11_10,PIXEL11_70
     jmp .loopx_end
 ..@flag206:
-    DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_10,PIXEL00_70
     PIXEL01_12
-    DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_10,PIXEL10_70
     PIXEL11_11
     jmp .loopx_end
 ..@flag205:
 ..@flag201:
     PIXEL00_12
     PIXEL01_20
-    DiffOrNot w8,w4,PIXEL10_10,PIXEL10_70
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_10,PIXEL10_70
     PIXEL11_11
     jmp .loopx_end
 ..@flag174:
 ..@flag46:
-    DiffOrNot w4,w2,PIXEL00_10,PIXEL00_70
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_10,PIXEL00_70
     PIXEL01_12
     PIXEL10_11
     PIXEL11_20
@@ -1526,7 +1551,7 @@ _hq2x_16:
 ..@flag179:
 ..@flag147:
     PIXEL00_11
-    DiffOrNot w2,w6,PIXEL01_10,PIXEL01_70
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_10,PIXEL01_70
     PIXEL10_20
     PIXEL11_12
     jmp .loopx_end
@@ -1535,7 +1560,7 @@ _hq2x_16:
     PIXEL00_20
     PIXEL01_11
     PIXEL10_12
-    DiffOrNot w6,w8,PIXEL11_10,PIXEL11_70
+    DiffOrNot ebp+w6,ebp+w8,PIXEL11_10,PIXEL11_70
     jmp .loopx_end
 ..@flag189:
     PIXEL00_12
@@ -1551,53 +1576,53 @@ _hq2x_16:
     jmp .loopx_end
 ..@flag126:
     PIXEL00_10
-    DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
-    DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_0,PIXEL01_20
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_0,PIXEL10_20
     PIXEL11_10
     jmp .loopx_end
 ..@flag219:
-    DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_0,PIXEL00_20
     PIXEL01_10
     PIXEL10_10
-    DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+    DiffOrNot ebp+w6,ebp+w8,PIXEL11_0,PIXEL11_20
     jmp .loopx_end
 ..@flag125:
-    DiffOrNot w8,w4,PIXEL00_12,PIXEL10_0,PIXEL00_61,PIXEL10_90
+    DiffOrNot ebp+w8,ebp+w4,PIXEL00_12,PIXEL10_0,PIXEL00_61,PIXEL10_90
     PIXEL01_11
     PIXEL11_10
     jmp .loopx_end
 ..@flag221:
     PIXEL00_12
-    DiffOrNot w6,w8,PIXEL01_11,PIXEL11_0,PIXEL01_60,PIXEL11_90
+    DiffOrNot ebp+w6,ebp+w8,PIXEL01_11,PIXEL11_0,PIXEL01_60,PIXEL11_90
     PIXEL10_10
     jmp .loopx_end
 ..@flag207:
-    DiffOrNot w4,w2,PIXEL00_0,PIXEL01_12,PIXEL00_90,PIXEL01_61
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_0,PIXEL01_12,PIXEL00_90,PIXEL01_61
     PIXEL10_10
     PIXEL11_11
     jmp .loopx_end
 ..@flag238:
     PIXEL00_10
     PIXEL01_12
-    DiffOrNot w8,w4,PIXEL10_0,PIXEL11_11,PIXEL10_90,PIXEL11_60
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_0,PIXEL11_11,PIXEL10_90,PIXEL11_60
     jmp .loopx_end
 ..@flag190:
     PIXEL00_10
-    DiffOrNot w2,w6,PIXEL01_0,PIXEL11_12,PIXEL01_90,PIXEL11_61
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_0,PIXEL11_12,PIXEL01_90,PIXEL11_61
     PIXEL10_11
     jmp .loopx_end
 ..@flag187:
-    DiffOrNot w4,w2,PIXEL00_0,PIXEL10_11,PIXEL00_90,PIXEL10_60
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_0,PIXEL10_11,PIXEL00_90,PIXEL10_60
     PIXEL01_10
     PIXEL11_12
     jmp .loopx_end
 ..@flag243:
     PIXEL00_11
     PIXEL01_10
-    DiffOrNot w6,w8,PIXEL10_12,PIXEL11_0,PIXEL10_61,PIXEL11_90
+    DiffOrNot ebp+w6,ebp+w8,PIXEL10_12,PIXEL11_0,PIXEL10_61,PIXEL11_90
     jmp .loopx_end
 ..@flag119:
-    DiffOrNot w2,w6,PIXEL00_11,PIXEL01_0,PIXEL00_60,PIXEL01_90
+    DiffOrNot ebp+w2,ebp+w6,PIXEL00_11,PIXEL01_0,PIXEL00_60,PIXEL01_90
     PIXEL10_12
     PIXEL11_10
     jmp .loopx_end
@@ -1605,12 +1630,12 @@ _hq2x_16:
 ..@flag233:
     PIXEL00_12
     PIXEL01_20
-    DiffOrNot w8,w4,PIXEL10_0,PIXEL10_100
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_0,PIXEL10_100
     PIXEL11_11
     jmp .loopx_end
 ..@flag175:
 ..@flag47:
-    DiffOrNot w4,w2,PIXEL00_0,PIXEL00_100
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_0,PIXEL00_100
     PIXEL01_12
     PIXEL10_11
     PIXEL11_20
@@ -1618,7 +1643,7 @@ _hq2x_16:
 ..@flag183:
 ..@flag151:
     PIXEL00_11
-    DiffOrNot w2,w6,PIXEL01_0,PIXEL01_100
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_0,PIXEL01_100
     PIXEL10_20
     PIXEL11_12
     jmp .loopx_end
@@ -1627,136 +1652,135 @@ _hq2x_16:
     PIXEL00_20
     PIXEL01_11
     PIXEL10_12
-    DiffOrNot w6,w8,PIXEL11_0,PIXEL11_100
+    DiffOrNot ebp+w6,ebp+w8,PIXEL11_0,PIXEL11_100
     jmp .loopx_end
 ..@flag250:
     PIXEL00_10
     PIXEL01_10
-    DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
-    DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_0,PIXEL10_20
+    DiffOrNot ebp+w6,ebp+w8,PIXEL11_0,PIXEL11_20
     jmp .loopx_end
 ..@flag123:
-    DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_0,PIXEL00_20
     PIXEL01_10
-    DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_0,PIXEL10_20
     PIXEL11_10
     jmp .loopx_end
 ..@flag95:
-    DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
-    DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_0,PIXEL00_20
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_0,PIXEL01_20
     PIXEL10_10
     PIXEL11_10
     jmp .loopx_end
 ..@flag222:
     PIXEL00_10
-    DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_0,PIXEL01_20
     PIXEL10_10
-    DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+    DiffOrNot ebp+w6,ebp+w8,PIXEL11_0,PIXEL11_20
     jmp .loopx_end
 ..@flag252:
     PIXEL00_21
     PIXEL01_11
-    DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
-    DiffOrNot w6,w8,PIXEL11_0,PIXEL11_100
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_0,PIXEL10_20
+    DiffOrNot ebp+w6,ebp+w8,PIXEL11_0,PIXEL11_100
     jmp .loopx_end
 ..@flag249:
     PIXEL00_12
     PIXEL01_22
-    DiffOrNot w8,w4,PIXEL10_0,PIXEL10_100
-    DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_0,PIXEL10_100
+    DiffOrNot ebp+w6,ebp+w8,PIXEL11_0,PIXEL11_20
     jmp .loopx_end
 ..@flag235:
-    DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_0,PIXEL00_20
     PIXEL01_21
-    DiffOrNot w8,w4,PIXEL10_0,PIXEL10_100
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_0,PIXEL10_100
     PIXEL11_11
     jmp .loopx_end
 ..@flag111:
-    DiffOrNot w4,w2,PIXEL00_0,PIXEL00_100
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_0,PIXEL00_100
     PIXEL01_12
-    DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_0,PIXEL10_20
     PIXEL11_22
     jmp .loopx_end
 ..@flag63:
-    DiffOrNot w4,w2,PIXEL00_0,PIXEL00_100
-    DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_0,PIXEL00_100
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_0,PIXEL01_20
     PIXEL10_11
     PIXEL11_21
     jmp .loopx_end
 ..@flag159:
-    DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
-    DiffOrNot w2,w6,PIXEL01_0,PIXEL01_100
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_0,PIXEL00_20
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_0,PIXEL01_100
     PIXEL10_22
     PIXEL11_12
     jmp .loopx_end
 ..@flag215:
     PIXEL00_11
-    DiffOrNot w2,w6,PIXEL01_0,PIXEL01_100
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_0,PIXEL01_100
     PIXEL10_21
-    DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+    DiffOrNot ebp+w6,ebp+w8,PIXEL11_0,PIXEL11_20
     jmp .loopx_end
 ..@flag246:
     PIXEL00_22
-    DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_0,PIXEL01_20
     PIXEL10_12
-    DiffOrNot w6,w8,PIXEL11_0,PIXEL11_100
+    DiffOrNot ebp+w6,ebp+w8,PIXEL11_0,PIXEL11_100
     jmp .loopx_end
 ..@flag254:
     PIXEL00_10
-    DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
-    DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
-    DiffOrNot w6,w8,PIXEL11_0,PIXEL11_100
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_0,PIXEL01_20
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_0,PIXEL10_20
+    DiffOrNot ebp+w6,ebp+w8,PIXEL11_0,PIXEL11_100
     jmp .loopx_end
 ..@flag253:
     PIXEL00_12
     PIXEL01_11
-    DiffOrNot w8,w4,PIXEL10_0,PIXEL10_100
-    DiffOrNot w6,w8,PIXEL11_0,PIXEL11_100
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_0,PIXEL10_100
+    DiffOrNot ebp+w6,ebp+w8,PIXEL11_0,PIXEL11_100
     jmp .loopx_end
 ..@flag251:
-    DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_0,PIXEL00_20
     PIXEL01_10
-    DiffOrNot w8,w4,PIXEL10_0,PIXEL10_100
-    DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_0,PIXEL10_100
+    DiffOrNot ebp+w6,ebp+w8,PIXEL11_0,PIXEL11_20
     jmp .loopx_end
 ..@flag239:
-    DiffOrNot w4,w2,PIXEL00_0,PIXEL00_100
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_0,PIXEL00_100
     PIXEL01_12
-    DiffOrNot w8,w4,PIXEL10_0,PIXEL10_100
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_0,PIXEL10_100
     PIXEL11_11
     jmp .loopx_end
 ..@flag127:
-    DiffOrNot w4,w2,PIXEL00_0,PIXEL00_100
-    DiffOrNot w2,w6,PIXEL01_0,PIXEL01_20
-    DiffOrNot w8,w4,PIXEL10_0,PIXEL10_20
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_0,PIXEL00_100
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_0,PIXEL01_20
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_0,PIXEL10_20
     PIXEL11_10
     jmp .loopx_end
 ..@flag191:
-    DiffOrNot w4,w2,PIXEL00_0,PIXEL00_100
-    DiffOrNot w2,w6,PIXEL01_0,PIXEL01_100
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_0,PIXEL00_100
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_0,PIXEL01_100
     PIXEL10_11
     PIXEL11_12
     jmp .loopx_end
 ..@flag223:
-    DiffOrNot w4,w2,PIXEL00_0,PIXEL00_20
-    DiffOrNot w2,w6,PIXEL01_0,PIXEL01_100
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_0,PIXEL00_20
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_0,PIXEL01_100
     PIXEL10_10
-    DiffOrNot w6,w8,PIXEL11_0,PIXEL11_20
+    DiffOrNot ebp+w6,ebp+w8,PIXEL11_0,PIXEL11_20
     jmp .loopx_end
 ..@flag247:
     PIXEL00_11
-    DiffOrNot w2,w6,PIXEL01_0,PIXEL01_100
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_0,PIXEL01_100
     PIXEL10_12
-    DiffOrNot w6,w8,PIXEL11_0,PIXEL11_100
+    DiffOrNot ebp+w6,ebp+w8,PIXEL11_0,PIXEL11_100
     jmp .loopx_end
 ..@flag255:
-    DiffOrNot w4,w2,PIXEL00_0,PIXEL00_100
-    DiffOrNot w2,w6,PIXEL01_0,PIXEL01_100
-    DiffOrNot w8,w4,PIXEL10_0,PIXEL10_100
-    DiffOrNot w6,w8,PIXEL11_0,PIXEL11_100
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_0,PIXEL00_100
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_0,PIXEL01_100
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_0,PIXEL10_100
+    DiffOrNot ebp+w6,ebp+w8,PIXEL11_0,PIXEL11_100
     jmp .loopx_end
 
-
 ..@cross0:
     mov     edx,eax
     shl     eax,16
@@ -1768,13 +1792,13 @@ _hq2x_16:
     mov     edx,eax
     shl     eax,16
     or      eax,edx
-    mov     ecx,[w2]
-    and     edx,[_hqx_highbits]
-    and     ecx,[_hqx_highbits]
+    mov     ecx,[ebp+w2]
+    and     edx,[ebp+highbits]
+    and     ecx,[ebp+highbits]
     add     ecx,edx
     shr     ecx,1
-    add     ecx,[_hqx_lowbits]
-    and     ecx,[_hqx_highbits]
+    add     ecx,[ebp+lowbits]
+    and     ecx,[ebp+highbits]
     add     edx,ecx
     shr     edx,1
     mov     ecx,edx
@@ -1785,13 +1809,13 @@ _hq2x_16:
     jmp     .loopx_end
 ..@cross2:
     shl     eax,16
-    mov     ecx,[w4]
-    and     edx,[_hqx_highbits]
-    and     ecx,[_hqx_highbits]
+    mov     ecx,[ebp+w4]
+    and     edx,[ebp+highbits]
+    and     ecx,[ebp+highbits]
     add     ecx,edx
     shr     ecx,1
-    add     ecx,[_hqx_lowbits]
-    and     ecx,[_hqx_highbits]
+    add     ecx,[ebp+lowbits]
+    and     ecx,[ebp+highbits]
     add     edx,ecx
     shr     edx,1
     or      eax,edx
@@ -1799,13 +1823,13 @@ _hq2x_16:
     mov     [edi+ebx],eax
     jmp     .loopx_end
 ..@cross4:
-    mov     ecx,[w6]
-    and     edx,[_hqx_highbits]
-    and     ecx,[_hqx_highbits]
+    mov     ecx,[ebp+w6]
+    and     edx,[ebp+highbits]
+    and     ecx,[ebp+highbits]
     add     ecx,edx
     shr     ecx,1
-    add     ecx,[_hqx_lowbits]
-    and     ecx,[_hqx_highbits]
+    add     ecx,[ebp+lowbits]
+    and     ecx,[ebp+highbits]
     add     edx,ecx
     shr     edx,1
     shl     edx,16
@@ -1817,13 +1841,13 @@ _hq2x_16:
     mov     edx,eax
     shl     eax,16
     or      eax,edx
-    mov     ecx,[w8]
-    and     edx,[_hqx_highbits]
-    and     ecx,[_hqx_highbits]
+    mov     ecx,[ebp+w8]
+    and     edx,[ebp+highbits]
+    and     ecx,[ebp+highbits]
     add     ecx,edx
     shr     ecx,1
-    add     ecx,[_hqx_lowbits]
-    and     ecx,[_hqx_highbits]
+    add     ecx,[ebp+lowbits]
+    and     ecx,[ebp+highbits]
     add     edx,ecx
     shr     edx,1
     mov     ecx,edx
@@ -1836,18 +1860,18 @@ _hq2x_16:
 .loopx_end:
     add     esi,2
     add     edi,4
-    dec     dword[xcounter]
+    dec     dword[ebp+xcounter]
     jz      .nexty
     jmp     .loopx
 .nexty:
-    add     esi,dword[moduloSrc]
-    add     edi,dword[moduloDst]
-    dec     dword[linesleft]
+    add     esi,dword[ebp+moduloSrc]
+    add     edi,dword[ebp+moduloDst]
+    dec     dword[ebp+linesleft]
     jz      .fin
     mov     ebx,[ebp+srcPitch]
-    mov     dword[nextline],ebx
+    mov     dword[ebp+nextline],ebx
     neg     ebx
-    mov     dword[prevline],ebx
+    mov     dword[ebp+prevline],ebx
     jmp     .loopy
 .fin:
     emms
@@ -1856,46 +1880,49 @@ _hq2x_16:
     pop ebp
     ret
 
-SECTION .data
+_get_pc:
+    mov     eax, dword[esp]
+    ret
+
 FuncTable:
-    dd ..@flag0, ..@flag1, ..@flag2, ..@flag3, ..@flag4, ..@flag5, ..@flag6, ..@flag7
-    dd ..@flag8, ..@flag9, ..@flag10, ..@flag11, ..@flag12, ..@flag13, ..@flag14, ..@flag15
-    dd ..@flag16, ..@flag17, ..@flag18, ..@flag19, ..@flag20, ..@flag21, ..@flag22, ..@flag23
-    dd ..@flag24, ..@flag25, ..@flag26, ..@flag27, ..@flag28, ..@flag29, ..@flag30, ..@flag31
-    dd ..@flag32, ..@flag33, ..@flag34, ..@flag35, ..@flag36, ..@flag37, ..@flag38, ..@flag39
-    dd ..@flag40, ..@flag41, ..@flag42, ..@flag43, ..@flag44, ..@flag45, ..@flag46, ..@flag47
-    dd ..@flag48, ..@flag49, ..@flag50, ..@flag51, ..@flag52, ..@flag53, ..@flag54, ..@flag55
-    dd ..@flag56, ..@flag57, ..@flag58, ..@flag59, ..@flag60, ..@flag61, ..@flag62, ..@flag63
-    dd ..@flag64, ..@flag65, ..@flag66, ..@flag67, ..@flag68, ..@flag69, ..@flag70, ..@flag71
-    dd ..@flag72, ..@flag73, ..@flag74, ..@flag75, ..@flag76, ..@flag77, ..@flag78, ..@flag79
-    dd ..@flag80, ..@flag81, ..@flag82, ..@flag83, ..@flag84, ..@flag85, ..@flag86, ..@flag87
-    dd ..@flag88, ..@flag89, ..@flag90, ..@flag91, ..@flag92, ..@flag93, ..@flag94, ..@flag95
-    dd ..@flag96, ..@flag97, ..@flag98, ..@flag99, ..@flag100, ..@flag101, ..@flag102, ..@flag103
-    dd ..@flag104, ..@flag105, ..@flag106, ..@flag107, ..@flag108, ..@flag109, ..@flag110, ..@flag111
-    dd ..@flag112, ..@flag113, ..@flag114, ..@flag115, ..@flag116, ..@flag117, ..@flag118, ..@flag119
-    dd ..@flag120, ..@flag121, ..@flag122, ..@flag123, ..@flag124, ..@flag125, ..@flag126, ..@flag127
-    dd ..@flag128, ..@flag129, ..@flag130, ..@flag131, ..@flag132, ..@flag133, ..@flag134, ..@flag135
-    dd ..@flag136, ..@flag137, ..@flag138, ..@flag139, ..@flag140, ..@flag141, ..@flag142, ..@flag143
-    dd ..@flag144, ..@flag145, ..@flag146, ..@flag147, ..@flag148, ..@flag149, ..@flag150, ..@flag151
-    dd ..@flag152, ..@flag153, ..@flag154, ..@flag155, ..@flag156, ..@flag157, ..@flag158, ..@flag159
-    dd ..@flag160, ..@flag161, ..@flag162, ..@flag163, ..@flag164, ..@flag165, ..@flag166, ..@flag167
-    dd ..@flag168, ..@flag169, ..@flag170, ..@flag171, ..@flag172, ..@flag173, ..@flag174, ..@flag175
-    dd ..@flag176, ..@flag177, ..@flag178, ..@flag179, ..@flag180, ..@flag181, ..@flag182, ..@flag183
-    dd ..@flag184, ..@flag185, ..@flag186, ..@flag187, ..@flag188, ..@flag189, ..@flag190, ..@flag191
-    dd ..@flag192, ..@flag193, ..@flag194, ..@flag195, ..@flag196, ..@flag197, ..@flag198, ..@flag199
-    dd ..@flag200, ..@flag201, ..@flag202, ..@flag203, ..@flag204, ..@flag205, ..@flag206, ..@flag207
-    dd ..@flag208, ..@flag209, ..@flag210, ..@flag211, ..@flag212, ..@flag213, ..@flag214, ..@flag215
-    dd ..@flag216, ..@flag217, ..@flag218, ..@flag219, ..@flag220, ..@flag221, ..@flag222, ..@flag223
-    dd ..@flag224, ..@flag225, ..@flag226, ..@flag227, ..@flag228, ..@flag229, ..@flag230, ..@flag231
-    dd ..@flag232, ..@flag233, ..@flag234, ..@flag235, ..@flag236, ..@flag237, ..@flag238, ..@flag239
-    dd ..@flag240, ..@flag241, ..@flag242, ..@flag243, ..@flag244, ..@flag245, ..@flag246, ..@flag247
-    dd ..@flag248, ..@flag249, ..@flag250, ..@flag251, ..@flag252, ..@flag253, ..@flag254, ..@flag255
+    dd ..@flag0 - FuncTable, ..@flag1 - FuncTable, ..@flag2 - FuncTable, ..@flag3 - FuncTable, ..@flag4 - FuncTable, ..@flag5 - FuncTable, ..@flag6 - FuncTable, ..@flag7 - FuncTable
+    dd ..@flag8 - FuncTable, ..@flag9 - FuncTable, ..@flag10 - FuncTable, ..@flag11 - FuncTable, ..@flag12 - FuncTable, ..@flag13 - FuncTable, ..@flag14 - FuncTable, ..@flag15 - FuncTable
+    dd ..@flag16 - FuncTable, ..@flag17 - FuncTable, ..@flag18 - FuncTable, ..@flag19 - FuncTable, ..@flag20 - FuncTable, ..@flag21 - FuncTable, ..@flag22 - FuncTable, ..@flag23 - FuncTable
+    dd ..@flag24 - FuncTable, ..@flag25 - FuncTable, ..@flag26 - FuncTable, ..@flag27 - FuncTable, ..@flag28 - FuncTable, ..@flag29 - FuncTable, ..@flag30 - FuncTable, ..@flag31 - FuncTable
+    dd ..@flag32 - FuncTable, ..@flag33 - FuncTable, ..@flag34 - FuncTable, ..@flag35 - FuncTable, ..@flag36 - FuncTable, ..@flag37 - FuncTable, ..@flag38 - FuncTable, ..@flag39 - FuncTable
+    dd ..@flag40 - FuncTable, ..@flag41 - FuncTable, ..@flag42 - FuncTable, ..@flag43 - FuncTable, ..@flag44 - FuncTable, ..@flag45 - FuncTable, ..@flag46 - FuncTable, ..@flag47 - FuncTable
+    dd ..@flag48 - FuncTable, ..@flag49 - FuncTable, ..@flag50 - FuncTable, ..@flag51 - FuncTable, ..@flag52 - FuncTable, ..@flag53 - FuncTable, ..@flag54 - FuncTable, ..@flag55 - FuncTable
+    dd ..@flag56 - FuncTable, ..@flag57 - FuncTable, ..@flag58 - FuncTable, ..@flag59 - FuncTable, ..@flag60 - FuncTable, ..@flag61 - FuncTable, ..@flag62 - FuncTable, ..@flag63 - FuncTable
+    dd ..@flag64 - FuncTable, ..@flag65 - FuncTable, ..@flag66 - FuncTable, ..@flag67 - FuncTable, ..@flag68 - FuncTable, ..@flag69 - FuncTable, ..@flag70 - FuncTable, ..@flag71 - FuncTable
+    dd ..@flag72 - FuncTable, ..@flag73 - FuncTable, ..@flag74 - FuncTable, ..@flag75 - FuncTable, ..@flag76 - FuncTable, ..@flag77 - FuncTable, ..@flag78 - FuncTable, ..@flag79 - FuncTable
+    dd ..@flag80 - FuncTable, ..@flag81 - FuncTable, ..@flag82 - FuncTable, ..@flag83 - FuncTable, ..@flag84 - FuncTable, ..@flag85 - FuncTable, ..@flag86 - FuncTable, ..@flag87 - FuncTable
+    dd ..@flag88 - FuncTable, ..@flag89 - FuncTable, ..@flag90 - FuncTable, ..@flag91 - FuncTable, ..@flag92 - FuncTable, ..@flag93 - FuncTable, ..@flag94 - FuncTable, ..@flag95 - FuncTable
+    dd ..@flag96 - FuncTable, ..@flag97 - FuncTable, ..@flag98 - FuncTable, ..@flag99 - FuncTable, ..@flag100 - FuncTable, ..@flag101 - FuncTable, ..@flag102 - FuncTable, ..@flag103 - FuncTable
+    dd ..@flag104 - FuncTable, ..@flag105 - FuncTable, ..@flag106 - FuncTable, ..@flag107 - FuncTable, ..@flag108 - FuncTable, ..@flag109 - FuncTable, ..@flag110 - FuncTable, ..@flag111 - FuncTable
+    dd ..@flag112 - FuncTable, ..@flag113 - FuncTable, ..@flag114 - FuncTable, ..@flag115 - FuncTable, ..@flag116 - FuncTable, ..@flag117 - FuncTable, ..@flag118 - FuncTable, ..@flag119 - FuncTable
+    dd ..@flag120 - FuncTable, ..@flag121 - FuncTable, ..@flag122 - FuncTable, ..@flag123 - FuncTable, ..@flag124 - FuncTable, ..@flag125 - FuncTable, ..@flag126 - FuncTable, ..@flag127 - FuncTable
+    dd ..@flag128 - FuncTable, ..@flag129 - FuncTable, ..@flag130 - FuncTable, ..@flag131 - FuncTable, ..@flag132 - FuncTable, ..@flag133 - FuncTable, ..@flag134 - FuncTable, ..@flag135 - FuncTable
+    dd ..@flag136 - FuncTable, ..@flag137 - FuncTable, ..@flag138 - FuncTable, ..@flag139 - FuncTable, ..@flag140 - FuncTable, ..@flag141 - FuncTable, ..@flag142 - FuncTable, ..@flag143 - FuncTable
+    dd ..@flag144 - FuncTable, ..@flag145 - FuncTable, ..@flag146 - FuncTable, ..@flag147 - FuncTable, ..@flag148 - FuncTable, ..@flag149 - FuncTable, ..@flag150 - FuncTable, ..@flag151 - FuncTable
+    dd ..@flag152 - FuncTable, ..@flag153 - FuncTable, ..@flag154 - FuncTable, ..@flag155 - FuncTable, ..@flag156 - FuncTable, ..@flag157 - FuncTable, ..@flag158 - FuncTable, ..@flag159 - FuncTable
+    dd ..@flag160 - FuncTable, ..@flag161 - FuncTable, ..@flag162 - FuncTable, ..@flag163 - FuncTable, ..@flag164 - FuncTable, ..@flag165 - FuncTable, ..@flag166 - FuncTable, ..@flag167 - FuncTable
+    dd ..@flag168 - FuncTable, ..@flag169 - FuncTable, ..@flag170 - FuncTable, ..@flag171 - FuncTable, ..@flag172 - FuncTable, ..@flag173 - FuncTable, ..@flag174 - FuncTable, ..@flag175 - FuncTable
+    dd ..@flag176 - FuncTable, ..@flag177 - FuncTable, ..@flag178 - FuncTable, ..@flag179 - FuncTable, ..@flag180 - FuncTable, ..@flag181 - FuncTable, ..@flag182 - FuncTable, ..@flag183 - FuncTable
+    dd ..@flag184 - FuncTable, ..@flag185 - FuncTable, ..@flag186 - FuncTable, ..@flag187 - FuncTable, ..@flag188 - FuncTable, ..@flag189 - FuncTable, ..@flag190 - FuncTable, ..@flag191 - FuncTable
+    dd ..@flag192 - FuncTable, ..@flag193 - FuncTable, ..@flag194 - FuncTable, ..@flag195 - FuncTable, ..@flag196 - FuncTable, ..@flag197 - FuncTable, ..@flag198 - FuncTable, ..@flag199 - FuncTable
+    dd ..@flag200 - FuncTable, ..@flag201 - FuncTable, ..@flag202 - FuncTable, ..@flag203 - FuncTable, ..@flag204 - FuncTable, ..@flag205 - FuncTable, ..@flag206 - FuncTable, ..@flag207 - FuncTable
+    dd ..@flag208 - FuncTable, ..@flag209 - FuncTable, ..@flag210 - FuncTable, ..@flag211 - FuncTable, ..@flag212 - FuncTable, ..@flag213 - FuncTable, ..@flag214 - FuncTable, ..@flag215 - FuncTable
+    dd ..@flag216 - FuncTable, ..@flag217 - FuncTable, ..@flag218 - FuncTable, ..@flag219 - FuncTable, ..@flag220 - FuncTable, ..@flag221 - FuncTable, ..@flag222 - FuncTable, ..@flag223 - FuncTable
+    dd ..@flag224 - FuncTable, ..@flag225 - FuncTable, ..@flag226 - FuncTable, ..@flag227 - FuncTable, ..@flag228 - FuncTable, ..@flag229 - FuncTable, ..@flag230 - FuncTable, ..@flag231 - FuncTable
+    dd ..@flag232 - FuncTable, ..@flag233 - FuncTable, ..@flag234 - FuncTable, ..@flag235 - FuncTable, ..@flag236 - FuncTable, ..@flag237 - FuncTable, ..@flag238 - FuncTable, ..@flag239 - FuncTable
+    dd ..@flag240 - FuncTable, ..@flag241 - FuncTable, ..@flag242 - FuncTable, ..@flag243 - FuncTable, ..@flag244 - FuncTable, ..@flag245 - FuncTable, ..@flag246 - FuncTable, ..@flag247 - FuncTable
+    dd ..@flag248 - FuncTable, ..@flag249 - FuncTable, ..@flag250 - FuncTable, ..@flag251 - FuncTable, ..@flag252 - FuncTable, ..@flag253 - FuncTable, ..@flag254 - FuncTable, ..@flag255 - FuncTable
 
 FuncTable2:
-    dd ..@cross0, ..@cross1, ..@cross2, ..@flag0,
-    dd ..@cross4, ..@flag0,  ..@flag0,  ..@flag0,
-    dd ..@cross8, ..@flag0,  ..@flag0,  ..@flag0,
-    dd ..@flag0,  ..@flag0,  ..@flag0,  ..@flag0
+    dd ..@cross0 - FuncTable2, ..@cross1 - FuncTable2, ..@cross2 - FuncTable2, ..@flag0 - FuncTable2,
+    dd ..@cross4 - FuncTable2, ..@flag0  - FuncTable2, ..@flag0  - FuncTable2, ..@flag0 - FuncTable2,
+    dd ..@cross8 - FuncTable2, ..@flag0  - FuncTable2, ..@flag0  - FuncTable2, ..@flag0 - FuncTable2,
+    dd ..@flag0  - FuncTable2, ..@flag0  - FuncTable2, ..@flag0  - FuncTable2, ..@flag0 - FuncTable2
 
 
 %ifidn __OUTPUT_FORMAT__,elf
diff --git a/graphics/scaler/hq3x_i386.asm b/graphics/scaler/hq3x_i386.asm
index 209a7b8a34a..8d1224c90fc 100644
--- a/graphics/scaler/hq3x_i386.asm
+++ b/graphics/scaler/hq3x_i386.asm
@@ -20,49 +20,61 @@
 
 GLOBAL _hq3x_16
 
-EXTERN _RGBtoYUV
-EXTERN _hqx_highbits
-EXTERN _hqx_lowbits
-EXTERN _hqx_low2bits
-EXTERN _hqx_low3bits
-EXTERN _hqx_greenMask
-EXTERN _hqx_redBlueMask
-EXTERN _hqx_green_redBlue_Mask
-
-SECTION .bss
-linesleft resd 1
-xcounter  resd 1
-cross     resd 1
-nextline  resd 1
-prevline  resd 1
-w1        resd 1
-w2        resd 1
-w3        resd 1
-w4        resd 1
-w5        resd 1
-w6        resd 1
-w7        resd 1
-w8        resd 1
-w9        resd 1
-
-tmpData        resd 1
-
-SECTION .data
-
-reg_blank    dd  0,0
-const7       dd  0x00070007,0x00000007
-threshold    dd  0x00300706,0x00000000
-moduloSrc    dd  0
-moduloDst    dd  0
-
 SECTION .text
 
+thresholdVal0        equ 0x00300706
+thresholdVal1        equ 0x00000000
+
+; Function variables space
+
+tmp_data             equ -116
+w1                   equ -112
+w2                   equ -108
+w3                   equ -104
+w4                   equ -100
+w5                   equ -96
+w6                   equ -92
+w7                   equ -88
+w8                   equ -84
+w9                   equ -80
+linesleft            equ -76
+xcounter             equ -72
+cross                equ -68
+nextline             equ -64
+prevline             equ -60
+moduloSrc            equ -56
+moduloDst            equ -52
+threshold            equ -48
+; threshold is 64bits
+pFuncTable2          equ -40
+pFuncTable           equ -36
+
+; the following variables must match hqx_parameters structure
+; Address of the whole structure
+hqx_params_          equ -32
+RGBtoYUV             equ -32
+highbits             equ -28
+lowbits              equ -24
+low2bits             equ -20
+low3bits             equ -16
+greenMask            equ -12
+redBlueMask          equ -8
+green_redBlue_Mask   equ -4
+
+inbuffer     equ 8
+outbuffer    equ 12
+Xres         equ 16
+Yres         equ 20
+srcPitch     equ 24
+dstPitch     equ 28
+hqx_params   equ 32
+
 %macro TestDiff 2
     xor     ecx,ecx
     mov     edx,[%1]
     cmp     edx,[%2]
     je      %%fin
-    mov     ecx,[_RGBtoYUV]
+    mov     ecx,[ebp+RGBtoYUV]
     movd    mm1,[ecx+edx*4]
     movq    mm5,mm1
     mov     edx,[%2]
@@ -70,7 +82,7 @@ SECTION .text
     psubusb mm1,mm2
     psubusb mm2,mm5
     por     mm1,mm2
-    psubusb mm1,[threshold]
+    psubusb mm1,[ebp+threshold]
     movd    ecx,mm1
 %%fin:
 %endmacro
@@ -136,12 +148,12 @@ SECTION .text
 %macro Interp1 3
     mov edx,%2
     mov ecx,%3
-    and edx,[_hqx_highbits]
-    and ecx,[_hqx_highbits]
+    and edx,[ebp+highbits]
+    and ecx,[ebp+highbits]
     add ecx,edx
     shr ecx,1
-    add ecx,[_hqx_lowbits]
-    and ecx,[_hqx_highbits]
+    add ecx,[ebp+lowbits]
+    and ecx,[ebp+highbits]
     add edx,ecx
     shr edx,1
     mov %1,dx
@@ -152,14 +164,14 @@ SECTION .text
 %macro Interp2 4
     mov edx,%3
     mov ecx,%4
-    and edx,[_hqx_highbits]
-    and ecx,[_hqx_highbits]
+    and edx,[ebp+highbits]
+    and ecx,[ebp+highbits]
     add ecx,edx
     shr ecx,1
-    add ecx,[_hqx_lowbits]
+    add ecx,[ebp+lowbits]
     mov edx,%2
-    and ecx,[_hqx_highbits]
-    and edx,[_hqx_highbits]
+    and ecx,[ebp+highbits]
+    and edx,[ebp+highbits]
     add edx,ecx
     shr edx,1
     mov %1,dx
@@ -170,26 +182,26 @@ SECTION .text
 %macro Interp3 2
 	; ((c1&kLowBitsMask)<<2)
 	mov ecx,eax
-	and ecx,[_hqx_lowbits]
+	and ecx,[ebp+lowbits]
 	shl ecx,2
 
 	; + ((c1&kLow2Bits)<<1)
 	mov edx,eax
-	and edx,[_hqx_low2bits]
+	and edx,[ebp+low2bits]
 	lea ecx, [ecx + 2*edx]
 
 	; + (c1&kLow3Bits)
 	mov edx,eax
-	and edx,[_hqx_low3bits]
+	and edx,[ebp+low3bits]
 	add ecx,edx
 
 	; + (c2&kLow3Bits)
 	mov edx,%2
-	and edx,[_hqx_low3bits]
+	and edx,[ebp+low3bits]
 	add ecx,edx
 
 	; & kLow3Bits  -> ecx
-	and ecx,[_hqx_low3bits]
+	and ecx,[ebp+low3bits]
 
 	; compute ((c1*7+c2) - ecx) >> 3;
 	lea edx,[8*eax]
@@ -209,13 +221,13 @@ SECTION .text
 	mov edx, %2
 	shl edx, 16
 	or  edx, %2
-	and edx, [_hqx_green_redBlue_Mask]
+	and edx, [ebp+green_redBlue_Mask]
 
 	; unpack c3 to ecx
 	mov ecx, %3
 	shl ecx, 16
 	or  ecx, %3
-	and ecx, [_hqx_green_redBlue_Mask]
+	and ecx, [ebp+green_redBlue_Mask]
 
 	; sum c2 and c3 -> store in ecx
 	add ecx, edx
@@ -228,12 +240,12 @@ SECTION .text
 	mov ecx, eax
 	shl ecx, 16
 	or  ecx, eax
-	and ecx, [_hqx_green_redBlue_Mask]
+	and ecx, [ebp+green_redBlue_Mask]
 
 	; sum 2*c1 + 7*(c2+c3), divide by 16, mask the result
 	lea edx, [edx + 2*ecx]
 	shr edx, 4
-	and edx, [_hqx_green_redBlue_Mask]
+	and edx, [ebp+green_redBlue_Mask]
 
 	; finally, repack the mixed pixel
 	mov ecx, edx
@@ -250,38 +262,38 @@ SECTION .text
     mov ecx,%3
 
     xor edx,ecx       ; xor pixels
-    mov [tmpData],edx ; store tmp result
+    mov [ebp+tmp_data],edx ; store tmp result
     xor edx,ecx       ; restore original value of edx (avoids a reload)
     add edx,ecx       ; sum pixels
-    mov ecx,[tmpData]
-    and ecx,[_hqx_lowbits]
+    mov ecx,[ebp+tmp_data]
+    and ecx,[ebp+lowbits]
     sub edx,ecx
     shr edx,1
     mov %1,dx
 %endmacro
 
 %macro PIXEL00_1M 0
-    Interp1 [edi],eax,[w1]
+    Interp1 [edi],eax,[ebp+w1]
 %endmacro
 
 %macro PIXEL00_1U 0
-    Interp1 [edi],eax,[w2]
+    Interp1 [edi],eax,[ebp+w2]
 %endmacro
 
 %macro PIXEL00_1L 0
-    Interp1 [edi],eax,[w4]
+    Interp1 [edi],eax,[ebp+w4]
 %endmacro
 
 %macro PIXEL00_2 0
-    Interp2 [edi],eax,[w4],[w2]
+    Interp2 [edi],eax,[ebp+w4],[ebp+w2]
 %endmacro
 
 %macro PIXEL00_4 0
-    Interp4 [edi],[w4],[w2]
+    Interp4 [edi],[ebp+w4],[ebp+w2]
 %endmacro
 
 %macro PIXEL00_5 0
-    Interp5 [edi],[w4],[w2]
+    Interp5 [edi],[ebp+w4],[ebp+w2]
 %endmacro
 
 %macro PIXEL00_C 0
@@ -289,15 +301,15 @@ SECTION .text
 %endmacro
 
 %macro PIXEL01_1 0
-    Interp1 [edi+2],eax,[w2]
+    Interp1 [edi+2],eax,[ebp+w2]
 %endmacro
 
 %macro PIXEL01_3 0
-    Interp3 [edi+2],[w2]
+    Interp3 [edi+2],[ebp+w2]
 %endmacro
 
 %macro PIXEL01_6 0
-    Interp1 [edi+2],[w2],eax
+    Interp1 [edi+2],[ebp+w2],eax
 %endmacro
 
 %macro PIXEL01_C 0
@@ -305,27 +317,27 @@ SECTION .text
 %endmacro
 
 %macro PIXEL02_1M 0
-    Interp1 [edi+4],eax,[w3]
+    Interp1 [edi+4],eax,[ebp+w3]
 %endmacro
 
 %macro PIXEL02_1U 0
-    Interp1 [edi+4],eax,[w2]
+    Interp1 [edi+4],eax,[ebp+w2]
 %endmacro
 
 %macro PIXEL02_1R 0
-    Interp1 [edi+4],eax,[w6]
+    Interp1 [edi+4],eax,[ebp+w6]
 %endmacro
 
 %macro PIXEL02_2 0
-    Interp2 [edi+4],eax,[w2],[w6]
+    Interp2 [edi+4],eax,[ebp+w2],[ebp+w6]
 %endmacro
 
 %macro PIXEL02_4 0
-    Interp4 [edi+4],[w2],[w6]
+    Interp4 [edi+4],[ebp+w2],[ebp+w6]
 %endmacro
 
 %macro PIXEL02_5 0
-    Interp5 [edi+4],[w2],[w6]
+    Interp5 [edi+4],[ebp+w2],[ebp+w6]
 %endmacro
 
 %macro PIXEL02_C 0
@@ -333,15 +345,15 @@ SECTION .text
 %endmacro
 
 %macro PIXEL10_1 0
-    Interp1 [edi+ebx],eax,[w4]
+    Interp1 [edi+ebx],eax,[ebp+w4]
 %endmacro
 
 %macro PIXEL10_3 0
-    Interp3 [edi+ebx],[w4]
+    Interp3 [edi+ebx],[ebp+w4]
 %endmacro
 
 %macro PIXEL10_6 0
-    Interp1 [edi+ebx],[w4],eax
+    Interp1 [edi+ebx],[ebp+w4],eax
 %endmacro
 
 %macro PIXEL10_C 0
@@ -353,15 +365,15 @@ SECTION .text
 %endmacro
 
 %macro PIXEL12_1 0
-    Interp1 [edi+ebx+4],eax,[w6]
+    Interp1 [edi+ebx+4],eax,[ebp+w6]
 %endmacro
 
 %macro PIXEL12_3 0
-    Interp3 [edi+ebx+4],[w6]
+    Interp3 [edi+ebx+4],[ebp+w6]
 %endmacro
 
 %macro PIXEL12_6 0
-    Interp1 [edi+ebx+4],[w6],eax
+    Interp1 [edi+ebx+4],[ebp+w6],eax
 %endmacro
 
 %macro PIXEL12_C 0
@@ -369,27 +381,27 @@ SECTION .text
 %endmacro
 
 %macro PIXEL20_1M 0
-    Interp1 [edi+ebx*2],eax,[w7]
+    Interp1 [edi+ebx*2],eax,[ebp+w7]
 %endmacro
 
 %macro PIXEL20_1D 0
-    Interp1 [edi+ebx*2],eax,[w8]
+    Interp1 [edi+ebx*2],eax,[ebp+w8]
 %endmacro
 
 %macro PIXEL20_1L 0
-    Interp1 [edi+ebx*2],eax,[w4]
+    Interp1 [edi+ebx*2],eax,[ebp+w4]
 %endmacro
 
 %macro PIXEL20_2 0
-    Interp2 [edi+ebx*2],eax,[w8],[w4]
+    Interp2 [edi+ebx*2],eax,[ebp+w8],[ebp+w4]
 %endmacro
 
 %macro PIXEL20_4 0
-    Interp4 [edi+ebx*2],[w8],[w4]
+    Interp4 [edi+ebx*2],[ebp+w8],[ebp+w4]
 %endmacro
 
 %macro PIXEL20_5 0
-    Interp5 [edi+ebx*2],[w8],[w4]
+    Interp5 [edi+ebx*2],[ebp+w8],[ebp+w4]
 %endmacro
 
 %macro PIXEL20_C 0
@@ -397,15 +409,15 @@ SECTION .text
 %endmacro
 
 %macro PIXEL21_1 0
-    Interp1 [edi+ebx*2+2],eax,[w8]
+    Interp1 [edi+ebx*2+2],eax,[ebp+w8]
 %endmacro
 
 %macro PIXEL21_3 0
-    Interp3 [edi+ebx*2+2],[w8]
+    Interp3 [edi+ebx*2+2],[ebp+w8]
 %endmacro
 
 %macro PIXEL21_6 0
-    Interp1 [edi+ebx*2+2],[w8],eax
+    Interp1 [edi+ebx*2+2],[ebp+w8],eax
 %endmacro
 
 %macro PIXEL21_C 0
@@ -413,163 +425,172 @@ SECTION .text
 %endmacro
 
 %macro PIXEL22_1M 0
-    Interp1 [edi+ebx*2+4],eax,[w9]
+    Interp1 [edi+ebx*2+4],eax,[ebp+w9]
 %endmacro
 
 %macro PIXEL22_1D 0
-    Interp1 [edi+ebx*2+4],eax,[w8]
+    Interp1 [edi+ebx*2+4],eax,[ebp+w8]
 %endmacro
 
 %macro PIXEL22_1R 0
-    Interp1 [edi+ebx*2+4],eax,[w6]
+    Interp1 [edi+ebx*2+4],eax,[ebp+w6]
 %endmacro
 
 %macro PIXEL22_2 0
-    Interp2 [edi+ebx*2+4],eax,[w6],[w8]
+    Interp2 [edi+ebx*2+4],eax,[ebp+w6],[ebp+w8]
 %endmacro
 
 %macro PIXEL22_4 0
-    Interp4 [edi+ebx*2+4],[w6],[w8]
+    Interp4 [edi+ebx*2+4],[ebp+w6],[ebp+w8]
 %endmacro
 
 %macro PIXEL22_5 0
-    Interp5 [edi+ebx*2+4],[w6],[w8]
+    Interp5 [edi+ebx*2+4],[ebp+w6],[ebp+w8]
 %endmacro
 
 %macro PIXEL22_C 0
     mov [edi+ebx*2+4],ax
 %endmacro
 
-inbuffer     equ 8
-outbuffer    equ 12
-Xres         equ 16
-Yres         equ 20
-srcPitch     equ 24
-dstPitch     equ 28
-
 _hq3x_16:
     push ebp
     mov ebp,esp
+    sub esp,116
     pushad
 
+    call    _get_pc
+.pc_anchor:
+    mov     ecx,eax
+    add     eax,(FuncTable-.pc_anchor)
+    mov     dword[ebp+pFuncTable],eax
+    add     ecx,(FuncTable2-.pc_anchor)
+    mov     dword[ebp+pFuncTable2],ecx
+    mov     dword[ebp+threshold],thresholdVal0
+    mov     dword[ebp+threshold+4], thresholdVal1
+
+    mov     esi,[ebp+hqx_params]
+    lea     edi,[ebp+hqx_params_]
+    mov     ecx,(green_redBlue_Mask+4-hqx_params_)/4
+    rep     movsd
+
     mov     esi,[ebp+inbuffer]
     mov     edi,[ebp+outbuffer]
     mov     edx,[ebp+Yres]
-    mov     [linesleft],edx
+    mov     [ebp+linesleft],edx
     mov     ecx,[ebp+Xres]
     shl     ecx,1
     mov     ebx,[ebp+dstPitch]
-    mov     dword[moduloDst],ebx
-    sub     dword[moduloDst],ecx
-    mov     ecx,dword[moduloDst]
-    shl     dword[moduloDst],1
-    add     dword[moduloDst],ecx
+    mov     dword[ebp+moduloDst],ebx
+    sub     dword[ebp+moduloDst],ecx
+    mov     ecx,dword[ebp+moduloDst]
+    shl     dword[ebp+moduloDst],1
+    add     dword[ebp+moduloDst],ecx
     mov     ecx,[ebp+Xres]
     shl     ecx,1
     mov     ebx,[ebp+srcPitch]
-    mov     dword[nextline],ebx
-    mov     dword[moduloSrc],ebx
-    sub     dword[moduloSrc],ecx
+    mov     dword[ebp+nextline],ebx
+    mov     dword[ebp+moduloSrc],ebx
+    sub     dword[ebp+moduloSrc],ecx
     neg     ebx
-    mov     dword[prevline],ebx
+    mov     dword[ebp+prevline],ebx
 .loopy:
     mov     ecx,[ebp+Xres]
-    mov     dword[xcounter],ecx
+    mov     dword[ebp+xcounter],ecx
 .loopx:
-    mov     ebx,[prevline]
+    mov     ebx,[ebp+prevline]
     movq    mm5,[esi+ebx-2]
     movq    mm6,[esi-2]
-    mov     ebx,[nextline]
+    mov     ebx,[ebp+nextline]
     movq    mm7,[esi+ebx-2]
     movd    eax,mm5
     movzx   edx,ax
-    mov     [w1],edx
+    mov     [ebp+w1],edx
     shr     eax,16
-    mov     [w2],eax
+    mov     [ebp+w2],eax
     psrlq   mm5,32
     movd    eax,mm5
     movzx   edx,ax
-    mov     [w3],edx
+    mov     [ebp+w3],edx
     movd    eax,mm6
     movzx   edx,ax
-    mov     [w4],edx
+    mov     [ebp+w4],edx
     shr     eax,16
-    mov     [w5],eax
+    mov     [ebp+w5],eax
     psrlq   mm6,32
     movd    eax,mm6
     movzx   edx,ax
-    mov     [w6],edx
+    mov     [ebp+w6],edx
     movd    eax,mm7
     movzx   edx,ax
-    mov     [w7],edx
+    mov     [ebp+w7],edx
     shr     eax,16
-    mov     [w8],eax
+    mov     [ebp+w8],eax
     psrlq   mm7,32
     movd    eax,mm7
     movzx   edx,ax
-    mov     [w9],edx
+    mov     [ebp+w9],edx
 .flags:
-    mov     ebx,[_RGBtoYUV]
-    mov     eax,[w5]
+    mov     ebx,[ebp+RGBtoYUV]
+    mov     eax,[ebp+w5]
     xor     ecx,ecx
     movd    mm5,[ebx+eax*4]
-    mov     dword[cross],0
+    mov     dword[ebp+cross],0
 
-    mov     edx,[w2]
+    mov     edx,[ebp+w2]
     cmp     eax,edx
     je      .noflag2
-    or      dword[cross],1
+    or      dword[ebp+cross],1
     movq    mm1,mm5
     movd    mm2,[ebx+edx*4]
     psubusb mm1,mm2
     psubusb mm2,mm5
     por     mm1,mm2
-    psubusb mm1,[threshold]
+    psubusb mm1,[ebp+threshold]
     movd    edx,mm1
     test    edx,edx
     jz      .noflag2
     or      ecx,2
 .noflag2:
-    mov     edx,[w4]
+    mov     edx,[ebp+w4]
     cmp     eax,edx
     je      .noflag4
-    or      dword[cross],2
+    or      dword[ebp+cross],2
     movq    mm1,mm5
     movd    mm2,[ebx+edx*4]
     psubusb mm1,mm2
     psubusb mm2,mm5
     por     mm1,mm2
-    psubusb mm1,[threshold]
+    psubusb mm1,[ebp+threshold]
     movd    edx,mm1
     test    edx,edx
     jz      .noflag4
     or      ecx,8
 .noflag4:
-    mov     edx,[w6]
+    mov     edx,[ebp+w6]
     cmp     eax,edx
     je      .noflag6
-    or      dword[cross],4
+    or      dword[ebp+cross],4
     movq    mm1,mm5
     movd    mm2,[ebx+edx*4]
     psubusb mm1,mm2
     psubusb mm2,mm5
     por     mm1,mm2
-    psubusb mm1,[threshold]
+    psubusb mm1,[ebp+threshold]
     movd    edx,mm1
     test    edx,edx
     jz      .noflag6
     or      ecx,16
 .noflag6:
-    mov     edx,[w8]
+    mov     edx,[ebp+w8]
     cmp     eax,edx
     je      .noflag8
-    or      dword[cross],8
+    or      dword[ebp+cross],8
     movq    mm1,mm5
     movd    mm2,[ebx+edx*4]
     psubusb mm1,mm2
     psubusb mm2,mm5
     por     mm1,mm2
-    psubusb mm1,[threshold]
+    psubusb mm1,[ebp+threshold]
     movd    edx,mm1
     test    edx,edx
     jz      .noflag8
@@ -577,11 +598,14 @@ _hq3x_16:
 .noflag8:
     test    ecx,ecx
     jnz     .testflag1
-    mov     ecx,[cross]
+    mov     ecx,[ebp+cross]
+    mov     ebx, [ebp+pFuncTable2]
+    mov     ecx, [ebx+ecx*4]
+    add     ecx, ebx
     mov     ebx,[ebp+dstPitch]
-    jmp     [FuncTable2+ecx*4]
+    jmp     ecx
 .testflag1:
-    mov     edx,[w1]
+    mov     edx,[ebp+w1]
     cmp     eax,edx
     je      .noflag1
     movq    mm1,mm5
@@ -589,13 +613,13 @@ _hq3x_16:
     psubusb mm1,mm2
     psubusb mm2,mm5
     por     mm1,mm2
-    psubusb mm1,[threshold]
+    psubusb mm1,[ebp+threshold]
     movd    edx,mm1
     test    edx,edx
     jz      .noflag1
     or      ecx,1
 .noflag1:
-    mov     edx,[w3]
+    mov     edx,[ebp+w3]
     cmp     eax,edx
     je      .noflag3
     movq    mm1,mm5
@@ -603,13 +627,13 @@ _hq3x_16:
     psubusb mm1,mm2
     psubusb mm2,mm5
     por     mm1,mm2
-    psubusb mm1,[threshold]
+    psubusb mm1,[ebp+threshold]
     movd    edx,mm1
     test    edx,edx
     jz      .noflag3
     or      ecx,4
 .noflag3:
-    mov     edx,[w7]
+    mov     edx,[ebp+w7]
     cmp     eax,edx
     je      .noflag7
     movq    mm1,mm5
@@ -617,13 +641,13 @@ _hq3x_16:
     psubusb mm1,mm2
     psubusb mm2,mm5
     por     mm1,mm2
-    psubusb mm1,[threshold]
+    psubusb mm1,[ebp+threshold]
     movd    edx,mm1
     test    edx,edx
     jz      .noflag7
     or      ecx,32
 .noflag7:
-    mov     edx,[w9]
+    mov     edx,[ebp+w9]
     cmp     eax,edx
     je      .noflag9
     movq    mm1,mm5
@@ -631,14 +655,17 @@ _hq3x_16:
     psubusb mm1,mm2
     psubusb mm2,mm5
     por     mm1,mm2
-    psubusb mm1,[threshold]
+    psubusb mm1,[ebp+threshold]
     movd    edx,mm1
     test    edx,edx
     jz      .noflag9
     or      ecx,128
 .noflag9:
+    mov  ebx, [ebp+pFuncTable]
+    mov  ecx, [ebx+ecx*4]
+    add  ecx, ebx
     mov  ebx,[ebp+dstPitch]
-    jmp  [FuncTable+ecx*4]
+    jmp  ecx
 
 ..@flag0:
 ..@flag1:
@@ -837,7 +864,7 @@ _hq3x_16:
 ..@flag18:
 ..@flag50:
     PIXEL00_1M
-    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_1M,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_C,PIXEL02_1M,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
     PIXEL10_1
     PIXEL11
     PIXEL20_2
@@ -852,7 +879,7 @@ _hq3x_16:
     PIXEL10_1
     PIXEL11
     PIXEL20_1M
-    DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_1M,PIXEL12_3,PIXEL21_3,PIXEL22_4
+    DiffOrNot ebp+w6,ebp+w8,PIXEL12_C,PIXEL21_C,PIXEL22_1M,PIXEL12_3,PIXEL21_3,PIXEL22_4
     jmp .loopx_end
 ..@flag72:
 ..@flag76:
@@ -861,12 +888,12 @@ _hq3x_16:
     PIXEL02_2
     PIXEL11
     PIXEL12_1
-    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_1M,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_C,PIXEL20_1M,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
     PIXEL22_1M
     jmp .loopx_end
 ..@flag10:
 ..@flag138:
-    DiffOrNot w4,w2,PIXEL00_1M,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_1M,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
     PIXEL02_1M
     PIXEL11
     PIXEL12_1
@@ -951,7 +978,7 @@ _hq3x_16:
 ..@flag22:
 ..@flag54:
     PIXEL00_1M
-    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
     PIXEL10_1
     PIXEL11
     PIXEL20_2
@@ -966,7 +993,7 @@ _hq3x_16:
     PIXEL10_1
     PIXEL11
     PIXEL20_1M
-    DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+    DiffOrNot ebp+w6,ebp+w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
     jmp .loopx_end
 ..@flag104:
 ..@flag108:
@@ -975,12 +1002,12 @@ _hq3x_16:
     PIXEL02_2
     PIXEL11
     PIXEL12_1
-    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
     PIXEL22_1M
     jmp .loopx_end
 ..@flag11:
 ..@flag139:
-    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
     PIXEL02_1M
     PIXEL11
     PIXEL12_1
@@ -990,7 +1017,7 @@ _hq3x_16:
     jmp .loopx_end
 ..@flag19:
 ..@flag51:
-    DiffOrNot w2,w6,PIXEL00_1L,PIXEL01_C,PIXEL02_1M,PIXEL12_C,PIXEL00_2,PIXEL01_6,PIXEL02_5,PIXEL12_1
+    DiffOrNot ebp+w2,ebp+w6,PIXEL00_1L,PIXEL01_C,PIXEL02_1M,PIXEL12_C,PIXEL00_2,PIXEL01_6,PIXEL02_5,PIXEL12_1
     PIXEL10_1
     PIXEL11
     PIXEL20_2
@@ -999,7 +1026,7 @@ _hq3x_16:
     jmp .loopx_end
 ..@flag146:
 ..@flag178:
-    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_1M,PIXEL12_C,PIXEL22_1D,PIXEL01_1,PIXEL02_5,PIXEL12_6,PIXEL22_2
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_C,PIXEL02_1M,PIXEL12_C,PIXEL22_1D,PIXEL01_1,PIXEL02_5,PIXEL12_6,PIXEL22_2
     PIXEL00_1M
     PIXEL10_1
     PIXEL11
@@ -1008,7 +1035,7 @@ _hq3x_16:
     jmp .loopx_end
 ..@flag84:
 ..@flag85:
-    DiffOrNot w6,w8,PIXEL02_1U,PIXEL12_C,PIXEL21_C,PIXEL22_1M,PIXEL02_2,PIXEL12_6,PIXEL21_1,PIXEL22_5
+    DiffOrNot ebp+w6,ebp+w8,PIXEL02_1U,PIXEL12_C,PIXEL21_C,PIXEL22_1M,PIXEL02_2,PIXEL12_6,PIXEL21_1,PIXEL22_5
     PIXEL00_2
     PIXEL01_1
     PIXEL10_1
@@ -1017,7 +1044,7 @@ _hq3x_16:
     jmp .loopx_end
 ..@flag112:
 ..@flag113:
-    DiffOrNot w6,w8,PIXEL12_C,PIXEL20_1L,PIXEL21_C,PIXEL22_1M,PIXEL12_1,PIXEL20_2,PIXEL21_6,PIXEL22_5
+    DiffOrNot ebp+w6,ebp+w8,PIXEL12_C,PIXEL20_1L,PIXEL21_C,PIXEL22_1M,PIXEL12_1,PIXEL20_2,PIXEL21_6,PIXEL22_5
     PIXEL00_2
     PIXEL01_1
     PIXEL02_1M
@@ -1026,7 +1053,7 @@ _hq3x_16:
     jmp .loopx_end
 ..@flag200:
 ..@flag204:
-    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_1M,PIXEL21_C,PIXEL22_1R,PIXEL10_1,PIXEL20_5,PIXEL21_6,PIXEL22_2
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_C,PIXEL20_1M,PIXEL21_C,PIXEL22_1R,PIXEL10_1,PIXEL20_5,PIXEL21_6,PIXEL22_2
     PIXEL00_1M
     PIXEL01_1
     PIXEL02_2
@@ -1035,7 +1062,7 @@ _hq3x_16:
     jmp .loopx_end
 ..@flag73:
 ..@flag77:
-    DiffOrNot w8,w4,PIXEL00_1U,PIXEL10_C,PIXEL20_1M,PIXEL21_C,PIXEL00_2,PIXEL10_6,PIXEL20_5,PIXEL21_1
+    DiffOrNot ebp+w8,ebp+w4,PIXEL00_1U,PIXEL10_C,PIXEL20_1M,PIXEL21_C,PIXEL00_2,PIXEL10_6,PIXEL20_5,PIXEL21_1
     PIXEL01_1
     PIXEL02_2
     PIXEL11
@@ -1044,7 +1071,7 @@ _hq3x_16:
     jmp .loopx_end
 ..@flag42:
 ..@flag170:
-    DiffOrNot w4,w2,PIXEL00_1M,PIXEL01_C,PIXEL10_C,PIXEL20_1D,PIXEL00_5,PIXEL01_1,PIXEL10_6,PIXEL20_2
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_1M,PIXEL01_C,PIXEL10_C,PIXEL20_1D,PIXEL00_5,PIXEL01_1,PIXEL10_6,PIXEL20_2
     PIXEL02_1M
     PIXEL11
     PIXEL12_1
@@ -1053,7 +1080,7 @@ _hq3x_16:
     jmp .loopx_end
 ..@flag14:
 ..@flag142:
-    DiffOrNot w4,w2,PIXEL00_1M,PIXEL01_C,PIXEL02_1R,PIXEL10_C,PIXEL00_5,PIXEL01_6,PIXEL02_2,PIXEL10_1
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_1M,PIXEL01_C,PIXEL02_1R,PIXEL10_C,PIXEL00_5,PIXEL01_6,PIXEL02_2,PIXEL10_1
     PIXEL11
     PIXEL12_1
     PIXEL20_1M
@@ -1150,9 +1177,9 @@ _hq3x_16:
     jmp .loopx_end
 ..@flag26:
 ..@flag31:
-    DiffOrNot w4,w2,PIXEL00_C,PIXEL10_C,PIXEL00_4,PIXEL10_3
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_C,PIXEL10_C,PIXEL00_4,PIXEL10_3
     PIXEL01_C
-    DiffOrNot w2,w6,PIXEL02_C,PIXEL12_C,PIXEL02_4,PIXEL12_3
+    DiffOrNot ebp+w2,ebp+w6,PIXEL02_C,PIXEL12_C,PIXEL02_4,PIXEL12_3
     PIXEL11
     PIXEL20_1M
     PIXEL21_1
@@ -1161,12 +1188,12 @@ _hq3x_16:
 ..@flag82:
 ..@flag214:
     PIXEL00_1M
-    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL01_3,PIXEL02_4
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_C,PIXEL02_C,PIXEL01_3,PIXEL02_4
     PIXEL10_1
     PIXEL11
     PIXEL12_C
     PIXEL20_1M
-    DiffOrNot w6,w8,PIXEL21_C,PIXEL22_C,PIXEL21_3,PIXEL22_4
+    DiffOrNot ebp+w6,ebp+w8,PIXEL21_C,PIXEL22_C,PIXEL21_3,PIXEL22_4
     jmp .loopx_end
 ..@flag88:
 ..@flag248:
@@ -1174,22 +1201,22 @@ _hq3x_16:
     PIXEL01_1
     PIXEL02_1M
     PIXEL11
-    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL10_3,PIXEL20_4
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_C,PIXEL20_C,PIXEL10_3,PIXEL20_4
     PIXEL21_C
-    DiffOrNot w6,w8,PIXEL12_C,PIXEL22_C,PIXEL12_3,PIXEL22_4
+    DiffOrNot ebp+w6,ebp+w8,PIXEL12_C,PIXEL22_C,PIXEL12_3,PIXEL22_4
     jmp .loopx_end
 ..@flag74:
 ..@flag107:
-    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL00_4,PIXEL01_3
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_C,PIXEL01_C,PIXEL00_4,PIXEL01_3
     PIXEL02_1M
     PIXEL10_C
     PIXEL11
     PIXEL12_1
-    DiffOrNot w8,w4,PIXEL20_C,PIXEL21_C,PIXEL20_4,PIXEL21_3
+    DiffOrNot ebp+w8,ebp+w4,PIXEL20_C,PIXEL21_C,PIXEL20_4,PIXEL21_3
     PIXEL22_1M
     jmp .loopx_end
 ..@flag27:
-    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
     PIXEL02_1M
     PIXEL11
     PIXEL12_C
@@ -1199,7 +1226,7 @@ _hq3x_16:
     jmp .loopx_end
 ..@flag86:
     PIXEL00_1M
-    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
     PIXEL10_1
     PIXEL11
     PIXEL20_1M
@@ -1213,7 +1240,7 @@ _hq3x_16:
     PIXEL10_C
     PIXEL11
     PIXEL20_1M
-    DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+    DiffOrNot ebp+w6,ebp+w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
     jmp .loopx_end
 ..@flag106:
     PIXEL00_1M
@@ -1221,12 +1248,12 @@ _hq3x_16:
     PIXEL02_1M
     PIXEL11
     PIXEL12_1
-    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
     PIXEL22_1M
     jmp .loopx_end
 ..@flag30:
     PIXEL00_1M
-    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
     PIXEL10_C
     PIXEL11
     PIXEL20_1M
@@ -1240,7 +1267,7 @@ _hq3x_16:
     PIXEL10_1
     PIXEL11
     PIXEL20_1M
-    DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+    DiffOrNot ebp+w6,ebp+w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
     jmp .loopx_end
 ..@flag120:
     PIXEL00_1M
@@ -1248,11 +1275,11 @@ _hq3x_16:
     PIXEL02_1M
     PIXEL11
     PIXEL12_C
-    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
     PIXEL22_1M
     jmp .loopx_end
 ..@flag75:
-    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
     PIXEL02_1M
     PIXEL11
     PIXEL12_1
@@ -1393,9 +1420,9 @@ _hq3x_16:
     PIXEL22_1D
     jmp .loopx_end
 ..@flag58:
-    DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_1M,PIXEL00_2
     PIXEL01_C
-    DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+    DiffOrNot ebp+w2,ebp+w6,PIXEL02_1M,PIXEL02_2
     PIXEL10_C
     PIXEL11
     PIXEL12_C
@@ -1406,13 +1433,13 @@ _hq3x_16:
 ..@flag83:
     PIXEL00_1L
     PIXEL01_C
-    DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+    DiffOrNot ebp+w2,ebp+w6,PIXEL02_1M,PIXEL02_2
     PIXEL10_1
     PIXEL11
     PIXEL12_C
     PIXEL20_1M
     PIXEL21_C
-    DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+    DiffOrNot ebp+w6,ebp+w8,PIXEL22_1M,PIXEL22_2
     jmp .loopx_end
 ..@flag92:
     PIXEL00_1M
@@ -1421,36 +1448,36 @@ _hq3x_16:
     PIXEL10_C
     PIXEL11
     PIXEL12_C
-    DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+    DiffOrNot ebp+w8,ebp+w4,PIXEL20_1M,PIXEL20_2
     PIXEL21_C
-    DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+    DiffOrNot ebp+w6,ebp+w8,PIXEL22_1M,PIXEL22_2
     jmp .loopx_end
 ..@flag202:
-    DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_1M,PIXEL00_2
     PIXEL01_C
     PIXEL02_1M
     PIXEL10_C
     PIXEL11
     PIXEL12_1
-    DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+    DiffOrNot ebp+w8,ebp+w4,PIXEL20_1M,PIXEL20_2
     PIXEL21_C
     PIXEL22_1R
     jmp .loopx_end
 ..@flag78:
-    DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_1M,PIXEL00_2
     PIXEL01_C
     PIXEL02_1R
     PIXEL10_C
     PIXEL11
     PIXEL12_1
-    DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+    DiffOrNot ebp+w8,ebp+w4,PIXEL20_1M,PIXEL20_2
     PIXEL21_C
     PIXEL22_1M
     jmp .loopx_end
 ..@flag154:
-    DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_1M,PIXEL00_2
     PIXEL01_C
-    DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+    DiffOrNot ebp+w2,ebp+w6,PIXEL02_1M,PIXEL02_2
     PIXEL10_C
     PIXEL11
     PIXEL12_C
@@ -1461,13 +1488,13 @@ _hq3x_16:
 ..@flag114:
     PIXEL00_1M
     PIXEL01_C
-    DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+    DiffOrNot ebp+w2,ebp+w6,PIXEL02_1M,PIXEL02_2
     PIXEL10_1
     PIXEL11
     PIXEL12_C
     PIXEL20_1L
     PIXEL21_C
-    DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+    DiffOrNot ebp+w6,ebp+w8,PIXEL22_1M,PIXEL22_2
     jmp .loopx_end
 ..@flag89:
     PIXEL00_1U
@@ -1476,24 +1503,24 @@ _hq3x_16:
     PIXEL10_C
     PIXEL11
     PIXEL12_C
-    DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+    DiffOrNot ebp+w8,ebp+w4,PIXEL20_1M,PIXEL20_2
     PIXEL21_C
-    DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+    DiffOrNot ebp+w6,ebp+w8,PIXEL22_1M,PIXEL22_2
     jmp .loopx_end
 ..@flag90:
-    DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_1M,PIXEL00_2
     PIXEL01_C
-    DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+    DiffOrNot ebp+w2,ebp+w6,PIXEL02_1M,PIXEL02_2
     PIXEL10_C
     PIXEL11
     PIXEL12_C
-    DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+    DiffOrNot ebp+w8,ebp+w4,PIXEL20_1M,PIXEL20_2
     PIXEL21_C
-    DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+    DiffOrNot ebp+w6,ebp+w8,PIXEL22_1M,PIXEL22_2
     jmp .loopx_end
 ..@flag55:
 ..@flag23:
-    DiffOrNot w2,w6,PIXEL00_1L,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL00_2,PIXEL01_6,PIXEL02_5,PIXEL12_1
+    DiffOrNot ebp+w2,ebp+w6,PIXEL00_1L,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL00_2,PIXEL01_6,PIXEL02_5,PIXEL12_1
     PIXEL10_1
     PIXEL11
     PIXEL20_2
@@ -1502,7 +1529,7 @@ _hq3x_16:
     jmp .loopx_end
 ..@flag182:
 ..@flag150:
-    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL22_1D,PIXEL01_1,PIXEL02_5,PIXEL12_6,PIXEL22_2
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL22_1D,PIXEL01_1,PIXEL02_5,PIXEL12_6,PIXEL22_2
     PIXEL00_1M
     PIXEL10_1
     PIXEL11
@@ -1511,7 +1538,7 @@ _hq3x_16:
     jmp .loopx_end
 ..@flag213:
 ..@flag212:
-    DiffOrNot w6,w8,PIXEL02_1U,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL02_2,PIXEL12_6,PIXEL21_1,PIXEL22_5
+    DiffOrNot ebp+w6,ebp+w8,PIXEL02_1U,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL02_2,PIXEL12_6,PIXEL21_1,PIXEL22_5
     PIXEL00_2
     PIXEL01_1
     PIXEL10_1
@@ -1520,7 +1547,7 @@ _hq3x_16:
     jmp .loopx_end
 ..@flag241:
 ..@flag240:
-    DiffOrNot w6,w8,PIXEL12_C,PIXEL20_1L,PIXEL21_C,PIXEL22_C,PIXEL12_1,PIXEL20_2,PIXEL21_6,PIXEL22_5
+    DiffOrNot ebp+w6,ebp+w8,PIXEL12_C,PIXEL20_1L,PIXEL21_C,PIXEL22_C,PIXEL12_1,PIXEL20_2,PIXEL21_6,PIXEL22_5
     PIXEL00_2
     PIXEL01_1
     PIXEL02_1M
@@ -1529,7 +1556,7 @@ _hq3x_16:
     jmp .loopx_end
 ..@flag236:
 ..@flag232:
-    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL22_1R,PIXEL10_1,PIXEL20_5,PIXEL21_6,PIXEL22_2
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL22_1R,PIXEL10_1,PIXEL20_5,PIXEL21_6,PIXEL22_2
     PIXEL00_1M
     PIXEL01_1
     PIXEL02_2
@@ -1538,7 +1565,7 @@ _hq3x_16:
     jmp .loopx_end
 ..@flag109:
 ..@flag105:
-    DiffOrNot w8,w4,PIXEL00_1U,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL00_2,PIXEL10_6,PIXEL20_5,PIXEL21_1
+    DiffOrNot ebp+w8,ebp+w4,PIXEL00_1U,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL00_2,PIXEL10_6,PIXEL20_5,PIXEL21_1
     PIXEL01_1
     PIXEL02_2
     PIXEL11
@@ -1547,7 +1574,7 @@ _hq3x_16:
     jmp .loopx_end
 ..@flag171:
 ..@flag43:
-    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL20_1D,PIXEL00_5,PIXEL01_1,PIXEL10_6,PIXEL20_2
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL20_1D,PIXEL00_5,PIXEL01_1,PIXEL10_6,PIXEL20_2
     PIXEL02_1M
     PIXEL11
     PIXEL12_1
@@ -1556,7 +1583,7 @@ _hq3x_16:
     jmp .loopx_end
 ..@flag143:
 ..@flag15:
-    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL02_1R,PIXEL10_C,PIXEL00_5,PIXEL01_6,PIXEL02_2,PIXEL10_1
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_C,PIXEL01_C,PIXEL02_1R,PIXEL10_C,PIXEL00_5,PIXEL01_6,PIXEL02_2,PIXEL10_1
     PIXEL11
     PIXEL12_1
     PIXEL20_1M
@@ -1569,11 +1596,11 @@ _hq3x_16:
     PIXEL02_1U
     PIXEL11
     PIXEL12_C
-    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
     PIXEL22_1M
     jmp .loopx_end
 ..@flag203:
-    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
     PIXEL02_1M
     PIXEL11
     PIXEL12_1
@@ -1583,7 +1610,7 @@ _hq3x_16:
     jmp .loopx_end
 ..@flag62:
     PIXEL00_1M
-    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
     PIXEL10_C
     PIXEL11
     PIXEL20_1D
@@ -1597,11 +1624,11 @@ _hq3x_16:
     PIXEL10_1
     PIXEL11
     PIXEL20_1M
-    DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+    DiffOrNot ebp+w6,ebp+w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
     jmp .loopx_end
 ..@flag118:
     PIXEL00_1M
-    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
     PIXEL10_1
     PIXEL11
     PIXEL20_1L
@@ -1615,7 +1642,7 @@ _hq3x_16:
     PIXEL10_C
     PIXEL11
     PIXEL20_1M
-    DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+    DiffOrNot ebp+w6,ebp+w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
     jmp .loopx_end
 ..@flag110:
     PIXEL00_1M
@@ -1623,11 +1650,11 @@ _hq3x_16:
     PIXEL02_1R
     PIXEL11
     PIXEL12_1
-    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
     PIXEL22_1M
     jmp .loopx_end
 ..@flag155:
-    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
     PIXEL02_1M
     PIXEL11
     PIXEL12_C
@@ -1729,12 +1756,12 @@ _hq3x_16:
     PIXEL02_1U
     PIXEL10_C
     PIXEL11
-    DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
-    DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+    DiffOrNot ebp+w8,ebp+w4,PIXEL20_1M,PIXEL20_2
+    DiffOrNot ebp+w6,ebp+w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
     jmp .loopx_end
 ..@flag158:
-    DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
-    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_1M,PIXEL00_2
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
     PIXEL10_C
     PIXEL11
     PIXEL20_1M
@@ -1742,26 +1769,26 @@ _hq3x_16:
     PIXEL22_1D
     jmp .loopx_end
 ..@flag234:
-    DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_1M,PIXEL00_2
     PIXEL01_C
     PIXEL02_1M
     PIXEL11
     PIXEL12_1
-    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
     PIXEL22_1R
     jmp .loopx_end
 ..@flag242:
     PIXEL00_1M
     PIXEL01_C
-    DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+    DiffOrNot ebp+w2,ebp+w6,PIXEL02_1M,PIXEL02_2
     PIXEL10_1
     PIXEL11
     PIXEL20_1L
-    DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+    DiffOrNot ebp+w6,ebp+w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
     jmp .loopx_end
 ..@flag59:
-    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
-    DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+    DiffOrNot ebp+w2,ebp+w6,PIXEL02_1M,PIXEL02_2
     PIXEL11
     PIXEL12_C
     PIXEL20_1D
@@ -1774,62 +1801,62 @@ _hq3x_16:
     PIXEL02_1M
     PIXEL11
     PIXEL12_C
-    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
-    DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+    DiffOrNot ebp+w6,ebp+w8,PIXEL22_1M,PIXEL22_2
     jmp .loopx_end
 .. at flag87:
     PIXEL00_1L
-    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
     PIXEL10_1
     PIXEL11
     PIXEL20_1M
     PIXEL21_C
-    DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+    DiffOrNot ebp+w6,ebp+w8,PIXEL22_1M,PIXEL22_2
     jmp .loopx_end
 .. at flag79:
-    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
     PIXEL02_1R
     PIXEL11
     PIXEL12_1
-    DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+    DiffOrNot ebp+w8,ebp+w4,PIXEL20_1M,PIXEL20_2
     PIXEL21_C
     PIXEL22_1M
     jmp .loopx_end
 .. at flag122:
-    DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_1M,PIXEL00_2
     PIXEL01_C
-    DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+    DiffOrNot ebp+w2,ebp+w6,PIXEL02_1M,PIXEL02_2
     PIXEL11
     PIXEL12_C
-    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
-    DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+    DiffOrNot ebp+w6,ebp+w8,PIXEL22_1M,PIXEL22_2
     jmp .loopx_end
 .. at flag94:
-    DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
-    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_1M,PIXEL00_2
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
     PIXEL10_C
     PIXEL11
-    DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+    DiffOrNot ebp+w8,ebp+w4,PIXEL20_1M,PIXEL20_2
     PIXEL21_C
-    DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+    DiffOrNot ebp+w6,ebp+w8,PIXEL22_1M,PIXEL22_2
     jmp .loopx_end
 .. at flag218:
-    DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_1M,PIXEL00_2
     PIXEL01_C
-    DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+    DiffOrNot ebp+w2,ebp+w6,PIXEL02_1M,PIXEL02_2
     PIXEL10_C
     PIXEL11
-    DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
-    DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+    DiffOrNot ebp+w8,ebp+w4,PIXEL20_1M,PIXEL20_2
+    DiffOrNot ebp+w6,ebp+w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
     jmp .loopx_end
 .. at flag91:
-    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
-    DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+    DiffOrNot ebp+w2,ebp+w6,PIXEL02_1M,PIXEL02_2
     PIXEL11
     PIXEL12_C
-    DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+    DiffOrNot ebp+w8,ebp+w4,PIXEL20_1M,PIXEL20_2
     PIXEL21_C
-    DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+    DiffOrNot ebp+w6,ebp+w8,PIXEL22_1M,PIXEL22_2
     jmp .loopx_end
 .. at flag229:
     PIXEL00_2
@@ -1876,9 +1903,9 @@ _hq3x_16:
     PIXEL22_1D
     jmp .loopx_end
 .. at flag186:
-    DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_1M,PIXEL00_2
     PIXEL01_C
-    DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+    DiffOrNot ebp+w2,ebp+w6,PIXEL02_1M,PIXEL02_2
     PIXEL10_C
     PIXEL11
     PIXEL12_C
@@ -1889,13 +1916,13 @@ _hq3x_16:
 .. at flag115:
     PIXEL00_1L
     PIXEL01_C
-    DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+    DiffOrNot ebp+w2,ebp+w6,PIXEL02_1M,PIXEL02_2
     PIXEL10_1
     PIXEL11
     PIXEL12_C
     PIXEL20_1L
     PIXEL21_C
-    DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+    DiffOrNot ebp+w6,ebp+w8,PIXEL22_1M,PIXEL22_2
     jmp .loopx_end
 .. at flag93:
     PIXEL00_1U
@@ -1904,18 +1931,18 @@ _hq3x_16:
     PIXEL10_C
     PIXEL11
     PIXEL12_C
-    DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+    DiffOrNot ebp+w8,ebp+w4,PIXEL20_1M,PIXEL20_2
     PIXEL21_C
-    DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+    DiffOrNot ebp+w6,ebp+w8,PIXEL22_1M,PIXEL22_2
     jmp .loopx_end
 .. at flag206:
-    DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_1M,PIXEL00_2
     PIXEL01_C
     PIXEL02_1R
     PIXEL10_C
     PIXEL11
     PIXEL12_1
-    DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+    DiffOrNot ebp+w8,ebp+w4,PIXEL20_1M,PIXEL20_2
     PIXEL21_C
     PIXEL22_1R
     jmp .loopx_end
@@ -1927,13 +1954,13 @@ _hq3x_16:
     PIXEL10_C
     PIXEL11
     PIXEL12_1
-    DiffOrNot w8,w4,PIXEL20_1M,PIXEL20_2
+    DiffOrNot ebp+w8,ebp+w4,PIXEL20_1M,PIXEL20_2
     PIXEL21_C
     PIXEL22_1R
     jmp .loopx_end
 .. at flag174:
 .. at flag46:
-    DiffOrNot w4,w2,PIXEL00_1M,PIXEL00_2
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_1M,PIXEL00_2
     PIXEL01_C
     PIXEL02_1R
     PIXEL10_C
@@ -1947,7 +1974,7 @@ _hq3x_16:
 .. at flag147:
     PIXEL00_1L
     PIXEL01_C
-    DiffOrNot w2,w6,PIXEL02_1M,PIXEL02_2
+    DiffOrNot ebp+w2,ebp+w6,PIXEL02_1M,PIXEL02_2
     PIXEL10_1
     PIXEL11
     PIXEL12_C
@@ -1965,7 +1992,7 @@ _hq3x_16:
     PIXEL12_C
     PIXEL20_1L
     PIXEL21_C
-    DiffOrNot w6,w8,PIXEL22_1M,PIXEL22_2
+    DiffOrNot ebp+w6,ebp+w8,PIXEL22_1M,PIXEL22_2
     jmp .loopx_end
 .. at flag189:
     PIXEL00_1U
@@ -1991,20 +2018,20 @@ _hq3x_16:
     jmp .loopx_end
 .. at flag126:
     PIXEL00_1M
-    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_4,PIXEL12_3
     PIXEL11
-    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_4,PIXEL21_3
     PIXEL22_1M
     jmp .loopx_end
 .. at flag219:
-    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_4,PIXEL01_3,PIXEL10_3
     PIXEL02_1M
     PIXEL11
     PIXEL20_1M
-    DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
+    DiffOrNot ebp+w6,ebp+w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_4
     jmp .loopx_end
 .. at flag125:
-    DiffOrNot w8,w4,PIXEL00_1U,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL00_2,PIXEL10_6,PIXEL20_5,PIXEL21_1
+    DiffOrNot ebp+w8,ebp+w4,PIXEL00_1U,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL00_2,PIXEL10_6,PIXEL20_5,PIXEL21_1
     PIXEL01_1
     PIXEL02_1U
     PIXEL11
@@ -2012,7 +2039,7 @@ _hq3x_16:
     PIXEL22_1M
     jmp .loopx_end
 .. at flag221:
-    DiffOrNot w6,w8,PIXEL02_1U,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL02_2,PIXEL12_6,PIXEL21_1,PIXEL22_5
+    DiffOrNot ebp+w6,ebp+w8,PIXEL02_1U,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL02_2,PIXEL12_6,PIXEL21_1,PIXEL22_5
     PIXEL00_1U
     PIXEL01_1
     PIXEL10_C
@@ -2020,7 +2047,7 @@ _hq3x_16:
     PIXEL20_1M
     jmp .loopx_end
 .. at flag207:
-    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL02_1R,PIXEL10_C,PIXEL00_5,PIXEL01_6,PIXEL02_2,PIXEL10_1
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_C,PIXEL01_C,PIXEL02_1R,PIXEL10_C,PIXEL00_5,PIXEL01_6,PIXEL02_2,PIXEL10_1
     PIXEL11
     PIXEL12_1
     PIXEL20_1M
@@ -2028,7 +2055,7 @@ _hq3x_16:
     PIXEL22_1R
     jmp .loopx_end
 .. at flag238:
-    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL22_1R,PIXEL10_1,PIXEL20_5,PIXEL21_6,PIXEL22_2
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL22_1R,PIXEL10_1,PIXEL20_5,PIXEL21_6,PIXEL22_2
     PIXEL00_1M
     PIXEL01_C
     PIXEL02_1R
@@ -2036,7 +2063,7 @@ _hq3x_16:
     PIXEL12_1
     jmp .loopx_end
 .. at flag190:
-    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL22_1D,PIXEL01_1,PIXEL02_5,PIXEL12_6,PIXEL22_2
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL22_1D,PIXEL01_1,PIXEL02_5,PIXEL12_6,PIXEL22_2
     PIXEL00_1M
     PIXEL10_C
     PIXEL11
@@ -2044,7 +2071,7 @@ _hq3x_16:
     PIXEL21_1
     jmp .loopx_end
 .. at flag187:
-    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL20_1D,PIXEL00_5,PIXEL01_1,PIXEL10_6,PIXEL20_2
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL20_1D,PIXEL00_5,PIXEL01_1,PIXEL10_6,PIXEL20_2
     PIXEL02_1M
     PIXEL11
     PIXEL12_C
@@ -2052,7 +2079,7 @@ _hq3x_16:
     PIXEL22_1D
     jmp .loopx_end
 .. at flag243:
-    DiffOrNot w6,w8,PIXEL12_C,PIXEL20_1L,PIXEL21_C,PIXEL22_C,PIXEL12_1,PIXEL20_2,PIXEL21_6,PIXEL22_5
+    DiffOrNot ebp+w6,ebp+w8,PIXEL12_C,PIXEL20_1L,PIXEL21_C,PIXEL22_C,PIXEL12_1,PIXEL20_2,PIXEL21_6,PIXEL22_5
     PIXEL00_1L
     PIXEL01_C
     PIXEL02_1M
@@ -2060,7 +2087,7 @@ _hq3x_16:
     PIXEL11
     jmp .loopx_end
 .. at flag119:
-    DiffOrNot w2,w6,PIXEL00_1L,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL00_2,PIXEL01_6,PIXEL02_5,PIXEL12_1
+    DiffOrNot ebp+w2,ebp+w6,PIXEL00_1L,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL00_2,PIXEL01_6,PIXEL02_5,PIXEL12_1
     PIXEL10_1
     PIXEL11
     PIXEL20_1L
@@ -2075,13 +2102,13 @@ _hq3x_16:
     PIXEL10_C
     PIXEL11
     PIXEL12_1
-    DiffOrNot w8,w4,PIXEL20_C,PIXEL20_2
+    DiffOrNot ebp+w8,ebp+w4,PIXEL20_C,PIXEL20_2
     PIXEL21_C
     PIXEL22_1R
     jmp .loopx_end
 .. at flag175:
 .. at flag47:
-    DiffOrNot w4,w2,PIXEL00_C,PIXEL00_2
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_C,PIXEL00_2
     PIXEL01_C
     PIXEL02_1R
     PIXEL10_C
@@ -2095,7 +2122,7 @@ _hq3x_16:
 .. at flag151:
     PIXEL00_1L
     PIXEL01_C
-    DiffOrNot w2,w6,PIXEL02_C,PIXEL02_2
+    DiffOrNot ebp+w2,ebp+w6,PIXEL02_C,PIXEL02_2
     PIXEL10_1
     PIXEL11
     PIXEL12_C
@@ -2113,30 +2140,30 @@ _hq3x_16:
     PIXEL12_C
     PIXEL20_1L
     PIXEL21_C
-    DiffOrNot w6,w8,PIXEL22_C,PIXEL22_2
+    DiffOrNot ebp+w6,ebp+w8,PIXEL22_C,PIXEL22_2
     jmp .loopx_end
 .. at flag250:
     PIXEL00_1M
     PIXEL01_C
     PIXEL02_1M
     PIXEL11
-    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL10_3,PIXEL20_4
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_C,PIXEL20_C,PIXEL10_3,PIXEL20_4
     PIXEL21_C
-    DiffOrNot w6,w8,PIXEL12_C,PIXEL22_C,PIXEL12_3,PIXEL22_4
+    DiffOrNot ebp+w6,ebp+w8,PIXEL12_C,PIXEL22_C,PIXEL12_3,PIXEL22_4
     jmp .loopx_end
 .. at flag123:
-    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL00_4,PIXEL01_3
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_C,PIXEL01_C,PIXEL00_4,PIXEL01_3
     PIXEL02_1M
     PIXEL10_C
     PIXEL11
     PIXEL12_C
-    DiffOrNot w8,w4,PIXEL20_C,PIXEL21_C,PIXEL20_4,PIXEL21_3
+    DiffOrNot ebp+w8,ebp+w4,PIXEL20_C,PIXEL21_C,PIXEL20_4,PIXEL21_3
     PIXEL22_1M
     jmp .loopx_end
 .. at flag95:
-    DiffOrNot w4,w2,PIXEL00_C,PIXEL10_C,PIXEL00_4,PIXEL10_3
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_C,PIXEL10_C,PIXEL00_4,PIXEL10_3
     PIXEL01_C
-    DiffOrNot w2,w6,PIXEL02_C,PIXEL12_C,PIXEL02_4,PIXEL12_3
+    DiffOrNot ebp+w2,ebp+w6,PIXEL02_C,PIXEL12_C,PIXEL02_4,PIXEL12_3
     PIXEL11
     PIXEL20_1M
     PIXEL21_C
@@ -2144,12 +2171,12 @@ _hq3x_16:
     jmp .loopx_end
 .. at flag222:
     PIXEL00_1M
-    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL01_3,PIXEL02_4
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_C,PIXEL02_C,PIXEL01_3,PIXEL02_4
     PIXEL10_C
     PIXEL11
     PIXEL12_C
     PIXEL20_1M
-    DiffOrNot w6,w8,PIXEL21_C,PIXEL22_C,PIXEL21_3,PIXEL22_4
+    DiffOrNot ebp+w6,ebp+w8,PIXEL21_C,PIXEL22_C,PIXEL21_3,PIXEL22_4
     jmp .loopx_end
 .. at flag252:
     PIXEL00_1M
@@ -2157,9 +2184,9 @@ _hq3x_16:
     PIXEL02_1U
     PIXEL11
     PIXEL12_C
-    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL10_3,PIXEL20_4
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_C,PIXEL20_C,PIXEL10_3,PIXEL20_4
     PIXEL21_C
-    DiffOrNot w6,w8,PIXEL22_C,PIXEL22_2
+    DiffOrNot ebp+w6,ebp+w8,PIXEL22_C,PIXEL22_2
     jmp .loopx_end
 .. at flag249:
     PIXEL00_1U
@@ -2167,34 +2194,34 @@ _hq3x_16:
     PIXEL02_1M
     PIXEL10_C
     PIXEL11
-    DiffOrNot w8,w4,PIXEL20_C,PIXEL20_2
+    DiffOrNot ebp+w8,ebp+w4,PIXEL20_C,PIXEL20_2
     PIXEL21_C
-    DiffOrNot w6,w8,PIXEL12_C,PIXEL22_C,PIXEL12_3,PIXEL22_4
+    DiffOrNot ebp+w6,ebp+w8,PIXEL12_C,PIXEL22_C,PIXEL12_3,PIXEL22_4
     jmp .loopx_end
 .. at flag235:
-    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL00_4,PIXEL01_3
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_C,PIXEL01_C,PIXEL00_4,PIXEL01_3
     PIXEL02_1M
     PIXEL10_C
     PIXEL11
     PIXEL12_1
-    DiffOrNot w8,w4,PIXEL20_C,PIXEL20_2
+    DiffOrNot ebp+w8,ebp+w4,PIXEL20_C,PIXEL20_2
     PIXEL21_C
     PIXEL22_1R
     jmp .loopx_end
 .. at flag111:
-    DiffOrNot w4,w2,PIXEL00_C,PIXEL00_2
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_C,PIXEL00_2
     PIXEL01_C
     PIXEL02_1R
     PIXEL10_C
     PIXEL11
     PIXEL12_1
-    DiffOrNot w8,w4,PIXEL20_C,PIXEL21_C,PIXEL20_4,PIXEL21_3
+    DiffOrNot ebp+w8,ebp+w4,PIXEL20_C,PIXEL21_C,PIXEL20_4,PIXEL21_3
     PIXEL22_1M
     jmp .loopx_end
 .. at flag63:
-    DiffOrNot w4,w2,PIXEL00_C,PIXEL00_2
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_C,PIXEL00_2
     PIXEL01_C
-    DiffOrNot w2,w6,PIXEL02_C,PIXEL12_C,PIXEL02_4,PIXEL12_3
+    DiffOrNot ebp+w2,ebp+w6,PIXEL02_C,PIXEL12_C,PIXEL02_4,PIXEL12_3
     PIXEL10_C
     PIXEL11
     PIXEL20_1D
@@ -2202,9 +2229,9 @@ _hq3x_16:
     PIXEL22_1M
     jmp .loopx_end
 .. at flag159:
-    DiffOrNot w4,w2,PIXEL00_C,PIXEL10_C,PIXEL00_4,PIXEL10_3
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_C,PIXEL10_C,PIXEL00_4,PIXEL10_3
     PIXEL01_C
-    DiffOrNot w2,w6,PIXEL02_C,PIXEL02_2
+    DiffOrNot ebp+w2,ebp+w6,PIXEL02_C,PIXEL02_2
     PIXEL11
     PIXEL12_C
     PIXEL20_1M
@@ -2214,29 +2241,29 @@ _hq3x_16:
 .. at flag215:
     PIXEL00_1L
     PIXEL01_C
-    DiffOrNot w2,w6,PIXEL02_C,PIXEL02_2
+    DiffOrNot ebp+w2,ebp+w6,PIXEL02_C,PIXEL02_2
     PIXEL10_1
     PIXEL11
     PIXEL12_C
     PIXEL20_1M
-    DiffOrNot w6,w8,PIXEL21_C,PIXEL22_C,PIXEL21_3,PIXEL22_4
+    DiffOrNot ebp+w6,ebp+w8,PIXEL21_C,PIXEL22_C,PIXEL21_3,PIXEL22_4
     jmp .loopx_end
 .. at flag246:
     PIXEL00_1M
-    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL01_3,PIXEL02_4
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_C,PIXEL02_C,PIXEL01_3,PIXEL02_4
     PIXEL10_1
     PIXEL11
     PIXEL12_C
     PIXEL20_1L
     PIXEL21_C
-    DiffOrNot w6,w8,PIXEL22_C,PIXEL22_2
+    DiffOrNot ebp+w6,ebp+w8,PIXEL22_C,PIXEL22_2
     jmp .loopx_end
 .. at flag254:
     PIXEL00_1M
-    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL01_3,PIXEL02_4
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_C,PIXEL02_C,PIXEL01_3,PIXEL02_4
     PIXEL11
-    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL10_3,PIXEL20_4
-    DiffOrNot w6,w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_2
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_C,PIXEL20_C,PIXEL10_3,PIXEL20_4
+    DiffOrNot ebp+w6,ebp+w8,PIXEL12_C,PIXEL21_C,PIXEL22_C,PIXEL12_3,PIXEL21_3,PIXEL22_2
     jmp .loopx_end
 .. at flag253:
     PIXEL00_1U
@@ -2245,39 +2272,39 @@ _hq3x_16:
     PIXEL10_C
     PIXEL11
     PIXEL12_C
-    DiffOrNot w8,w4,PIXEL20_C,PIXEL20_2
+    DiffOrNot ebp+w8,ebp+w4,PIXEL20_C,PIXEL20_2
     PIXEL21_C
-    DiffOrNot w6,w8,PIXEL22_C,PIXEL22_2
+    DiffOrNot ebp+w6,ebp+w8,PIXEL22_C,PIXEL22_2
     jmp .loopx_end
 .. at flag251:
-    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL00_4,PIXEL01_3
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_C,PIXEL01_C,PIXEL00_4,PIXEL01_3
     PIXEL02_1M
     PIXEL11
-    DiffOrNot w8,w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_2,PIXEL21_3
-    DiffOrNot w6,w8,PIXEL12_C,PIXEL22_C,PIXEL12_3,PIXEL22_4
+    DiffOrNot ebp+w8,ebp+w4,PIXEL10_C,PIXEL20_C,PIXEL21_C,PIXEL10_3,PIXEL20_2,PIXEL21_3
+    DiffOrNot ebp+w6,ebp+w8,PIXEL12_C,PIXEL22_C,PIXEL12_3,PIXEL22_4
     jmp .loopx_end
 .. at flag239:
-    DiffOrNot w4,w2,PIXEL00_C,PIXEL00_2
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_C,PIXEL00_2
     PIXEL01_C
     PIXEL02_1R
     PIXEL10_C
     PIXEL11
     PIXEL12_1
-    DiffOrNot w8,w4,PIXEL20_C,PIXEL20_2
+    DiffOrNot ebp+w8,ebp+w4,PIXEL20_C,PIXEL20_2
     PIXEL21_C
     PIXEL22_1R
     jmp .loopx_end
 .. at flag127:
-    DiffOrNot w4,w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_2,PIXEL01_3,PIXEL10_3
-    DiffOrNot w2,w6,PIXEL02_C,PIXEL12_C,PIXEL02_4,PIXEL12_3
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_C,PIXEL01_C,PIXEL10_C,PIXEL00_2,PIXEL01_3,PIXEL10_3
+    DiffOrNot ebp+w2,ebp+w6,PIXEL02_C,PIXEL12_C,PIXEL02_4,PIXEL12_3
     PIXEL11
-    DiffOrNot w8,w4,PIXEL20_C,PIXEL21_C,PIXEL20_4,PIXEL21_3
+    DiffOrNot ebp+w8,ebp+w4,PIXEL20_C,PIXEL21_C,PIXEL20_4,PIXEL21_3
     PIXEL22_1M
     jmp .loopx_end
 .. at flag191:
-    DiffOrNot w4,w2,PIXEL00_C,PIXEL00_2
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_C,PIXEL00_2
     PIXEL01_C
-    DiffOrNot w2,w6,PIXEL02_C,PIXEL02_2
+    DiffOrNot ebp+w2,ebp+w6,PIXEL02_C,PIXEL02_2
     PIXEL10_C
     PIXEL11
     PIXEL12_C
@@ -2286,33 +2313,33 @@ _hq3x_16:
     PIXEL22_1D
     jmp .loopx_end
 .. at flag223:
-    DiffOrNot w4,w2,PIXEL00_C,PIXEL10_C,PIXEL00_4,PIXEL10_3
-    DiffOrNot w2,w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_2,PIXEL12_3
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_C,PIXEL10_C,PIXEL00_4,PIXEL10_3
+    DiffOrNot ebp+w2,ebp+w6,PIXEL01_C,PIXEL02_C,PIXEL12_C,PIXEL01_3,PIXEL02_2,PIXEL12_3
     PIXEL11
     PIXEL20_1M
-    DiffOrNot w6,w8,PIXEL21_C,PIXEL22_C,PIXEL21_3,PIXEL22_4
+    DiffOrNot ebp+w6,ebp+w8,PIXEL21_C,PIXEL22_C,PIXEL21_3,PIXEL22_4
     jmp .loopx_end
 .. at flag247:
     PIXEL00_1L
     PIXEL01_C
-    DiffOrNot w2,w6,PIXEL02_C,PIXEL02_2
+    DiffOrNot ebp+w2,ebp+w6,PIXEL02_C,PIXEL02_2
     PIXEL10_1
     PIXEL11
     PIXEL12_C
     PIXEL20_1L
     PIXEL21_C
-    DiffOrNot w6,w8,PIXEL22_C,PIXEL22_2
+    DiffOrNot ebp+w6,ebp+w8,PIXEL22_C,PIXEL22_2
     jmp .loopx_end
 .. at flag255:
-    DiffOrNot w4,w2,PIXEL00_C,PIXEL00_2
+    DiffOrNot ebp+w4,ebp+w2,PIXEL00_C,PIXEL00_2
     PIXEL01_C
-    DiffOrNot w2,w6,PIXEL02_C,PIXEL02_2
+    DiffOrNot ebp+w2,ebp+w6,PIXEL02_C,PIXEL02_2
     PIXEL10_C
     PIXEL11
     PIXEL12_C
-    DiffOrNot w8,w4,PIXEL20_C,PIXEL20_2
+    DiffOrNot ebp+w8,ebp+w4,PIXEL20_C,PIXEL20_2
     PIXEL21_C
-    DiffOrNot w6,w8,PIXEL22_C,PIXEL22_2
+    DiffOrNot ebp+w6,ebp+w8,PIXEL22_C,PIXEL22_2
     jmp .loopx_end
 
 .. at cross0:
@@ -2330,13 +2357,13 @@ _hq3x_16:
     mov edx,eax
     shl eax,16
     or  eax,edx
-    mov ecx,[w2]
-    and edx,[_hqx_highbits]
-    and ecx,[_hqx_highbits]
+    mov ecx,[ebp+w2]
+    and edx,[ebp+highbits]
+    and ecx,[ebp+highbits]
     add ecx,edx
     shr ecx,1
-    add ecx,[_hqx_lowbits]
-    and ecx,[_hqx_highbits]
+    add ecx,[ebp+lowbits]
+    and ecx,[ebp+highbits]
     add edx,ecx
     shr edx,1
     mov [edi],dx
@@ -2351,13 +2378,13 @@ _hq3x_16:
     mov edx,eax
     shl eax,16
     or  eax,edx
-    mov ecx,[w4]
-    and edx,[_hqx_highbits]
-    and ecx,[_hqx_highbits]
+    mov ecx,[ebp+w4]
+    and edx,[ebp+highbits]
+    and ecx,[ebp+highbits]
     add ecx,edx
     shr ecx,1
-    add ecx,[_hqx_lowbits]
-    and ecx,[_hqx_highbits]
+    add ecx,[ebp+lowbits]
+    and ecx,[ebp+highbits]
     add edx,ecx
     shr edx,1
     mov [edi],dx
@@ -2371,13 +2398,13 @@ _hq3x_16:
     mov edx,eax
     shl eax,16
     or  eax,edx
-    mov ecx,[w6]
-    and edx,[_hqx_highbits]
-    and ecx,[_hqx_highbits]
+    mov ecx,[ebp+w6]
+    and edx,[ebp+highbits]
+    and ecx,[ebp+highbits]
     add ecx,edx
     shr ecx,1
-    add ecx,[_hqx_lowbits]
-    and ecx,[_hqx_highbits]
+    add ecx,[ebp+lowbits]
+    and ecx,[ebp+highbits]
     add edx,ecx
     shr edx,1
     mov [edi],eax
@@ -2391,13 +2418,13 @@ _hq3x_16:
     mov edx,eax
     shl eax,16
     or  eax,edx
-    mov ecx,[w8]
-    and edx,[_hqx_highbits]
-    and ecx,[_hqx_highbits]
+    mov ecx,[ebp+w8]
+    and edx,[ebp+highbits]
+    and ecx,[ebp+highbits]
     add ecx,edx
     shr ecx,1
-    add ecx,[_hqx_lowbits]
-    and ecx,[_hqx_highbits]
+    add ecx,[ebp+lowbits]
+    and ecx,[ebp+highbits]
     add edx,ecx
     shr edx,1
     mov [edi],eax
@@ -2412,18 +2439,18 @@ _hq3x_16:
 .loopx_end:
     add     esi,2
     add     edi,6
-    dec     dword[xcounter]
+    dec     dword[ebp+xcounter]
     jz      .nexty
     jmp     .loopx
 .nexty:
-    add     esi,dword[moduloSrc]
-    add     edi,dword[moduloDst]
-    dec     dword[linesleft]
+    add     esi,dword[ebp+moduloSrc]
+    add     edi,dword[ebp+moduloDst]
+    dec     dword[ebp+linesleft]
     jz      .fin
     mov     ebx,[ebp+srcPitch]
-    mov     dword[nextline],ebx
+    mov     dword[ebp+nextline],ebx
     neg     ebx
-    mov     dword[prevline],ebx
+    mov     dword[ebp+prevline],ebx
     jmp     .loopy
 .fin:
     emms
@@ -2432,46 +2459,49 @@ _hq3x_16:
     pop ebp
     ret
 
-SECTION .data
+_get_pc:
+    mov     eax, dword[esp]
+    ret
+
 FuncTable:
-    dd .. at flag0, .. at flag1, .. at flag2, .. at flag3, .. at flag4, .. at flag5, .. at flag6, .. at flag7
-    dd .. at flag8, .. at flag9, .. at flag10, .. at flag11, .. at flag12, .. at flag13, .. at flag14, .. at flag15
-    dd .. at flag16, .. at flag17, .. at flag18, .. at flag19, .. at flag20, .. at flag21, .. at flag22, .. at flag23
-    dd .. at flag24, .. at flag25, .. at flag26, .. at flag27, .. at flag28, .. at flag29, .. at flag30, .. at flag31
-    dd .. at flag32, .. at flag33, .. at flag34, .. at flag35, .. at flag36, .. at flag37, .. at flag38, .. at flag39
-    dd .. at flag40, .. at flag41, .. at flag42, .. at flag43, .. at flag44, .. at flag45, .. at flag46, .. at flag47
-    dd .. at flag48, .. at flag49, .. at flag50, .. at flag51, .. at flag52, .. at flag53, .. at flag54, .. at flag55
-    dd .. at flag56, .. at flag57, .. at flag58, .. at flag59, .. at flag60, .. at flag61, .. at flag62, .. at flag63
-    dd .. at flag64, .. at flag65, .. at flag66, .. at flag67, .. at flag68, .. at flag69, .. at flag70, .. at flag71
-    dd .. at flag72, .. at flag73, .. at flag74, .. at flag75, .. at flag76, .. at flag77, .. at flag78, .. at flag79
-    dd .. at flag80, .. at flag81, .. at flag82, .. at flag83, .. at flag84, .. at flag85, .. at flag86, .. at flag87
-    dd .. at flag88, .. at flag89, .. at flag90, .. at flag91, .. at flag92, .. at flag93, .. at flag94, .. at flag95
-    dd .. at flag96, .. at flag97, .. at flag98, .. at flag99, .. at flag100, .. at flag101, .. at flag102, .. at flag103
-    dd .. at flag104, .. at flag105, .. at flag106, .. at flag107, .. at flag108, .. at flag109, .. at flag110, .. at flag111
-    dd .. at flag112, .. at flag113, .. at flag114, .. at flag115, .. at flag116, .. at flag117, .. at flag118, .. at flag119
-    dd .. at flag120, .. at flag121, .. at flag122, .. at flag123, .. at flag124, .. at flag125, .. at flag126, .. at flag127
-    dd .. at flag128, .. at flag129, .. at flag130, .. at flag131, .. at flag132, .. at flag133, .. at flag134, .. at flag135
-    dd .. at flag136, .. at flag137, .. at flag138, .. at flag139, .. at flag140, .. at flag141, .. at flag142, .. at flag143
-    dd .. at flag144, .. at flag145, .. at flag146, .. at flag147, .. at flag148, .. at flag149, .. at flag150, .. at flag151
-    dd .. at flag152, .. at flag153, .. at flag154, .. at flag155, .. at flag156, .. at flag157, .. at flag158, .. at flag159
-    dd .. at flag160, .. at flag161, .. at flag162, .. at flag163, .. at flag164, .. at flag165, .. at flag166, .. at flag167
-    dd .. at flag168, .. at flag169, .. at flag170, .. at flag171, .. at flag172, .. at flag173, .. at flag174, .. at flag175
-    dd .. at flag176, .. at flag177, .. at flag178, .. at flag179, .. at flag180, .. at flag181, .. at flag182, .. at flag183
-    dd .. at flag184, .. at flag185, .. at flag186, .. at flag187, .. at flag188, .. at flag189, .. at flag190, .. at flag191
-    dd .. at flag192, .. at flag193, .. at flag194, .. at flag195, .. at flag196, .. at flag197, .. at flag198, .. at flag199
-    dd .. at flag200, .. at flag201, .. at flag202, .. at flag203, .. at flag204, .. at flag205, .. at flag206, .. at flag207
-    dd .. at flag208, .. at flag209, .. at flag210, .. at flag211, .. at flag212, .. at flag213, .. at flag214, .. at flag215
-    dd .. at flag216, .. at flag217, .. at flag218, .. at flag219, .. at flag220, .. at flag221, .. at flag222, .. at flag223
-    dd .. at flag224, .. at flag225, .. at flag226, .. at flag227, .. at flag228, .. at flag229, .. at flag230, .. at flag231
-    dd .. at flag232, .. at flag233, .. at flag234, .. at flag235, .. at flag236, .. at flag237, .. at flag238, .. at flag239
-    dd .. at flag240, .. at flag241, .. at flag242, .. at flag243, .. at flag244, .. at flag245, .. at flag246, .. at flag247
-    dd .. at flag248, .. at flag249, .. at flag250, .. at flag251, .. at flag252, .. at flag253, .. at flag254, .. at flag255
+    dd .. at flag0 - FuncTable, .. at flag1 - FuncTable, .. at flag2 - FuncTable, .. at flag3 - FuncTable, .. at flag4 - FuncTable, .. at flag5 - FuncTable, .. at flag6 - FuncTable, .. at flag7 - FuncTable
+    dd .. at flag8 - FuncTable, .. at flag9 - FuncTable, .. at flag10 - FuncTable, .. at flag11 - FuncTable, .. at flag12 - FuncTable, .. at flag13 - FuncTable, .. at flag14 - FuncTable, .. at flag15 - FuncTable
+    dd .. at flag16 - FuncTable, .. at flag17 - FuncTable, .. at flag18 - FuncTable, .. at flag19 - FuncTable, .. at flag20 - FuncTable, .. at flag21 - FuncTable, .. at flag22 - FuncTable, .. at flag23 - FuncTable
+    dd .. at flag24 - FuncTable, .. at flag25 - FuncTable, .. at flag26 - FuncTable, .. at flag27 - FuncTable, .. at flag28 - FuncTable, .. at flag29 - FuncTable, .. at flag30 - FuncTable, .. at flag31 - FuncTable
+    dd .. at flag32 - FuncTable, .. at flag33 - FuncTable, .. at flag34 - FuncTable, .. at flag35 - FuncTable, .. at flag36 - FuncTable, .. at flag37 - FuncTable, .. at flag38 - FuncTable, .. at flag39 - FuncTable
+    dd .. at flag40 - FuncTable, .. at flag41 - FuncTable, .. at flag42 - FuncTable, .. at flag43 - FuncTable, .. at flag44 - FuncTable, .. at flag45 - FuncTable, .. at flag46 - FuncTable, .. at flag47 - FuncTable
+    dd .. at flag48 - FuncTable, .. at flag49 - FuncTable, .. at flag50 - FuncTable, .. at flag51 - FuncTable, .. at flag52 - FuncTable, .. at flag53 - FuncTable, .. at flag54 - FuncTable, .. at flag55 - FuncTable
+    dd .. at flag56 - FuncTable, .. at flag57 - FuncTable, .. at flag58 - FuncTable, .. at flag59 - FuncTable, .. at flag60 - FuncTable, .. at flag61 - FuncTable, .. at flag62 - FuncTable, .. at flag63 - FuncTable
+    dd .. at flag64 - FuncTable, .. at flag65 - FuncTable, .. at flag66 - FuncTable, .. at flag67 - FuncTable, .. at flag68 - FuncTable, .. at flag69 - FuncTable, .. at flag70 - FuncTable, .. at flag71 - FuncTable
+    dd .. at flag72 - FuncTable, .. at flag73 - FuncTable, .. at flag74 - FuncTable, .. at flag75 - FuncTable, .. at flag76 - FuncTable, .. at flag77 - FuncTable, .. at flag78 - FuncTable, .. at flag79 - FuncTable
+    dd .. at flag80 - FuncTable, .. at flag81 - FuncTable, .. at flag82 - FuncTable, .. at flag83 - FuncTable, .. at flag84 - FuncTable, .. at flag85 - FuncTable, .. at flag86 - FuncTable, .. at flag87 - FuncTable
+    dd .. at flag88 - FuncTable, .. at flag89 - FuncTable, .. at flag90 - FuncTable, .. at flag91 - FuncTable, .. at flag92 - FuncTable, .. at flag93 - FuncTable, .. at flag94 - FuncTable, .. at flag95 - FuncTable
+    dd .. at flag96 - FuncTable, .. at flag97 - FuncTable, .. at flag98 - FuncTable, .. at flag99 - FuncTable, .. at flag100 - FuncTable, .. at flag101 - FuncTable, .. at flag102 - FuncTable, .. at flag103 - FuncTable
+    dd .. at flag104 - FuncTable, .. at flag105 - FuncTable, .. at flag106 - FuncTable, .. at flag107 - FuncTable, .. at flag108 - FuncTable, .. at flag109 - FuncTable, .. at flag110 - FuncTable, .. at flag111 - FuncTable
+    dd .. at flag112 - FuncTable, .. at flag113 - FuncTable, .. at flag114 - FuncTable, .. at flag115 - FuncTable, .. at flag116 - FuncTable, .. at flag117 - FuncTable, .. at flag118 - FuncTable, .. at flag119 - FuncTable
+    dd .. at flag120 - FuncTable, .. at flag121 - FuncTable, .. at flag122 - FuncTable, .. at flag123 - FuncTable, .. at flag124 - FuncTable, .. at flag125 - FuncTable, .. at flag126 - FuncTable, .. at flag127 - FuncTable
+    dd .. at flag128 - FuncTable, .. at flag129 - FuncTable, .. at flag130 - FuncTable, .. at flag131 - FuncTable, .. at flag132 - FuncTable, .. at flag133 - FuncTable, .. at flag134 - FuncTable, .. at flag135 - FuncTable
+    dd .. at flag136 - FuncTable, .. at flag137 - FuncTable, .. at flag138 - FuncTable, .. at flag139 - FuncTable, .. at flag140 - FuncTable, .. at flag141 - FuncTable, .. at flag142 - FuncTable, .. at flag143 - FuncTable
+    dd .. at flag144 - FuncTable, .. at flag145 - FuncTable, .. at flag146 - FuncTable, .. at flag147 - FuncTable, .. at flag148 - FuncTable, .. at flag149 - FuncTable, .. at flag150 - FuncTable, .. at flag151 - FuncTable
+    dd .. at flag152 - FuncTable, .. at flag153 - FuncTable, .. at flag154 - FuncTable, .. at flag155 - FuncTable, .. at flag156 - FuncTable, .. at flag157 - FuncTable, .. at flag158 - FuncTable, .. at flag159 - FuncTable
+    dd .. at flag160 - FuncTable, .. at flag161 - FuncTable, .. at flag162 - FuncTable, .. at flag163 - FuncTable, .. at flag164 - FuncTable, .. at flag165 - FuncTable, .. at flag166 - FuncTable, .. at flag167 - FuncTable
+    dd .. at flag168 - FuncTable, .. at flag169 - FuncTable, .. at flag170 - FuncTable, .. at flag171 - FuncTable, .. at flag172 - FuncTable, .. at flag173 - FuncTable, .. at flag174 - FuncTable, .. at flag175 - FuncTable
+    dd .. at flag176 - FuncTable, .. at flag177 - FuncTable, .. at flag178 - FuncTable, .. at flag179 - FuncTable, .. at flag180 - FuncTable, .. at flag181 - FuncTable, .. at flag182 - FuncTable, .. at flag183 - FuncTable
+    dd .. at flag184 - FuncTable, .. at flag185 - FuncTable, .. at flag186 - FuncTable, .. at flag187 - FuncTable, .. at flag188 - FuncTable, .. at flag189 - FuncTable, .. at flag190 - FuncTable, .. at flag191 - FuncTable
+    dd .. at flag192 - FuncTable, .. at flag193 - FuncTable, .. at flag194 - FuncTable, .. at flag195 - FuncTable, .. at flag196 - FuncTable, .. at flag197 - FuncTable, .. at flag198 - FuncTable, .. at flag199 - FuncTable
+    dd .. at flag200 - FuncTable, .. at flag201 - FuncTable, .. at flag202 - FuncTable, .. at flag203 - FuncTable, .. at flag204 - FuncTable, .. at flag205 - FuncTable, .. at flag206 - FuncTable, .. at flag207 - FuncTable
+    dd .. at flag208 - FuncTable, .. at flag209 - FuncTable, .. at flag210 - FuncTable, .. at flag211 - FuncTable, .. at flag212 - FuncTable, .. at flag213 - FuncTable, .. at flag214 - FuncTable, .. at flag215 - FuncTable
+    dd .. at flag216 - FuncTable, .. at flag217 - FuncTable, .. at flag218 - FuncTable, .. at flag219 - FuncTable, .. at flag220 - FuncTable, .. at flag221 - FuncTable, .. at flag222 - FuncTable, .. at flag223 - FuncTable
+    dd .. at flag224 - FuncTable, .. at flag225 - FuncTable, .. at flag226 - FuncTable, .. at flag227 - FuncTable, .. at flag228 - FuncTable, .. at flag229 - FuncTable, .. at flag230 - FuncTable, .. at flag231 - FuncTable
+    dd .. at flag232 - FuncTable, .. at flag233 - FuncTable, .. at flag234 - FuncTable, .. at flag235 - FuncTable, .. at flag236 - FuncTable, .. at flag237 - FuncTable, .. at flag238 - FuncTable, .. at flag239 - FuncTable
+    dd .. at flag240 - FuncTable, .. at flag241 - FuncTable, .. at flag242 - FuncTable, .. at flag243 - FuncTable, .. at flag244 - FuncTable, .. at flag245 - FuncTable, .. at flag246 - FuncTable, .. at flag247 - FuncTable
+    dd .. at flag248 - FuncTable, .. at flag249 - FuncTable, .. at flag250 - FuncTable, .. at flag251 - FuncTable, .. at flag252 - FuncTable, .. at flag253 - FuncTable, .. at flag254 - FuncTable, .. at flag255 - FuncTable
 
 FuncTable2:
-    dd .. at cross0, .. at cross1, .. at cross2, .. at flag0,
-    dd .. at cross4, .. at flag0,  .. at flag0,  .. at flag0,
-    dd .. at cross8, .. at flag0,  .. at flag0,  .. at flag0,
-    dd .. at flag0,  .. at flag0,  .. at flag0,  .. at flag0
+    dd .. at cross0 - FuncTable2, .. at cross1 - FuncTable2, .. at cross2 - FuncTable2, .. at flag0 - FuncTable2,
+    dd .. at cross4 - FuncTable2, .. at flag0  - FuncTable2, .. at flag0  - FuncTable2, .. at flag0 - FuncTable2,
+    dd .. at cross8 - FuncTable2, .. at flag0  - FuncTable2, .. at flag0  - FuncTable2, .. at flag0 - FuncTable2,
+    dd .. at flag0  - FuncTable2, .. at flag0  - FuncTable2, .. at flag0  - FuncTable2, .. at flag0 - FuncTable2
 
 
 %ifidn __OUTPUT_FORMAT__,elf


Commit: 5f86c391ec5dca27b5acef6d63f91189336f4e4b
    https://github.com/scummvm/scummvm/commit/5f86c391ec5dca27b5acef6d63f91189336f4e4b
Author: Le Philousophe (lephilousophe at users.noreply.github.com)
Date: 2022-01-03T10:52:11+01:00

Commit Message:
GRAPHICS: Remove global variables of HQ scaler

This allows multiple instances to run in parallel

Changed paths:
    graphics/scaler/hq.cpp
    graphics/scaler/hq.h


diff --git a/graphics/scaler/hq.cpp b/graphics/scaler/hq.cpp
index a4be89203f0..2b92e8e358e 100644
--- a/graphics/scaler/hq.cpp
+++ b/graphics/scaler/hq.cpp
@@ -24,13 +24,39 @@
 
 // RGB-to-YUV lookup table
 
+#ifdef USE_NASM
+// NOTE: if your compiler uses different mangled names, add another
+//       condition here
+struct hqx_parameters {
+	uint32 *RGBtoYUV;
+	uint32 highbits;
+	uint32 lowbits;
+	uint32 low2bits;
+	uint32 low3bits;
+	uint32 greenMask;
+	uint32 redBlueMask;
+	uint32 green_redBlue_Mask;
+};
+
+// Assembly versions of HQ2x and HQ3x
+
+extern "C" {
+
+#if !defined(_WIN32) && !defined(MACOSX) && !defined(__OS2__)
+#define hq2x_16 _hq2x_16
+#define hq3x_16 _hq3x_16
+#endif
+
+void hq2x_16(const byte *, byte *, uint32, uint32, uint32, uint32, struct hqx_parameters *);
+void hq3x_16(const byte *, byte *, uint32, uint32, uint32, uint32, struct hqx_parameters *);
+
+}
+#endif
+
 /**
  * 16bit RGB to YUV conversion table. This table is setup by InitLUT().
  * Used by the hq scaler family.
  *
- * FIXME/TODO: The RGBtoYUV table sucks up 256 KB. This is bad.
- * In addition we never free it...
- *
  * Note: a memory lookup table is *not* necessarily faster than computing
  * these things on the fly, because of its size. The table together with
  * the code, plus the input/output GFX data, may not fit in the cache on some
@@ -45,83 +71,45 @@
  * differences are likely to vary a lot between different architectures and
  * CPUs.
  */
-static uint32 *RGBtoYUV = 0;
-
-#ifdef USE_NASM
-// NOTE: if your compiler uses different mangled names, add another
-//       condition here
-typedef struct {
-	uint32 *RGBtoYUV;
-	uint32 highbits;
-	uint32 lowbits;
-	uint32 low2bits;
-	uint32 low3bits;
-	uint32 greenMask;
-	uint32 redBlueMask;
-	uint32 green_redBlue_Mask;
-} hqx_parameters;
-
-static hqx_parameters hqx_params;
-#endif
-
-void InitLUT(Graphics::PixelFormat format) {
+void HQScaler::initLUT(Graphics::PixelFormat format) {
 	uint8 r, g, b;
 	int Y, u, v;
 
 	assert(format.bytesPerPixel == 2);
 
 	// Allocate the YUV/LUT buffers on the fly if needed.
-	if (RGBtoYUV == 0)
-		RGBtoYUV = (uint32 *)malloc(65536 * sizeof(uint32));
-
-	if (!RGBtoYUV)
-		error("[InitLUT] Cannot allocate memory for YUV/LUT buffers");
-
+	if (!_RGBtoYUV) {
+		_RGBtoYUV = new uint32[65536];
+	}
 	for (int color = 0; color < 65536; ++color) {
 		format.colorToRGB(color, r, g, b);
 		Y = (r + g + b) >> 2;
 		u = 128 + ((r - b) >> 2);
 		v = 128 + ((-r + 2 * g - b) >> 3);
-		RGBtoYUV[color] = (Y << 16) | (u << 8) | v;
+		_RGBtoYUV[color] = (Y << 16) | (u << 8) | v;
 	}
 
 #ifdef USE_NASM
-	hqx_params.lowbits  = (1 << format.rShift) | (1 << format.gShift) | (1 << format.bShift),
-	hqx_params.low2bits = (3 << format.rShift) | (3 << format.gShift) | (3 << format.bShift),
-	hqx_params.low3bits = (7 << format.rShift) | (7 << format.gShift) | (7 << format.bShift),
+	if (!_hqx_params) {
+		_hqx_params = new hqx_parameters;
+	}
+	_hqx_params->lowbits  = (1 << format.rShift) | (1 << format.gShift) | (1 << format.bShift),
+	_hqx_params->low2bits = (3 << format.rShift) | (3 << format.gShift) | (3 << format.bShift),
+	_hqx_params->low3bits = (7 << format.rShift) | (7 << format.gShift) | (7 << format.bShift),
 
-	hqx_params.highbits = format.RGBToColor(255,255,255) ^ hqx_params.lowbits;
+	_hqx_params->highbits = format.RGBToColor(255,255,255) ^ _hqx_params->lowbits;
 
 	// FIXME: The following code only does the right thing
 	// if the color order is RGB or BGR, i.e., green is in the middle.
-	hqx_params.greenMask = format.RGBToColor(0,255,0);
-	hqx_params.redBlueMask = format.RGBToColor(255,0,255);
-
-	hqx_params.green_redBlue_Mask = (hqx_params.greenMask << 16) | hqx_params.redBlueMask;
-
-	hqx_params.RGBtoYUV = RGBtoYUV;
-#endif
-}
-
-#ifdef USE_NASM
-// Assembly version of HQ2x
+	_hqx_params->greenMask = format.RGBToColor(0,255,0);
+	_hqx_params->redBlueMask = format.RGBToColor(255,0,255);
 
-extern "C" {
+	_hqx_params->green_redBlue_Mask = (_hqx_params->greenMask << 16) | _hqx_params->redBlueMask;
 
-#if !defined(_WIN32) && !defined(MACOSX) && !defined(__OS2__)
-#define hq2x_16 _hq2x_16
+	_hqx_params->RGBtoYUV = _RGBtoYUV;
 #endif
-
-void hq2x_16(const byte *, byte *, uint32, uint32, uint32, uint32, hqx_parameters *);
-
 }
 
-void HQ2x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
-	hq2x_16(srcPtr, dstPtr, width, height, srcPitch, dstPitch, &hqx_params);
-}
-
-#endif
-
 #define interpolate_1_1(a,b)         (ColorMask::kBytesPerPixel == 2 ? interpolate16_1_1<ColorMask>(a,b) : interpolate32_1_1<ColorMask>(a,b))
 #define interpolate_3_1(a,b)         (ColorMask::kBytesPerPixel == 2 ? interpolate16_3_1<ColorMask>(a,b) : interpolate32_3_1<ColorMask>(a,b))
 #define interpolate_7_1(a,b)         (ColorMask::kBytesPerPixel == 2 ? interpolate16_7_1<ColorMask>(a,b) : interpolate32_7_1<ColorMask>(a,b))
@@ -185,13 +173,13 @@ void HQ2x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch,
 #define PIXEL11_90	*(q+1+nextlineDst) = interpolate_2_3_3(w5, w6, w8);
 #define PIXEL11_100	*(q+1+nextlineDst) = interpolate_14_1_1(w5, w6, w8);
 
-#define YUV(x)	(sizeof(Pixel) == 2 ? RGBtoYUV[w ## x] : ConvertYUV<ColorMask>(w ## x))
+#define YUV(x)	(sizeof(Pixel) == 2 ? RGBtoYUV[w ## x] : ConvertYUV<ColorMask>(w ## x, RGBtoYUV))
 
 /**
  * Convert 32 bit RGB values to Yuv
  */
 template<typename ColorMask>
-static inline uint32 ConvertYUV(uint32 x) {
+static inline uint32 ConvertYUV(uint32 x, const uint32 *RGBtoYUV) {
 	int r, g, b;
 
 	r = (ColorMask::kRedMask & (ColorMask::kRedMask << (8 - Graphics::ColorMasks<565>::kRedBits)) & x)
@@ -211,7 +199,7 @@ static inline uint32 ConvertYUV(uint32 x) {
  * Adapted for ScummVM to 16 bit output and optimized by Max Horn.
  */
 template<typename ColorMask>
-static void HQ2x_implementation(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
+static void HQ2x_implementation(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height, const uint32 *RGBtoYUV) {
 	typedef typename ColorMask::PixelType Pixel;
 
 	int w1, w2, w3, w4, w5, w6, w7, w8, w9;
@@ -2068,26 +2056,6 @@ static void HQ2x_implementation(const uint8 *srcPtr, uint32 srcPitch, uint8 *dst
 	}
 }
 
-#ifdef USE_NASM
-// Assembly version of HQ3x
-
-extern "C" {
-
-#if !defined(_WIN32) && !defined(MACOSX) && !defined(__OS2__)
-#define hq3x_16 _hq3x_16
-#endif
-
-
-void hq3x_16(const byte *, byte *, uint32, uint32, uint32, uint32, hqx_parameters *);
-
-}
-
-void HQ3x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
-	hq3x_16(srcPtr, dstPtr, width, height, srcPitch, dstPitch, &hqx_params);
-}
-
-#endif
-
 #define PIXEL00_1M  *(q) = interpolate_3_1(w5, w1);
 #define PIXEL00_1U  *(q) = interpolate_3_1(w5, w2);
 #define PIXEL00_1L  *(q) = interpolate_3_1(w5, w4);
@@ -2148,7 +2116,7 @@ void HQ3x(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch,
  * Adapted for ScummVM to 16 bit output and optimized by Max Horn.
  */
 template<typename ColorMask>
-static void HQ3x_implementation(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
+static void HQ3x_implementation(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height, const uint32 *RGBtoYUV) {
 	typedef typename ColorMask::PixelType Pixel;
 
 	int  w1, w2, w3, w4, w5, w6, w7, w8, w9;
@@ -4979,72 +4947,90 @@ static void HQ3x_implementation(const uint8 *srcPtr, uint32 srcPitch, uint8 *dst
 	}
 }
 
-HQScaler::HQScaler(const Graphics::PixelFormat &format) : Scaler(format) {
+HQScaler::HQScaler(const Graphics::PixelFormat &format) : Scaler(format),
+#ifdef USE_NASM
+	_hqx_params(nullptr),
+#endif
+	_RGBtoYUV(nullptr) {
 	_factor = 2;
 
 	if (format.bytesPerPixel == 2) {
-		InitLUT(format);
+		initLUT(format);
 	} else {
 		// Pass a 16 bit 565 format to InitLut
 		Graphics::PixelFormat format16(2,
 		                               5, 6, 5, 0,
 		                               11, 5, 0, 0);
-		InitLUT(format16);
+		initLUT(format16);
 	}
 }
 
 HQScaler::~HQScaler() {
-	free(RGBtoYUV);
-	RGBtoYUV = 0;
+	delete[] _RGBtoYUV;
+	_RGBtoYUV = nullptr;
+
+#ifdef USE_NASM
+	delete _hqx_params;
+	_hqx_params = nullptr;
+#endif
+}
+
+#ifdef USE_NASM
+void HQScaler::HQ2x16(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
+	hq2x_16(srcPtr, dstPtr, width, height, srcPitch, dstPitch, _hqx_params);
+}
+
+void HQScaler::HQ3x16(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
+	hq3x_16(srcPtr, dstPtr, width, height, srcPitch, dstPitch, _hqx_params);
+}
+#else
+void HQScaler::HQ2x16(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
+	if (_format.gLoss == 2)
+		HQ2x_implementation<Graphics::ColorMasks<565> >(srcPtr, srcPitch, dstPtr,
+				dstPitch, width, height, _RGBtoYUV);
+	else
+		HQ2x_implementation<Graphics::ColorMasks<555> >(srcPtr, srcPitch, dstPtr,
+				dstPitch, width, height, _RGBtoYUV);
 }
 
+void HQScaler::HQ3x16(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height) {
+	if (_format.gLoss == 2)
+		HQ3x_implementation<Graphics::ColorMasks<565> >(srcPtr, srcPitch, dstPtr,
+				dstPitch, width, height, _RGBtoYUV);
+	else
+		HQ3x_implementation<Graphics::ColorMasks<555> >(srcPtr, srcPitch, dstPtr,
+				dstPitch, width, height, _RGBtoYUV);
+}
+#endif
+
 void HQScaler::scaleIntern(const uint8 *srcPtr, uint32 srcPitch,
 							uint8 *dstPtr, uint32 dstPitch, int width, int height, int x, int y) {
 	if (_format.bytesPerPixel == 2) {
 		switch (_factor) {
-#ifdef USE_NASM
-		case 2:
-			HQ2x(srcPtr, srcPitch, dstPtr, dstPitch, width, height);
-			break;
-		case 3:
-			HQ3x(srcPtr, srcPitch, dstPtr, dstPitch, width, height);
-			break;
-#else
 		case 2:
-			if (_format.gLoss == 2)
-				HQ2x_implementation<Graphics::ColorMasks<565> >(srcPtr, srcPitch, dstPtr,
-						dstPitch, width, height);
-			else
-				HQ2x_implementation<Graphics::ColorMasks<555> >(srcPtr, srcPitch, dstPtr,
-						dstPitch, width, height);
+			HQ2x16(srcPtr, srcPitch, dstPtr, dstPitch, width, height);
 			break;
 		case 3:
-			if (_format.gLoss == 2)
-				HQ3x_implementation<Graphics::ColorMasks<565> >(srcPtr, srcPitch, dstPtr,
-						dstPitch, width, height);
-			else
-				HQ3x_implementation<Graphics::ColorMasks<555> >(srcPtr, srcPitch, dstPtr,
-						dstPitch, width, height);
+			HQ3x16(srcPtr, srcPitch, dstPtr, dstPitch, width, height);
 			break;
-#endif
 		}
 	} else {
 		switch (_factor) {
 		case 2:
 			if (_format.aLoss == 0)
 				HQ2x_implementation<Graphics::ColorMasks<8888> >(srcPtr, srcPitch, dstPtr,
-						dstPitch, width, height);
+						dstPitch, width, height, _RGBtoYUV);
 			else
 				HQ2x_implementation<Graphics::ColorMasks<888> >(srcPtr, srcPitch, dstPtr,
-						dstPitch, width, height);
+						dstPitch, width, height, _RGBtoYUV);
 			break;
 		case 3:
 			if (_format.aLoss == 0)
 				HQ3x_implementation<Graphics::ColorMasks<8888> >(srcPtr, srcPitch, dstPtr,
-						dstPitch, width, height);
+						dstPitch, width, height, _RGBtoYUV);
 			else
 				HQ3x_implementation<Graphics::ColorMasks<888> >(srcPtr, srcPitch, dstPtr,
-						dstPitch, width, height);
+						dstPitch, width, height, _RGBtoYUV);
 			break;
 		}
 	}
diff --git a/graphics/scaler/hq.h b/graphics/scaler/hq.h
index 7a4e0df98c2..f9c725826a3 100644
--- a/graphics/scaler/hq.h
+++ b/graphics/scaler/hq.h
@@ -23,6 +23,10 @@
 
 #include "graphics/scalerplugin.h"
 
+#ifdef USE_NASM
+struct hqx_parameters;
+#endif
+
 class HQScaler : public Scaler {
 public:
 	HQScaler(const Graphics::PixelFormat &format);
@@ -32,6 +36,16 @@ public:
 protected:
 	virtual void scaleIntern(const uint8 *srcPtr, uint32 srcPitch,
 							uint8 *dstPtr, uint32 dstPitch, int width, int height, int x, int y) override;
+
+	void initLUT(Graphics::PixelFormat format);
+	inline void HQ2x16(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height);
+	inline void HQ3x16(const uint8 *srcPtr, uint32 srcPitch, uint8 *dstPtr, uint32 dstPitch, int width, int height);
+
+	uint32 *_RGBtoYUV;
+#ifdef USE_NASM
+	hqx_parameters *_hqx_params;
+#endif
+
 };
 
 




More information about the Scummvm-git-logs mailing list