modern_lisp-machine/.config/mpv/shaders/FSR.glsl

// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

// FidelityFX FSR v1.0.2 by AMD
// ported to mpv by agyild

// Changelog
// Made it compatible with pre-OpenGL 4.0 renderers
// Made it directly operate on LUMA plane, since the original shader was operating on LUMA by deriving it from RGB. This should cause a major increase in performance, especially on OpenGL 4.0+ renderers (4+2 texture lookups vs. 12+5)
// Removed transparency preservation mechanism since the alpha channel is a separate source plane than LUMA
// Added optional performance-saving lossy optimizations to EASU (Credit: atyuwen, https://atyuwen.github.io/posts/optimizing-fsr/)
// 
// Notes
// Per AMD's guidelines only upscales content up to 4x (e.g., 1080p -> 2160p, 720p -> 1440p etc.) and everything else in between,
// that means FSR will scale up to 4x at maximum, and any further scaling will be processed by mpv's scalers

//!HOOK LUMA
//!BIND HOOKED
//!SAVE EASUTEX
//!DESC FidelityFX Super Resolution v1.0.2 (EASU)
//!WHEN OUTPUT.w OUTPUT.h * LUMA.w LUMA.h * / 1.0 >
//!WIDTH OUTPUT.w OUTPUT.w LUMA.w 2 * < * LUMA.w 2 * OUTPUT.w LUMA.w 2 * > * + OUTPUT.w OUTPUT.w LUMA.w 2 * = * +
//!HEIGHT OUTPUT.h OUTPUT.h LUMA.h 2 * < * LUMA.h 2 * OUTPUT.h LUMA.h 2 * > * + OUTPUT.h OUTPUT.h LUMA.h 2 * = * +
//!COMPONENTS 1

// User variables - EASU
#define FSR_PQ 0 // Whether the source content has PQ gamma or not. Needs to be set to the same value for both passes. 0 or 1.
#define FSR_EASU_DERING 1 // If set to 0, disables deringing for a small increase in performance. 0 or 1.
#define FSR_EASU_SIMPLE_ANALYSIS 0 // If set to 1, uses a simpler single-pass direction and length analysis for an increase in performance. 0 or 1.
#define FSR_EASU_QUIT_EARLY 0 // If set to 1, uses bilinear filtering for non-edge pixels and skips EASU on those regions for an increase in performance. 0 or 1.

// Shader code

#ifndef FSR_EASU_DIR_THRESHOLD
	#if (FSR_EASU_QUIT_EARLY == 1)
		#define FSR_EASU_DIR_THRESHOLD 64.0
	#elif (FSR_EASU_QUIT_EARLY == 0)
		#define FSR_EASU_DIR_THRESHOLD 32768.0
	#endif
#endif

float APrxLoRcpF1(float a) {
	return uintBitsToFloat(uint(0x7ef07ebb) - floatBitsToUint(a));
}

float APrxLoRsqF1(float a) {
	return uintBitsToFloat(uint(0x5f347d74) - (floatBitsToUint(a) >> uint(1)));
}

float AMin3F1(float x, float y, float z) {
	return min(x, min(y, z));
}

float AMax3F1(float x, float y, float z) {
	return max(x, max(y, z));
}

#if (FSR_PQ == 1)

float ToGamma2(float a) { 
	return pow(a, 4.0);
}

#endif

 // Filtering for a given tap for the scalar.
 void FsrEasuTap(
	inout float aC,	// Accumulated color, with negative lobe.
	inout float aW, // Accumulated weight.
	vec2 off,       // Pixel offset from resolve position to tap.
	vec2 dir,       // Gradient direction.
	vec2 len,       // Length.
	float lob,      // Negative lobe strength.
	float clp,		// Clipping point.
	float c){		// Tap color.
	// Rotate offset by direction.
	vec2 v;
	v.x = (off.x * ( dir.x)) + (off.y * dir.y);
	v.y = (off.x * (-dir.y)) + (off.y * dir.x);
	// Anisotropy.
	v *= len;
	// Compute distance^2.
	float d2 = v.x * v.x + v.y * v.y;
	// Limit to the window as at corner, 2 taps can easily be outside.
	d2 = min(d2, clp);
	// Approximation of lancos2 without sin() or rcp(), or sqrt() to get x.
	//  (25/16 * (2/5 * x^2 - 1)^2 - (25/16 - 1)) * (1/4 * x^2 - 1)^2
	//  |_______________________________________|   |_______________|
	//                   base                             window
	// The general form of the 'base' is,
	//  (a*(b*x^2-1)^2-(a-1))
	// Where 'a=1/(2*b-b^2)' and 'b' moves around the negative lobe.
	float wB = float(2.0 / 5.0) * d2 + -1.0;
	float wA = lob * d2 + -1.0;
	wB *= wB;
	wA *= wA;
	wB = float(25.0 / 16.0) * wB + float(-(25.0 / 16.0 - 1.0));
	float w = wB * wA;
	// Do weighted average.
	aC += c * w;
	aW += w;
}

// Accumulate direction and length.
void FsrEasuSet(
	inout vec2 dir,
	inout float len,
	vec2 pp,
#if (FSR_EASU_SIMPLE_ANALYSIS == 1)
	float b, float c,
	float i, float j, float f, float e,
	float k, float l, float h, float g,
	float o, float n
#elif (FSR_EASU_SIMPLE_ANALYSIS == 0)
	bool biS, bool biT, bool biU, bool biV,
	float lA, float lB, float lC, float lD, float lE
#endif
	){
	// Compute bilinear weight, branches factor out as predicates are compiler time immediates.
	//  s t
	//  u v
#if (FSR_EASU_SIMPLE_ANALYSIS == 1)
	vec4 w = vec4(0.0);
	w.x = (1.0 - pp.x) * (1.0 - pp.y);
	w.y =        pp.x  * (1.0 - pp.y);
	w.z = (1.0 - pp.x) *        pp.y;
	w.w =        pp.x  *        pp.y;

	float lA = dot(w, vec4(b, c, f, g));
	float lB = dot(w, vec4(e, f, i, j));
	float lC = dot(w, vec4(f, g, j, k));
	float lD = dot(w, vec4(g, h, k, l));
	float lE = dot(w, vec4(j, k, n, o));
#elif (FSR_EASU_SIMPLE_ANALYSIS == 0)
	float w = 0.0;
	if (biS)
		w = (1.0 - pp.x) * (1.0 - pp.y);
	if (biT)
		w =        pp.x  * (1.0 - pp.y);
	if (biU)
		w = (1.0 - pp.x) *        pp.y;
	if (biV)
		w =        pp.x  *        pp.y;
#endif
	// Direction is the '+' diff.
	//    a
	//  b c d
	//    e
	// Then takes magnitude from abs average of both sides of 'c'.
	// Length converts gradient reversal to 0, smoothly to non-reversal at 1, shaped, then adding horz and vert terms.
	float dc = lD - lC;
	float cb = lC - lB;
	float lenX = max(abs(dc), abs(cb));
	lenX = APrxLoRcpF1(lenX);
	float dirX = lD - lB;
	lenX = clamp(abs(dirX) * lenX, 0.0, 1.0);
	lenX *= lenX;
	// Repeat for the y axis.
	float ec = lE - lC;
	float ca = lC - lA;
	float lenY = max(abs(ec), abs(ca));
	lenY = APrxLoRcpF1(lenY);
	float dirY = lE - lA;
	lenY = clamp(abs(dirY) * lenY, 0.0, 1.0);
	lenY *= lenY;
#if (FSR_EASU_SIMPLE_ANALYSIS == 1)
	len = lenX + lenY;
	dir = vec2(dirX, dirY);
#elif (FSR_EASU_SIMPLE_ANALYSIS == 0)
	dir += vec2(dirX, dirY) * w;
	len += dot(vec2(w), vec2(lenX, lenY));
#endif
}

vec4 hook() {
	// Result
	vec4 pix = vec4(0.0, 0.0, 0.0, 1.0);

	//------------------------------------------------------------------------------------------------------------------------------
	//      +---+---+
	//      |   |   |
	//      +--(0)--+
	//      | b | c |
	//  +---F---+---+---+
	//  | e | f | g | h |
	//  +--(1)--+--(2)--+
	//  | i | j | k | l |
	//  +---+---+---+---+
	//      | n | o |
	//      +--(3)--+
	//      |   |   |
	//      +---+---+
	// Get position of 'F'.
	vec2 pp = HOOKED_pos * HOOKED_size - vec2(0.5);
	vec2 fp = floor(pp);
	pp -= fp;
	//------------------------------------------------------------------------------------------------------------------------------
	// 12-tap kernel.
	//    b c
	//  e f g h
	//  i j k l
	//    n o
	// Gather 4 ordering.
	//  a b
	//  r g
	// Allowing dead-code removal to remove the 'z's.
#if (defined(HOOKED_gather) && (__VERSION__ >= 400 || (GL_ES && __VERSION__ >= 310)))
	vec4 bczzL = HOOKED_gather(vec2((fp + vec2(1.0, -1.0)) * HOOKED_pt), 0);
	vec4 ijfeL = HOOKED_gather(vec2((fp + vec2(0.0,  1.0)) * HOOKED_pt), 0);
	vec4 klhgL = HOOKED_gather(vec2((fp + vec2(2.0,  1.0)) * HOOKED_pt), 0);
	vec4 zzonL = HOOKED_gather(vec2((fp + vec2(1.0,  3.0)) * HOOKED_pt), 0);
#else
	// pre-OpenGL 4.0 compatibility
	float b = HOOKED_tex(vec2((fp + vec2(0.5, -0.5)) * HOOKED_pt)).r;
	float c = HOOKED_tex(vec2((fp + vec2(1.5, -0.5)) * HOOKED_pt)).r;
	
	float e = HOOKED_tex(vec2((fp + vec2(-0.5, 0.5)) * HOOKED_pt)).r;
	float f = HOOKED_tex(vec2((fp + vec2( 0.5, 0.5)) * HOOKED_pt)).r;
	float g = HOOKED_tex(vec2((fp + vec2( 1.5, 0.5)) * HOOKED_pt)).r;
	float h = HOOKED_tex(vec2((fp + vec2( 2.5, 0.5)) * HOOKED_pt)).r;
	
	float i = HOOKED_tex(vec2((fp + vec2(-0.5, 1.5)) * HOOKED_pt)).r;
	float j = HOOKED_tex(vec2((fp + vec2( 0.5, 1.5)) * HOOKED_pt)).r;
	float k = HOOKED_tex(vec2((fp + vec2( 1.5, 1.5)) * HOOKED_pt)).r;
	float l = HOOKED_tex(vec2((fp + vec2( 2.5, 1.5)) * HOOKED_pt)).r;
	
	float n = HOOKED_tex(vec2((fp + vec2(0.5, 2.5) ) * HOOKED_pt)).r;
	float o = HOOKED_tex(vec2((fp + vec2(1.5, 2.5) ) * HOOKED_pt)).r;

	vec4 bczzL = vec4(b, c, 0.0, 0.0);
	vec4 ijfeL = vec4(i, j, f, e);
	vec4 klhgL = vec4(k, l, h, g);
	vec4 zzonL = vec4(0.0, 0.0, o, n);
#endif
	//------------------------------------------------------------------------------------------------------------------------------
	// Rename.
	float bL = bczzL.x;
	float cL = bczzL.y;
	float iL = ijfeL.x;
	float jL = ijfeL.y;
	float fL = ijfeL.z;
	float eL = ijfeL.w;
	float kL = klhgL.x;
	float lL = klhgL.y;
	float hL = klhgL.z;
	float gL = klhgL.w;
	float oL = zzonL.z;
	float nL = zzonL.w;

#if (FSR_PQ == 1)
	// Not the most performance-friendly solution, but should work until mpv adds proper gamma transformation functions for shaders
	bL = ToGamma2(bL);
	cL = ToGamma2(cL);
	iL = ToGamma2(iL);
	jL = ToGamma2(jL);
	fL = ToGamma2(fL);
	eL = ToGamma2(eL);
	kL = ToGamma2(kL);
	lL = ToGamma2(lL);
	hL = ToGamma2(hL);
	gL = ToGamma2(gL);
	oL = ToGamma2(oL);
	nL = ToGamma2(nL);
#endif

	// Accumulate for bilinear interpolation.
	vec2 dir = vec2(0.0);
	float len = 0.0;
#if (FSR_EASU_SIMPLE_ANALYSIS == 1)
	FsrEasuSet(dir, len, pp, bL, cL, iL, jL, fL, eL, kL, lL, hL, gL, oL, nL);
#elif (FSR_EASU_SIMPLE_ANALYSIS == 0)
	FsrEasuSet(dir, len, pp, true, false, false, false, bL, eL, fL, gL, jL);
	FsrEasuSet(dir, len, pp, false, true, false, false, cL, fL, gL, hL, kL);
	FsrEasuSet(dir, len, pp, false, false, true, false, fL, iL, jL, kL, nL);
	FsrEasuSet(dir, len, pp, false, false, false, true, gL, jL, kL, lL, oL);
#endif
	//------------------------------------------------------------------------------------------------------------------------------
	// Normalize with approximation, and cleanup close to zero.
	vec2 dir2 = dir * dir;
	float dirR = dir2.x + dir2.y;
	bool zro = dirR < float(1.0 / FSR_EASU_DIR_THRESHOLD);
	dirR = APrxLoRsqF1(dirR);
#if (FSR_EASU_QUIT_EARLY == 1)
	if (zro) {
		vec4 w = vec4(0.0);
		w.x = (1.0 - pp.x) * (1.0 - pp.y);
		w.y =        pp.x  * (1.0 - pp.y);
		w.z = (1.0 - pp.x) *        pp.y;
		w.w =        pp.x  *        pp.y;

		pix.r = clamp(dot(w, vec4(fL, gL, jL, kL)), 0.0, 1.0);
		return pix;
	}
#elif (FSR_EASU_QUIT_EARLY == 0)
	dirR = zro ? 1.0 : dirR;
	dir.x = zro ? 1.0 : dir.x;
#endif
	dir *= vec2(dirR);
	// Transform from {0 to 2} to {0 to 1} range, and shape with square.
	len = len * 0.5;
	len *= len;
	// Stretch kernel {1.0 vert|horz, to sqrt(2.0) on diagonal}.
	float stretch = (dir.x * dir.x + dir.y * dir.y) * APrxLoRcpF1(max(abs(dir.x), abs(dir.y)));
	// Anisotropic length after rotation,
	//  x := 1.0 lerp to 'stretch' on edges
	//  y := 1.0 lerp to 2x on edges
	vec2 len2 = vec2(1.0 + (stretch - 1.0) * len, 1.0 + -0.5 * len);
	// Based on the amount of 'edge',
	// the window shifts from +/-{sqrt(2.0) to slightly beyond 2.0}.
	float lob = 0.5 + float((1.0 / 4.0 - 0.04) - 0.5) * len;
	// Set distance^2 clipping point to the end of the adjustable window.
	float clp = APrxLoRcpF1(lob);
	//------------------------------------------------------------------------------------------------------------------------------
	// Accumulation
	//    b c
	//  e f g h
	//  i j k l
	//    n o
	float aC = 0.0;
	float aW = 0.0;
	FsrEasuTap(aC, aW, vec2( 0.0,-1.0) - pp, dir, len2, lob, clp, bL); // b
	FsrEasuTap(aC, aW, vec2( 1.0,-1.0) - pp, dir, len2, lob, clp, cL); // c
	FsrEasuTap(aC, aW, vec2(-1.0, 1.0) - pp, dir, len2, lob, clp, iL); // i
	FsrEasuTap(aC, aW, vec2( 0.0, 1.0) - pp, dir, len2, lob, clp, jL); // j
	FsrEasuTap(aC, aW, vec2( 0.0, 0.0) - pp, dir, len2, lob, clp, fL); // f
	FsrEasuTap(aC, aW, vec2(-1.0, 0.0) - pp, dir, len2, lob, clp, eL); // e
	FsrEasuTap(aC, aW, vec2( 1.0, 1.0) - pp, dir, len2, lob, clp, kL); // k
	FsrEasuTap(aC, aW, vec2( 2.0, 1.0) - pp, dir, len2, lob, clp, lL); // l
	FsrEasuTap(aC, aW, vec2( 2.0, 0.0) - pp, dir, len2, lob, clp, hL); // h
	FsrEasuTap(aC, aW, vec2( 1.0, 0.0) - pp, dir, len2, lob, clp, gL); // g
	FsrEasuTap(aC, aW, vec2( 1.0, 2.0) - pp, dir, len2, lob, clp, oL); // o
	FsrEasuTap(aC, aW, vec2( 0.0, 2.0) - pp, dir, len2, lob, clp, nL); // n
	//------------------------------------------------------------------------------------------------------------------------------
	// Normalize and dering.
	pix.r = aC / aW;
#if (FSR_EASU_DERING == 1)
	float min1 = min(AMin3F1(fL, gL, jL), kL);
	float max1 = max(AMax3F1(fL, gL, jL), kL);
	pix.r = clamp(pix.r, min1, max1);
#endif
	pix.r = clamp(pix.r, 0.0, 1.0);

	return pix;
}

//!HOOK LUMA
//!BIND EASUTEX
//!DESC FidelityFX Super Resolution v1.0.2 (RCAS)
//!WIDTH EASUTEX.w
//!HEIGHT EASUTEX.h
//!COMPONENTS 1

// User variables - RCAS
#define SHARPNESS 0.2 // Controls the amount of sharpening. The scale is {0.0 := maximum, to N>0, where N is the number of stops (halving) of the reduction of sharpness}. 0.0 to 2.0.
#define FSR_RCAS_DENOISE 1 // If set to 1, lessens the sharpening on noisy areas. Can be disabled for better performance. 0 or 1.
#define FSR_PQ 0 // Whether the source content has PQ gamma or not. Needs to be set to the same value for both passes. 0 or 1.

// Shader code

#define FSR_RCAS_LIMIT (0.25 - (1.0 / 16.0)) // This is set at the limit of providing unnatural results for sharpening.

float APrxMedRcpF1(float a) {
	float b = uintBitsToFloat(uint(0x7ef19fff) - floatBitsToUint(a));
	return b * (-b * a + 2.0);
}

float AMax3F1(float x, float y, float z) {
	return max(x, max(y, z)); 
}

float AMin3F1(float x, float y, float z) {
	return min(x, min(y, z));
}

#if (FSR_PQ == 1)

float FromGamma2(float a) { 
	return sqrt(sqrt(a));
}

#endif

vec4 hook() {
	// Algorithm uses minimal 3x3 pixel neighborhood.
	//    b 
	//  d e f
	//    h
#if (defined(EASUTEX_gather) && (__VERSION__ >= 400 || (GL_ES && __VERSION__ >= 310)))
	vec3 bde = EASUTEX_gather(EASUTEX_pos + EASUTEX_pt * vec2(-0.5), 0).xyz;
	float b = bde.z;
	float d = bde.x;
	float e = bde.y;

	vec2 fh = EASUTEX_gather(EASUTEX_pos + EASUTEX_pt * vec2(0.5), 0).zx;
	float f = fh.x;
	float h = fh.y;
#else
	float b = EASUTEX_texOff(vec2( 0.0, -1.0)).r;
	float d = EASUTEX_texOff(vec2(-1.0,  0.0)).r;
	float e = EASUTEX_tex(EASUTEX_pos).r;
	float f = EASUTEX_texOff(vec2(1.0, 0.0)).r;
	float h = EASUTEX_texOff(vec2(0.0, 1.0)).r;
#endif

	// Min and max of ring.
	float mn1L = min(AMin3F1(b, d, f), h);
	float mx1L = max(AMax3F1(b, d, f), h);

	// Immediate constants for peak range.
	vec2 peakC = vec2(1.0, -1.0 * 4.0);

	// Limiters, these need to be high precision RCPs.
	float hitMinL = min(mn1L, e) / (4.0 * mx1L);
	float hitMaxL = (peakC.x - max(mx1L, e)) / (4.0 * mn1L + peakC.y);
	float lobeL = max(-hitMinL, hitMaxL);
	float lobe = max(float(-FSR_RCAS_LIMIT), min(lobeL, 0.0)) * exp2(-clamp(float(SHARPNESS), 0.0, 2.0));

	// Apply noise removal.
#if (FSR_RCAS_DENOISE == 1)
	// Noise detection.
	float nz = 0.25 * b + 0.25 * d + 0.25 * f + 0.25 * h - e;
	nz = clamp(abs(nz) * APrxMedRcpF1(AMax3F1(AMax3F1(b, d, e), f, h) - AMin3F1(AMin3F1(b, d, e), f, h)), 0.0, 1.0);
	nz = -0.5 * nz + 1.0;
	lobe *= nz;
#endif

	// Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes.
	float rcpL = APrxMedRcpF1(4.0 * lobe + 1.0);
	vec4 pix = vec4(0.0, 0.0, 0.0, 1.0);
	pix.r = float((lobe * b + lobe * d + lobe * h + lobe * f + e) * rcpL);
#if (FSR_PQ == 1)
	pix.r = FromGamma2(pix.r);
#endif

	return pix;
}
huge update 2024-06-01 05:00:40 +00:00			`// Copyright (c) 2021 Advanced Micro Devices, Inc. All rights reserved.`
			`//`
			`// Permission is hereby granted, free of charge, to any person obtaining a copy`
			`// of this software and associated documentation files (the "Software"), to deal`
			`// in the Software without restriction, including without limitation the rights`
			`// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell`
			`// copies of the Software, and to permit persons to whom the Software is`
			`// furnished to do so, subject to the following conditions:`
			`//`
			`// The above copyright notice and this permission notice shall be included in`
			`// all copies or substantial portions of the Software.`
			`//`
			`// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR`
			`// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,`
			`// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE`
			`// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER`
			`// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,`
			`// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN`
			`// THE SOFTWARE.`

			`// FidelityFX FSR v1.0.2 by AMD`
			`// ported to mpv by agyild`

			`// Changelog`
			`// Made it compatible with pre-OpenGL 4.0 renderers`
			`// Made it directly operate on LUMA plane, since the original shader was operating on LUMA by deriving it from RGB. This should cause a major increase in performance, especially on OpenGL 4.0+ renderers (4+2 texture lookups vs. 12+5)`
			`// Removed transparency preservation mechanism since the alpha channel is a separate source plane than LUMA`
			`// Added optional performance-saving lossy optimizations to EASU (Credit: atyuwen, https://atyuwen.github.io/posts/optimizing-fsr/)`
			`//`
			`// Notes`
			`// Per AMD's guidelines only upscales content up to 4x (e.g., 1080p -> 2160p, 720p -> 1440p etc.) and everything else in between,`
			`// that means FSR will scale up to 4x at maximum, and any further scaling will be processed by mpv's scalers`

			`//!HOOK LUMA`
			`//!BIND HOOKED`
			`//!SAVE EASUTEX`
			`//!DESC FidelityFX Super Resolution v1.0.2 (EASU)`
			`//!WHEN OUTPUT.w OUTPUT.h * LUMA.w LUMA.h * / 1.0 >`
			`//!WIDTH OUTPUT.w OUTPUT.w LUMA.w 2 * < * LUMA.w 2 * OUTPUT.w LUMA.w 2 * > * + OUTPUT.w OUTPUT.w LUMA.w 2 * = * +`
			`//!HEIGHT OUTPUT.h OUTPUT.h LUMA.h 2 * < * LUMA.h 2 * OUTPUT.h LUMA.h 2 * > * + OUTPUT.h OUTPUT.h LUMA.h 2 * = * +`
			`//!COMPONENTS 1`

			`// User variables - EASU`
			`#define FSR_PQ 0 // Whether the source content has PQ gamma or not. Needs to be set to the same value for both passes. 0 or 1.`
			`#define FSR_EASU_DERING 1 // If set to 0, disables deringing for a small increase in performance. 0 or 1.`
			`#define FSR_EASU_SIMPLE_ANALYSIS 0 // If set to 1, uses a simpler single-pass direction and length analysis for an increase in performance. 0 or 1.`
			`#define FSR_EASU_QUIT_EARLY 0 // If set to 1, uses bilinear filtering for non-edge pixels and skips EASU on those regions for an increase in performance. 0 or 1.`

			`// Shader code`

			`#ifndef FSR_EASU_DIR_THRESHOLD`
			`#if (FSR_EASU_QUIT_EARLY == 1)`
			`#define FSR_EASU_DIR_THRESHOLD 64.0`
			`#elif (FSR_EASU_QUIT_EARLY == 0)`
			`#define FSR_EASU_DIR_THRESHOLD 32768.0`
			`#endif`
			`#endif`

			`float APrxLoRcpF1(float a) {`
			`return uintBitsToFloat(uint(0x7ef07ebb) - floatBitsToUint(a));`
			`}`

			`float APrxLoRsqF1(float a) {`
			`return uintBitsToFloat(uint(0x5f347d74) - (floatBitsToUint(a) >> uint(1)));`
			`}`

			`float AMin3F1(float x, float y, float z) {`
			`return min(x, min(y, z));`
			`}`

			`float AMax3F1(float x, float y, float z) {`
			`return max(x, max(y, z));`
			`}`

			`#if (FSR_PQ == 1)`

			`float ToGamma2(float a) {`
			`return pow(a, 4.0);`
			`}`

			`#endif`

			`// Filtering for a given tap for the scalar.`
			`void FsrEasuTap(`
			`inout float aC, // Accumulated color, with negative lobe.`
			`inout float aW, // Accumulated weight.`
			`vec2 off, // Pixel offset from resolve position to tap.`
			`vec2 dir, // Gradient direction.`
			`vec2 len, // Length.`
			`float lob, // Negative lobe strength.`
			`float clp, // Clipping point.`
			`float c){ // Tap color.`
			`// Rotate offset by direction.`
			`vec2 v;`
			`v.x = (off.x * ( dir.x)) + (off.y * dir.y);`
			`v.y = (off.x * (-dir.y)) + (off.y * dir.x);`
			`// Anisotropy.`
			`v *= len;`
			`// Compute distance^2.`
			`float d2 = v.x * v.x + v.y * v.y;`
			`// Limit to the window as at corner, 2 taps can easily be outside.`
			`d2 = min(d2, clp);`
			`// Approximation of lancos2 without sin() or rcp(), or sqrt() to get x.`
			`// (25/16 * (2/5 * x^2 - 1)^2 - (25/16 - 1)) * (1/4 * x^2 - 1)^2`
			`// \|_______________________________________\| \|_______________\|`
			`// base window`
			`// The general form of the 'base' is,`
			`// (a(bx^2-1)^2-(a-1))`
			`// Where 'a=1/(2*b-b^2)' and 'b' moves around the negative lobe.`
			`float wB = float(2.0 / 5.0) * d2 + -1.0;`
			`float wA = lob * d2 + -1.0;`
			`wB *= wB;`
			`wA *= wA;`
			`wB = float(25.0 / 16.0) * wB + float(-(25.0 / 16.0 - 1.0));`
			`float w = wB * wA;`
			`// Do weighted average.`
			`aC += c * w;`
			`aW += w;`
			`}`

			`// Accumulate direction and length.`
			`void FsrEasuSet(`
			`inout vec2 dir,`
			`inout float len,`
			`vec2 pp,`
			`#if (FSR_EASU_SIMPLE_ANALYSIS == 1)`
			`float b, float c,`
			`float i, float j, float f, float e,`
			`float k, float l, float h, float g,`
			`float o, float n`
			`#elif (FSR_EASU_SIMPLE_ANALYSIS == 0)`
			`bool biS, bool biT, bool biU, bool biV,`
			`float lA, float lB, float lC, float lD, float lE`
			`#endif`
			`){`
			`// Compute bilinear weight, branches factor out as predicates are compiler time immediates.`
			`// s t`
			`// u v`
			`#if (FSR_EASU_SIMPLE_ANALYSIS == 1)`
			`vec4 w = vec4(0.0);`
			`w.x = (1.0 - pp.x) * (1.0 - pp.y);`
			`w.y = pp.x * (1.0 - pp.y);`
			`w.z = (1.0 - pp.x) * pp.y;`
			`w.w = pp.x * pp.y;`

			`float lA = dot(w, vec4(b, c, f, g));`
			`float lB = dot(w, vec4(e, f, i, j));`
			`float lC = dot(w, vec4(f, g, j, k));`
			`float lD = dot(w, vec4(g, h, k, l));`
			`float lE = dot(w, vec4(j, k, n, o));`
			`#elif (FSR_EASU_SIMPLE_ANALYSIS == 0)`
			`float w = 0.0;`
			`if (biS)`
			`w = (1.0 - pp.x) * (1.0 - pp.y);`
			`if (biT)`
			`w = pp.x * (1.0 - pp.y);`
			`if (biU)`
			`w = (1.0 - pp.x) * pp.y;`
			`if (biV)`
			`w = pp.x * pp.y;`
			`#endif`
			`// Direction is the '+' diff.`
			`// a`
			`// b c d`
			`// e`
			`// Then takes magnitude from abs average of both sides of 'c'.`
			`// Length converts gradient reversal to 0, smoothly to non-reversal at 1, shaped, then adding horz and vert terms.`
			`float dc = lD - lC;`
			`float cb = lC - lB;`
			`float lenX = max(abs(dc), abs(cb));`
			`lenX = APrxLoRcpF1(lenX);`
			`float dirX = lD - lB;`
			`lenX = clamp(abs(dirX) * lenX, 0.0, 1.0);`
			`lenX *= lenX;`
			`// Repeat for the y axis.`
			`float ec = lE - lC;`
			`float ca = lC - lA;`
			`float lenY = max(abs(ec), abs(ca));`
			`lenY = APrxLoRcpF1(lenY);`
			`float dirY = lE - lA;`
			`lenY = clamp(abs(dirY) * lenY, 0.0, 1.0);`
			`lenY *= lenY;`
			`#if (FSR_EASU_SIMPLE_ANALYSIS == 1)`
			`len = lenX + lenY;`
			`dir = vec2(dirX, dirY);`
			`#elif (FSR_EASU_SIMPLE_ANALYSIS == 0)`
			`dir += vec2(dirX, dirY) * w;`
			`len += dot(vec2(w), vec2(lenX, lenY));`
			`#endif`
			`}`

			`vec4 hook() {`
			`// Result`
			`vec4 pix = vec4(0.0, 0.0, 0.0, 1.0);`

			`//------------------------------------------------------------------------------------------------------------------------------`
			`// +---+---+`
			`// \| \| \|`
			`// +--(0)--+`
			`// \| b \| c \|`
			`// +---F---+---+---+`
			`// \| e \| f \| g \| h \|`
			`// +--(1)--+--(2)--+`
			`// \| i \| j \| k \| l \|`
			`// +---+---+---+---+`
			`// \| n \| o \|`
			`// +--(3)--+`
			`// \| \| \|`
			`// +---+---+`
			`// Get position of 'F'.`
			`vec2 pp = HOOKED_pos * HOOKED_size - vec2(0.5);`
			`vec2 fp = floor(pp);`
			`pp -= fp;`
			`//------------------------------------------------------------------------------------------------------------------------------`
			`// 12-tap kernel.`
			`// b c`
			`// e f g h`
			`// i j k l`
			`// n o`
			`// Gather 4 ordering.`
			`// a b`
			`// r g`
			`// Allowing dead-code removal to remove the 'z's.`
			`#if (defined(HOOKED_gather) && (__VERSION__ >= 400 \|\| (GL_ES && __VERSION__ >= 310)))`
			`vec4 bczzL = HOOKED_gather(vec2((fp + vec2(1.0, -1.0)) * HOOKED_pt), 0);`
			`vec4 ijfeL = HOOKED_gather(vec2((fp + vec2(0.0, 1.0)) * HOOKED_pt), 0);`
			`vec4 klhgL = HOOKED_gather(vec2((fp + vec2(2.0, 1.0)) * HOOKED_pt), 0);`
			`vec4 zzonL = HOOKED_gather(vec2((fp + vec2(1.0, 3.0)) * HOOKED_pt), 0);`
			`#else`
			`// pre-OpenGL 4.0 compatibility`
			`float b = HOOKED_tex(vec2((fp + vec2(0.5, -0.5)) * HOOKED_pt)).r;`
			`float c = HOOKED_tex(vec2((fp + vec2(1.5, -0.5)) * HOOKED_pt)).r;`

			`float e = HOOKED_tex(vec2((fp + vec2(-0.5, 0.5)) * HOOKED_pt)).r;`
			`float f = HOOKED_tex(vec2((fp + vec2( 0.5, 0.5)) * HOOKED_pt)).r;`
			`float g = HOOKED_tex(vec2((fp + vec2( 1.5, 0.5)) * HOOKED_pt)).r;`
			`float h = HOOKED_tex(vec2((fp + vec2( 2.5, 0.5)) * HOOKED_pt)).r;`

			`float i = HOOKED_tex(vec2((fp + vec2(-0.5, 1.5)) * HOOKED_pt)).r;`
			`float j = HOOKED_tex(vec2((fp + vec2( 0.5, 1.5)) * HOOKED_pt)).r;`
			`float k = HOOKED_tex(vec2((fp + vec2( 1.5, 1.5)) * HOOKED_pt)).r;`
			`float l = HOOKED_tex(vec2((fp + vec2( 2.5, 1.5)) * HOOKED_pt)).r;`

			`float n = HOOKED_tex(vec2((fp + vec2(0.5, 2.5) ) * HOOKED_pt)).r;`
			`float o = HOOKED_tex(vec2((fp + vec2(1.5, 2.5) ) * HOOKED_pt)).r;`

			`vec4 bczzL = vec4(b, c, 0.0, 0.0);`
			`vec4 ijfeL = vec4(i, j, f, e);`
			`vec4 klhgL = vec4(k, l, h, g);`
			`vec4 zzonL = vec4(0.0, 0.0, o, n);`
			`#endif`
			`//------------------------------------------------------------------------------------------------------------------------------`
			`// Rename.`
			`float bL = bczzL.x;`
			`float cL = bczzL.y;`
			`float iL = ijfeL.x;`
			`float jL = ijfeL.y;`
			`float fL = ijfeL.z;`
			`float eL = ijfeL.w;`
			`float kL = klhgL.x;`
			`float lL = klhgL.y;`
			`float hL = klhgL.z;`
			`float gL = klhgL.w;`
			`float oL = zzonL.z;`
			`float nL = zzonL.w;`

			`#if (FSR_PQ == 1)`
			`// Not the most performance-friendly solution, but should work until mpv adds proper gamma transformation functions for shaders`
			`bL = ToGamma2(bL);`
			`cL = ToGamma2(cL);`
			`iL = ToGamma2(iL);`
			`jL = ToGamma2(jL);`
			`fL = ToGamma2(fL);`
			`eL = ToGamma2(eL);`
			`kL = ToGamma2(kL);`
			`lL = ToGamma2(lL);`
			`hL = ToGamma2(hL);`
			`gL = ToGamma2(gL);`
			`oL = ToGamma2(oL);`
			`nL = ToGamma2(nL);`
			`#endif`

			`// Accumulate for bilinear interpolation.`
			`vec2 dir = vec2(0.0);`
			`float len = 0.0;`
			`#if (FSR_EASU_SIMPLE_ANALYSIS == 1)`
			`FsrEasuSet(dir, len, pp, bL, cL, iL, jL, fL, eL, kL, lL, hL, gL, oL, nL);`
			`#elif (FSR_EASU_SIMPLE_ANALYSIS == 0)`
			`FsrEasuSet(dir, len, pp, true, false, false, false, bL, eL, fL, gL, jL);`
			`FsrEasuSet(dir, len, pp, false, true, false, false, cL, fL, gL, hL, kL);`
			`FsrEasuSet(dir, len, pp, false, false, true, false, fL, iL, jL, kL, nL);`
			`FsrEasuSet(dir, len, pp, false, false, false, true, gL, jL, kL, lL, oL);`
			`#endif`
			`//------------------------------------------------------------------------------------------------------------------------------`
			`// Normalize with approximation, and cleanup close to zero.`
			`vec2 dir2 = dir * dir;`
			`float dirR = dir2.x + dir2.y;`
			`bool zro = dirR < float(1.0 / FSR_EASU_DIR_THRESHOLD);`
			`dirR = APrxLoRsqF1(dirR);`
			`#if (FSR_EASU_QUIT_EARLY == 1)`
			`if (zro) {`
			`vec4 w = vec4(0.0);`
			`w.x = (1.0 - pp.x) * (1.0 - pp.y);`
			`w.y = pp.x * (1.0 - pp.y);`
			`w.z = (1.0 - pp.x) * pp.y;`
			`w.w = pp.x * pp.y;`

			`pix.r = clamp(dot(w, vec4(fL, gL, jL, kL)), 0.0, 1.0);`
			`return pix;`
			`}`
			`#elif (FSR_EASU_QUIT_EARLY == 0)`
			`dirR = zro ? 1.0 : dirR;`
			`dir.x = zro ? 1.0 : dir.x;`
			`#endif`
			`dir *= vec2(dirR);`
			`// Transform from {0 to 2} to {0 to 1} range, and shape with square.`
			`len = len * 0.5;`
			`len *= len;`
			`// Stretch kernel {1.0 vert\|horz, to sqrt(2.0) on diagonal}.`
			`float stretch = (dir.x * dir.x + dir.y * dir.y) * APrxLoRcpF1(max(abs(dir.x), abs(dir.y)));`
			`// Anisotropic length after rotation,`
			`// x := 1.0 lerp to 'stretch' on edges`
			`// y := 1.0 lerp to 2x on edges`
			`vec2 len2 = vec2(1.0 + (stretch - 1.0) * len, 1.0 + -0.5 * len);`
			`// Based on the amount of 'edge',`
			`// the window shifts from +/-{sqrt(2.0) to slightly beyond 2.0}.`
			`float lob = 0.5 + float((1.0 / 4.0 - 0.04) - 0.5) * len;`
			`// Set distance^2 clipping point to the end of the adjustable window.`
			`float clp = APrxLoRcpF1(lob);`
			`//------------------------------------------------------------------------------------------------------------------------------`
			`// Accumulation`
			`// b c`
			`// e f g h`
			`// i j k l`
			`// n o`
			`float aC = 0.0;`
			`float aW = 0.0;`
			`FsrEasuTap(aC, aW, vec2( 0.0,-1.0) - pp, dir, len2, lob, clp, bL); // b`
			`FsrEasuTap(aC, aW, vec2( 1.0,-1.0) - pp, dir, len2, lob, clp, cL); // c`
			`FsrEasuTap(aC, aW, vec2(-1.0, 1.0) - pp, dir, len2, lob, clp, iL); // i`
			`FsrEasuTap(aC, aW, vec2( 0.0, 1.0) - pp, dir, len2, lob, clp, jL); // j`
			`FsrEasuTap(aC, aW, vec2( 0.0, 0.0) - pp, dir, len2, lob, clp, fL); // f`
			`FsrEasuTap(aC, aW, vec2(-1.0, 0.0) - pp, dir, len2, lob, clp, eL); // e`
			`FsrEasuTap(aC, aW, vec2( 1.0, 1.0) - pp, dir, len2, lob, clp, kL); // k`
			`FsrEasuTap(aC, aW, vec2( 2.0, 1.0) - pp, dir, len2, lob, clp, lL); // l`
			`FsrEasuTap(aC, aW, vec2( 2.0, 0.0) - pp, dir, len2, lob, clp, hL); // h`
			`FsrEasuTap(aC, aW, vec2( 1.0, 0.0) - pp, dir, len2, lob, clp, gL); // g`
			`FsrEasuTap(aC, aW, vec2( 1.0, 2.0) - pp, dir, len2, lob, clp, oL); // o`
			`FsrEasuTap(aC, aW, vec2( 0.0, 2.0) - pp, dir, len2, lob, clp, nL); // n`
			`//------------------------------------------------------------------------------------------------------------------------------`
			`// Normalize and dering.`
			`pix.r = aC / aW;`
			`#if (FSR_EASU_DERING == 1)`
			`float min1 = min(AMin3F1(fL, gL, jL), kL);`
			`float max1 = max(AMax3F1(fL, gL, jL), kL);`
			`pix.r = clamp(pix.r, min1, max1);`
			`#endif`
			`pix.r = clamp(pix.r, 0.0, 1.0);`

			`return pix;`
			`}`

			`//!HOOK LUMA`
			`//!BIND EASUTEX`
			`//!DESC FidelityFX Super Resolution v1.0.2 (RCAS)`
			`//!WIDTH EASUTEX.w`
			`//!HEIGHT EASUTEX.h`
			`//!COMPONENTS 1`

			`// User variables - RCAS`
			`#define SHARPNESS 0.2 // Controls the amount of sharpening. The scale is {0.0 := maximum, to N>0, where N is the number of stops (halving) of the reduction of sharpness}. 0.0 to 2.0.`
			`#define FSR_RCAS_DENOISE 1 // If set to 1, lessens the sharpening on noisy areas. Can be disabled for better performance. 0 or 1.`
			`#define FSR_PQ 0 // Whether the source content has PQ gamma or not. Needs to be set to the same value for both passes. 0 or 1.`

			`// Shader code`

			`#define FSR_RCAS_LIMIT (0.25 - (1.0 / 16.0)) // This is set at the limit of providing unnatural results for sharpening.`

			`float APrxMedRcpF1(float a) {`
			`float b = uintBitsToFloat(uint(0x7ef19fff) - floatBitsToUint(a));`
			`return b * (-b * a + 2.0);`
			`}`

			`float AMax3F1(float x, float y, float z) {`
			`return max(x, max(y, z));`
			`}`

			`float AMin3F1(float x, float y, float z) {`
			`return min(x, min(y, z));`
			`}`

			`#if (FSR_PQ == 1)`

			`float FromGamma2(float a) {`
			`return sqrt(sqrt(a));`
			`}`

			`#endif`

			`vec4 hook() {`
			`// Algorithm uses minimal 3x3 pixel neighborhood.`
			`// b`
			`// d e f`
			`// h`
			`#if (defined(EASUTEX_gather) && (__VERSION__ >= 400 \|\| (GL_ES && __VERSION__ >= 310)))`
			`vec3 bde = EASUTEX_gather(EASUTEX_pos + EASUTEX_pt * vec2(-0.5), 0).xyz;`
			`float b = bde.z;`
			`float d = bde.x;`
			`float e = bde.y;`

			`vec2 fh = EASUTEX_gather(EASUTEX_pos + EASUTEX_pt * vec2(0.5), 0).zx;`
			`float f = fh.x;`
			`float h = fh.y;`
			`#else`
			`float b = EASUTEX_texOff(vec2( 0.0, -1.0)).r;`
			`float d = EASUTEX_texOff(vec2(-1.0, 0.0)).r;`
			`float e = EASUTEX_tex(EASUTEX_pos).r;`
			`float f = EASUTEX_texOff(vec2(1.0, 0.0)).r;`
			`float h = EASUTEX_texOff(vec2(0.0, 1.0)).r;`
			`#endif`

			`// Min and max of ring.`
			`float mn1L = min(AMin3F1(b, d, f), h);`
			`float mx1L = max(AMax3F1(b, d, f), h);`

			`// Immediate constants for peak range.`
			`vec2 peakC = vec2(1.0, -1.0 * 4.0);`

			`// Limiters, these need to be high precision RCPs.`
			`float hitMinL = min(mn1L, e) / (4.0 * mx1L);`
			`float hitMaxL = (peakC.x - max(mx1L, e)) / (4.0 * mn1L + peakC.y);`
			`float lobeL = max(-hitMinL, hitMaxL);`
			`float lobe = max(float(-FSR_RCAS_LIMIT), min(lobeL, 0.0)) * exp2(-clamp(float(SHARPNESS), 0.0, 2.0));`

			`// Apply noise removal.`
			`#if (FSR_RCAS_DENOISE == 1)`
			`// Noise detection.`
			`float nz = 0.25 * b + 0.25 * d + 0.25 * f + 0.25 * h - e;`
			`nz = clamp(abs(nz) * APrxMedRcpF1(AMax3F1(AMax3F1(b, d, e), f, h) - AMin3F1(AMin3F1(b, d, e), f, h)), 0.0, 1.0);`
			`nz = -0.5 * nz + 1.0;`
			`lobe *= nz;`
			`#endif`

			`// Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes.`
			`float rcpL = APrxMedRcpF1(4.0 * lobe + 1.0);`
			`vec4 pix = vec4(0.0, 0.0, 0.0, 1.0);`
			`pix.r = float((lobe * b + lobe * d + lobe * h + lobe * f + e) * rcpL);`
			`#if (FSR_PQ == 1)`
			`pix.r = FromGamma2(pix.r);`
			`#endif`

			`return pix;`
			`}`