diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/converter-hlsl/CSMain_converter.hlsl b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/converter-hlsl/CSMain_converter.hlsl new file mode 100644 index 0000000000..52501d00a2 --- /dev/null +++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/converter-hlsl/CSMain_converter.hlsl @@ -0,0 +1,1169 @@ +/* GStreamer + * Copyright (C) 2023 Seungha Yang <seungha@centricular.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ */ + +#ifdef BUILDING_HLSL /* HLSL branch: each BUILDING_CSMain_* section below declares its own inTex, outTex and Execute(); the ENTRY_POINT wrapper at the end of this branch calls Execute() */ +#ifdef BUILDING_CSMain_YUY2_to_AYUV +Texture2D<float4> inTex : register(t0); +RWTexture2D<unorm float4> outTex : register(u0); + +void Execute (uint3 tid) /* Loads the input texel at tid, names its channels r=Y0, g=U, b=Y1, a=V, and writes two output texels at x = 2*tid.x and 2*tid.x+1 as (1.0, Y, U, V); both share the same U and V. */ +{ + float4 val = inTex.Load (tid); + float Y0 = val.r; + float U = val.g; + float Y1 = val.b; + float V = val.a; + + outTex[uint2(tid.x * 2, tid.y)] = float4 (1.0, Y0, U, V); + outTex[uint2(tid.x * 2 + 1, tid.y)] = float4 (1.0, Y1, U, V); +} +#endif + +#ifdef BUILDING_CSMain_UYVY_to_AYUV +Texture2D<float4> inTex : register(t0); +RWTexture2D<unorm float4> outTex : register(u0); + +void Execute (uint3 tid) /* Loads the input texel at tid, names its channels r=U, g=Y0, b=V, a=Y1, and writes two output texels at x = 2*tid.x and 2*tid.x+1 as (1.0, Y, U, V); both share the same U and V. */ +{ + float4 val = inTex.Load (tid); + float Y0 = val.g; + float U = val.r; + float Y1 = val.a; + float V = val.b; + + outTex[uint2(tid.x * 2, tid.y)] = float4 (1.0, Y0, U, V); + outTex[uint2(tid.x * 2 + 1, tid.y)] = float4 (1.0, Y1, U, V); +} +#endif + +#ifdef BUILDING_CSMain_VYUY_to_AYUV +Texture2D<float4> inTex : register(t0); +RWTexture2D<unorm float4> outTex : register(u0); + +void Execute (uint3 tid) /* Loads the input texel at tid, names its channels r=V, g=Y0, b=U, a=Y1, and writes two output texels at x = 2*tid.x and 2*tid.x+1 as (1.0, Y, U, V); both share the same U and V. */ +{ + float4 val = inTex.Load (tid); + float Y0 = val.g; + float U = val.b; + float Y1 = val.a; + float V = val.r; + + outTex[uint2(tid.x * 2, tid.y)] = float4 (1.0, Y0, U, V); + outTex[uint2(tid.x * 2 + 1, tid.y)] = float4 (1.0, Y1, U, V); +} +#endif + +#ifdef BUILDING_CSMain_YVYU_to_AYUV +Texture2D<float4> inTex : register(t0); +RWTexture2D<unorm float4> outTex : register(u0); + +void Execute (uint3 tid) /* Loads the input texel at tid, names its channels r=Y0, g=V, b=Y1, a=U, and writes two output texels at x = 2*tid.x and 2*tid.x+1 as (1.0, Y, U, V); both share the same U and V. */ +{ + float4 val = inTex.Load (tid); + float Y0 = val.r; + float U = val.a; + float Y1 = val.b; + float V = val.g; + + outTex[uint2(tid.x * 2, tid.y)] = float4 (1.0, Y0, U, V); + outTex[uint2(tid.x * 2 + 1, tid.y)] = float4 (1.0, Y1, U, V); +} +#endif + +#ifdef BUILDING_CSMain_v210_to_AYUV +Texture2D<float4> inTex : register(t0); +RWTexture2D<unorm float4> outTex : register(u0); + +void Execute (uint3 tid) /* Loads the xyz channels of four consecutive input texels starting at x = 4*tid.x and writes six output texels starting at x = 6*tid.x as (1.0, Y, U, V); each U/V pair (U0/V0, U2/V2, U4/V4) is reused for two adjacent outputs. */ +{ + uint xpos = tid.x * 4; + float3 val = inTex.Load (uint3 (xpos, tid.y, 0)).xyz; + float U0 = val.r; + float Y0 = val.g; + float V0 = val.b; + + val = inTex.Load (uint3 (xpos 
+ 1, tid.y, 0)).xyz; + float Y1 = val.r; + float U2 = val.g; + float Y2 = val.b; + + val = inTex.Load (uint3 (xpos + 2, tid.y, 0)).xyz; + float V2 = val.r; + float Y3 = val.g; + float U4 = val.b; + + val = inTex.Load (uint3 (xpos + 3, tid.y, 0)).xyz; + float Y4 = val.r; + float V4 = val.g; + float Y5 = val.b; + + xpos = tid.x * 6; + outTex[uint2(xpos, tid.y)] = float4 (1.0, Y0, U0, V0); + outTex[uint2(xpos + 1, tid.y)] = float4 (1.0, Y1, U0, V0); + outTex[uint2(xpos + 2, tid.y)] = float4 (1.0, Y2, U2, V2); + outTex[uint2(xpos + 3, tid.y)] = float4 (1.0, Y3, U2, V2); + outTex[uint2(xpos + 4, tid.y)] = float4 (1.0, Y4, U4, V4); + outTex[uint2(xpos + 5, tid.y)] = float4 (1.0, Y5, U4, V4); +} +#endif + +#ifdef BUILDING_CSMain_v308_to_AYUV +Texture2D<float4> inTex : register(t0); +RWTexture2D<unorm float4> outTex : register(u0); + +void Execute (uint3 tid) /* Loads three 4-channel input texels starting at x = 3*tid.x, reads their twelve channels in order as four (Y, U, V) triples, and writes four output texels starting at x = 4*tid.x as (1.0, Y, U, V). */ +{ + float4 val = inTex.Load (uint3 (tid.x * 3, tid.y, 0)); + float Y0 = val.x; + float U0 = val.y; + float V0 = val.z; + float Y1 = val.w; + + val = inTex.Load (uint3 (tid.x * 3 + 1, tid.y, 0)); + float U1 = val.x; + float V1 = val.y; + float Y2 = val.z; + float U2 = val.w; + + val = inTex.Load (uint3 (tid.x * 3 + 2, tid.y, 0)); + float V2 = val.x; + float Y3 = val.y; + float U3 = val.z; + float V3 = val.w; + + outTex[uint2(tid.x * 4, tid.y)] = float4 (1.0, Y0, U0, V0); + outTex[uint2(tid.x * 4 + 1, tid.y)] = float4 (1.0, Y1, U1, V1); + outTex[uint2(tid.x * 4 + 2, tid.y)] = float4 (1.0, Y2, U2, V2); + outTex[uint2(tid.x * 4 + 3, tid.y)] = float4 (1.0, Y3, U3, V3); +} +#endif + +#ifdef BUILDING_CSMain_IYU2_to_AYUV +Texture2D<float4> inTex : register(t0); +RWTexture2D<unorm float4> outTex : register(u0); + +void Execute (uint3 tid) /* Loads three 4-channel input texels starting at x = 3*tid.x, reads their twelve channels in order as four (U, Y, V) triples, and writes four output texels starting at x = 4*tid.x as (1.0, Y, U, V). */ +{ + float4 val = inTex.Load (uint3 (tid.x * 3, tid.y, 0)); + float U0 = val.x; + float Y0 = val.y; + float V0 = val.z; + float U1 = val.w; + + val = inTex.Load (uint3 (tid.x * 3 + 1, tid.y, 0)); + float Y1 = val.x; + float V1 = val.y; + float U2 = val.z; + float Y2 = val.w; + + val = 
inTex.Load (uint3 (tid.x * 3 + 2, tid.y, 0)); + float V2 = val.x; + float U3 = val.y; + float Y3 = val.z; + float V3 = val.w; + + outTex[uint2(tid.x * 4, tid.y)] = float4 (1.0, Y0, U0, V0); + outTex[uint2(tid.x * 4 + 1, tid.y)] = float4 (1.0, Y1, U1, V1); + outTex[uint2(tid.x * 4 + 2, tid.y)] = float4 (1.0, Y2, U2, V2); + outTex[uint2(tid.x * 4 + 3, tid.y)] = float4 (1.0, Y3, U3, V3); +} +#endif + +#ifdef BUILDING_CSMain_AYUV_to_YUY2 +Texture2D<float4> inTex : register(t0); +RWTexture2D<unorm float4> outTex : register(u0); + +void Execute (uint3 tid) /* Takes .yzw (named Y0, U, V) from the input texel at x = 2*tid.x and only .y (Y1) from the texel at x = 2*tid.x+1, then writes one output texel at tid.xy as (Y0, U, Y1, V). */ +{ + float3 val = inTex.Load (uint3(tid.x * 2, tid.y, 0)).yzw; + float Y0 = val.x; + float U = val.y; + float V = val.z; + float Y1 = inTex.Load (uint3(tid.x * 2 + 1, tid.y, 0)).y; + + outTex[tid.xy] = float4 (Y0, U, Y1, V); +} +#endif + +#ifdef BUILDING_CSMain_AYUV_to_UYVY +Texture2D<float4> inTex : register(t0); +RWTexture2D<unorm float4> outTex : register(u0); + +void Execute (uint3 tid) /* Takes .yzw (named Y0, U, V) from the input texel at x = 2*tid.x and only .y (Y1) from the texel at x = 2*tid.x+1, then writes one output texel at tid.xy as (U, Y0, V, Y1). */ +{ + float3 val = inTex.Load (uint3(tid.x * 2, tid.y, 0)).yzw; + float Y0 = val.x; + float U = val.y; + float V = val.z; + float Y1 = inTex.Load (uint3(tid.x * 2 + 1, tid.y, 0)).y; + + outTex[tid.xy] = float4 (U, Y0, V, Y1); +} +#endif + +#ifdef BUILDING_CSMain_AYUV_to_VYUY +Texture2D<float4> inTex : register(t0); +RWTexture2D<unorm float4> outTex : register(u0); + +void Execute (uint3 tid) /* Takes .yzw (named Y0, U, V) from the input texel at x = 2*tid.x and only .y (Y1) from the texel at x = 2*tid.x+1, then writes one output texel at tid.xy as (V, Y0, U, Y1). */ +{ + float3 val = inTex.Load (uint3(tid.x * 2, tid.y, 0)).yzw; + float Y0 = val.x; + float U = val.y; + float V = val.z; + float Y1 = inTex.Load (uint3(tid.x * 2 + 1, tid.y, 0)).y; + + outTex[tid.xy] = float4 (V, Y0, U, Y1); +} +#endif + +#ifdef BUILDING_CSMain_AYUV_to_YVYU +Texture2D<float4> inTex : register(t0); +RWTexture2D<unorm float4> outTex : register(u0); + +void Execute (uint3 tid) /* Takes .yzw (named Y0, U, V) from the input texel at x = 2*tid.x and only .y (Y1) from the texel at x = 2*tid.x+1, then writes one output texel at tid.xy as (Y0, V, Y1, U). */ +{ + float3 val = inTex.Load (uint3(tid.x * 2, tid.y, 0)).yzw; + float Y0 = val.x; + float U = val.y; + float V = val.z; + float Y1 = inTex.Load (uint3(tid.x * 2 + 1, tid.y, 0)).y; + + outTex[tid.xy] = float4 (Y0, V, Y1, U); +} +#endif + +#ifdef 
BUILDING_CSMain_AYUV_to_v210 +Texture2D<float4> inTex : register(t0); +RWTexture2D<unorm float4> outTex : register(u0); + +void Execute (uint3 tid) /* Loads six input texels starting at x = 6*tid.x (.yzw from offsets 0, 2 and 4, only .y from offsets 1, 3 and 5) and writes four output texels starting at x = 4*tid.x; the fourth component of every output is 0. */ +{ + uint xpos = tid.x * 6; + float3 val = inTex.Load (uint3 (xpos, tid.y, 0)).yzw; + float Y0 = val.x; + float U0 = val.y; + float V0 = val.z; + float Y1 = inTex.Load (uint3 (xpos + 1, tid.y, 0)).y; + + val = inTex.Load (uint3 (xpos + 2, tid.y, 0)).yzw; + float Y2 = val.x; + float U2 = val.y; + float V2 = val.z; + float Y3 = inTex.Load (uint3 (xpos + 3, tid.y, 0)).y; + + val = inTex.Load (uint3 (xpos + 4, tid.y, 0)).yzw; + float Y4 = val.x; + float U4 = val.y; + float V4 = val.z; + float Y5 = inTex.Load (uint3 (xpos + 5, tid.y, 0)).y; + + xpos = tid.x * 4; + outTex[uint2(xpos, tid.y)] = float4 (U0, Y0, V0, 0); + outTex[uint2(xpos + 1, tid.y)] = float4 (Y1, U2, Y2, 0); + outTex[uint2(xpos + 2, tid.y)] = float4 (V2, Y3, U4, 0); + outTex[uint2(xpos + 3, tid.y)] = float4 (Y4, V4, Y5, 0); +} +#endif + +#ifdef BUILDING_CSMain_AYUV_to_v308 +Texture2D<float4> inTex : register(t0); +RWTexture2D<unorm float4> outTex : register(u0); + +void Execute (uint3 tid) /* Loads the .yzw swizzle of four input texels starting at x = 4*tid.x and packs those twelve values, in order, into three 4-channel output texels starting at x = 3*tid.x. */ +{ + float3 val0 = inTex.Load (uint3 (tid.x * 4, tid.y, 0)).yzw; + float3 val1 = inTex.Load (uint3 (tid.x * 4 + 1, tid.y, 0)).yzw; + float3 val2 = inTex.Load (uint3 (tid.x * 4 + 2, tid.y, 0)).yzw; + float3 val3 = inTex.Load (uint3 (tid.x * 4 + 3, tid.y, 0)).yzw; + + outTex[uint2(tid.x * 3, tid.y)] = float4 (val0.x, val0.y, val0.z, val1.x); + outTex[uint2(tid.x * 3 + 1, tid.y)] = float4 (val1.y, val1.z, val2.x, val2.y); + outTex[uint2(tid.x * 3 + 2, tid.y)] = float4 (val2.z, val3.x, val3.y, val3.z); +} +#endif + +#ifdef BUILDING_CSMain_AYUV_to_IYU2 +Texture2D<float4> inTex : register(t0); +RWTexture2D<unorm float4> outTex : register(u0); + +void Execute (uint3 tid) /* Loads the .zyw swizzle of four input texels starting at x = 4*tid.x and packs those twelve values, in order, into three 4-channel output texels starting at x = 3*tid.x. */ +{ + float3 val0 = inTex.Load (uint3 (tid.x * 4, tid.y, 0)).zyw; + float3 val1 = inTex.Load (uint3 (tid.x * 4 + 1, tid.y, 0)).zyw; + float3 val2 = inTex.Load (uint3 (tid.x * 4 + 2, tid.y, 0)).zyw; + float3 val3 = 
inTex.Load (uint3 (tid.x * 4 + 3, tid.y, 0)).zyw; + + outTex[uint2(tid.x * 3, tid.y)] = float4 (val0.x, val0.y, val0.z, val1.x); + outTex[uint2(tid.x * 3 + 1, tid.y)] = float4 (val1.y, val1.z, val2.x, val2.y); + outTex[uint2(tid.x * 3 + 2, tid.y)] = float4 (val2.z, val3.x, val3.y, val3.z); +} +#endif + +#ifdef BUILDING_CSMain_AYUV_to_Y410 +Texture2D<float4> inTex : register(t0); +RWTexture2D<unorm float4> outTex : register(u0); + +void Execute (uint3 tid) /* Loads the input texel at tid, names its channels x=A, y=Y, z=U, w=V, and writes them to tid.xy reordered as (U, Y, V, A). */ +{ + float4 val = inTex.Load (tid); + float Y = val.y; + float U = val.z; + float V = val.w; + float A = val.x; + + outTex[tid.xy] = float4 (U, Y, V, A); +} +#endif + +#ifdef BUILDING_CSMain_RGB_to_RGBA +Texture2D<float4> inTex : register(t0); +RWTexture2D<unorm float4> outTex : register(u0); + +void Execute (uint3 tid) /* Loads three 4-channel input texels starting at x = 3*tid.x, reads their twelve channels in order as four (R, G, B) triples, and writes four output texels starting at x = 4*tid.x as (R, G, B, 1.0). */ +{ + float4 val = inTex.Load (uint3 (tid.x * 3, tid.y, 0)); + float R0 = val.r; + float G0 = val.g; + float B0 = val.b; + float R1 = val.a; + + val = inTex.Load (uint3 (tid.x * 3 + 1, tid.y, 0)); + float G1 = val.r; + float B1 = val.g; + float R2 = val.b; + float G2 = val.a; + + val = inTex.Load (uint3 (tid.x * 3 + 2, tid.y, 0)); + float B2 = val.r; + float R3 = val.g; + float G3 = val.b; + float B3 = val.a; + + outTex[uint2(tid.x * 4, tid.y)] = float4 (R0, G0, B0, 1.0); + outTex[uint2(tid.x * 4 + 1, tid.y)] = float4 (R1, G1, B1, 1.0); + outTex[uint2(tid.x * 4 + 2, tid.y)] = float4 (R2, G2, B2, 1.0); + outTex[uint2(tid.x * 4 + 3, tid.y)] = float4 (R3, G3, B3, 1.0); +} +#endif + +#ifdef BUILDING_CSMain_BGR_to_RGBA +Texture2D<float4> inTex : register(t0); +RWTexture2D<unorm float4> outTex : register(u0); + +void Execute (uint3 tid) /* Loads three 4-channel input texels starting at x = 3*tid.x, reads their twelve channels in order as four (B, G, R) triples, and writes four output texels starting at x = 4*tid.x as (R, G, B, 1.0). */ +{ + float4 val = inTex.Load (uint3 (tid.x * 3, tid.y, 0)); + float B0 = val.r; + float G0 = val.g; + float R0 = val.b; + float B1 = val.a; + + val = inTex.Load (uint3 (tid.x * 3 + 1, tid.y, 0)); + float G1 = val.r; + float R1 = val.g; + float B2 = val.b; + float G2 = val.a; + + val = inTex.Load (uint3 (tid.x * 3 + 2, tid.y, 0)); + float R2 = val.r; + float B3 = val.g; + 
float G3 = val.b; + float R3 = val.a; + + outTex[uint2(tid.x * 4, tid.y)] = float4 (R0, G0, B0, 1.0); + outTex[uint2(tid.x * 4 + 1, tid.y)] = float4 (R1, G1, B1, 1.0); + outTex[uint2(tid.x * 4 + 2, tid.y)] = float4 (R2, G2, B2, 1.0); + outTex[uint2(tid.x * 4 + 3, tid.y)] = float4 (R3, G3, B3, 1.0); +} +#endif + +#ifdef BUILDING_CSMain_RGB16_to_RGBA +Texture2D<uint> inTex : register(t0); +RWTexture2D<unorm float4> outTex : register(u0); + +void Execute (uint3 tid) /* Splits the uint loaded at tid into R = bits 15..11, G = bits 10..5, B = bits 4..0, divides them by 31, 63 and 31 respectively, and writes (R, G, B, 1.0) to tid.xy. */ +{ + uint val = inTex.Load (tid); + float R = float ((val & 0xf800) >> 11) / 31; + float G = float ((val & 0x7e0) >> 5) / 63; + float B = float ((val & 0x1f)) / 31; + + outTex[tid.xy] = float4 (R, G, B, 1.0); +} +#endif + +#ifdef BUILDING_CSMain_BGR16_to_RGBA +Texture2D<uint> inTex : register(t0); +RWTexture2D<unorm float4> outTex : register(u0); + +void Execute (uint3 tid) /* Splits the uint loaded at tid into B = bits 15..11, G = bits 10..5, R = bits 4..0, divides them by 31, 63 and 31 respectively, and writes (R, G, B, 1.0) to tid.xy. */ +{ + uint val = inTex.Load (tid); + float B = float ((val & 0xf800) >> 11) / 31; + float G = float ((val & 0x7e0) >> 5) / 63; + float R = float ((val & 0x1f)) / 31; + + outTex[tid.xy] = float4 (R, G, B, 1.0); +} +#endif + +#ifdef BUILDING_CSMain_RGB15_to_RGBA +Texture2D<uint> inTex : register(t0); +RWTexture2D<unorm float4> outTex : register(u0); + +void Execute (uint3 tid) /* Splits the uint loaded at tid into R = bits 14..10, G = bits 9..5, B = bits 4..0, divides each by 31, and writes (R, G, B, 1.0) to tid.xy. */ +{ + uint val = inTex.Load (tid); + uint R = (val & 0x7c00) >> 10; + uint G = (val & 0x3e0) >> 5; + uint B = (val & 0x1f); + + outTex[tid.xy] = float4 (float3 (R, G, B) / 31, 1.0); +} +#endif + +#ifdef BUILDING_CSMain_BGR15_to_RGBA +Texture2D<uint> inTex : register(t0); +RWTexture2D<unorm float4> outTex : register(u0); + +void Execute (uint3 tid) /* Splits the uint loaded at tid into B = bits 14..10, G = bits 9..5, R = bits 4..0, divides each by 31, and writes (R, G, B, 1.0) to tid.xy. */ +{ + uint val = inTex.Load (tid); + uint B = (val & 0x7c00) >> 10; + uint G = (val & 0x3e0) >> 5; + uint R = (val & 0x1f); + + outTex[tid.xy] = float4 (float3 (R, G, B) / 31, 1.0); +} +#endif + +#ifdef BUILDING_CSMain_r210_to_RGBA +Texture2D<uint> inTex : register(t0); +RWTexture2D<unorm float4> outTex : register(u0); + +void Execute (uint3 tid) /* Reverses the byte order of the uint loaded at tid, extracts R = bits 29..20, G = bits 19..10, B = bits 9..0 from the swapped value, divides each by 1023, and writes (R, G, B, 1.0) to tid.xy. */ +{ + uint val = inTex.Load (tid); + uint val_be = ((val & 0xff) << 24) | ((val & 
0xff00) << 8) | + ((val & 0xff0000) >> 8) | ((val & 0xff000000) >> 24); + uint R = (val_be >> 20) & 0x3ff; + uint G = (val_be >> 10) & 0x3ff; + uint B = val_be & 0x3ff; + + outTex[tid.xy] = float4 (float3 (R, G, B) / 1023, 1.0); +} +#endif + +#ifdef BUILDING_CSMain_RGBA_to_RGB +Texture2D<float4> inTex : register(t0); +RWTexture2D<unorm float4> outTex : register(u0); + +void Execute (uint3 tid) /* Loads .rgb of four input texels starting at x = 4*tid.x and packs those twelve values in r, g, b order into three 4-channel output texels starting at x = 3*tid.x. */ +{ + float3 val0 = inTex.Load (uint3 (tid.x * 4, tid.y, 0)).rgb; + float3 val1 = inTex.Load (uint3 (tid.x * 4 + 1, tid.y, 0)).rgb; + float3 val2 = inTex.Load (uint3 (tid.x * 4 + 2, tid.y, 0)).rgb; + float3 val3 = inTex.Load (uint3 (tid.x * 4 + 3, tid.y, 0)).rgb; + + outTex[uint2(tid.x * 3, tid.y)] = float4 (val0.r, val0.g, val0.b, val1.r); + outTex[uint2(tid.x * 3 + 1, tid.y)] = float4 (val1.g, val1.b, val2.r, val2.g); + outTex[uint2(tid.x * 3 + 2, tid.y)] = float4 (val2.b, val3.r, val3.g, val3.b); +} +#endif + +#ifdef BUILDING_CSMain_RGBA_to_BGR +Texture2D<float4> inTex : register(t0); +RWTexture2D<unorm float4> outTex : register(u0); + +void Execute (uint3 tid) /* Loads .rgb of four input texels starting at x = 4*tid.x and packs those twelve values in b, g, r order into three 4-channel output texels starting at x = 3*tid.x. */ +{ + float3 val0 = inTex.Load (uint3 (tid.x * 4, tid.y, 0)).rgb; + float3 val1 = inTex.Load (uint3 (tid.x * 4 + 1, tid.y, 0)).rgb; + float3 val2 = inTex.Load (uint3 (tid.x * 4 + 2, tid.y, 0)).rgb; + float3 val3 = inTex.Load (uint3 (tid.x * 4 + 3, tid.y, 0)).rgb; + + outTex[uint2(tid.x * 3, tid.y)] = float4 (val0.b, val0.g, val0.r, val1.b); + outTex[uint2(tid.x * 3 + 1, tid.y)] = float4 (val1.g, val1.r, val2.b, val2.g); + outTex[uint2(tid.x * 3 + 2, tid.y)] = float4 (val2.r, val3.b, val3.g, val3.r); +} +#endif + +#ifdef BUILDING_CSMain_RGBA_to_RGB16 +Texture2D<float4> inTex : register(t0); +RWTexture2D<uint> outTex : register(u0); + +void Execute (uint3 tid) /* Multiplies r and b by 31 and g by 63, stores the products in uint variables, and writes (R << 11) | (G << 5) | B to tid.xy. */ +{ + float3 val = inTex.Load (tid).rgb; + uint R = val.r * 31; + uint G = val.g * 63; + uint B = val.b * 31; + + outTex[tid.xy] = (R << 11) | (G << 5) | B; +} +#endif + +#ifdef BUILDING_CSMain_RGBA_to_BGR16 +Texture2D<float4> inTex : register(t0); 
+RWTexture2D<uint> outTex : register(u0); + +void Execute (uint3 tid) /* Multiplies r and b by 31 and g by 63, stores the products in uint variables, and writes (B << 11) | (G << 5) | R to tid.xy. */ +{ + float3 val = inTex.Load (tid).rgb; + uint R = val.r * 31; + uint G = val.g * 63; + uint B = val.b * 31; + + outTex[tid.xy] = (B << 11) | (G << 5) | R; +} +#endif + +#ifdef BUILDING_CSMain_RGBA_to_RGB15 +Texture2D<float4> inTex : register(t0); +RWTexture2D<uint> outTex : register(u0); + +void Execute (uint3 tid) /* Multiplies .rgb of the input texel by 31, stores the result in a uint3, and writes (r << 10) | (g << 5) | b to tid.xy. */ +{ + uint3 val = inTex.Load (tid).rgb * 31; + + outTex[tid.xy] = (val.r << 10) | (val.g << 5) | val.b; +} +#endif + +#ifdef BUILDING_CSMain_RGBA_to_BGR15 +Texture2D<float4> inTex : register(t0); +RWTexture2D<uint> outTex : register(u0); + +void Execute (uint3 tid) /* Multiplies .rgb of the input texel by 31, stores the result in a uint3, and writes (b << 10) | (g << 5) | r to tid.xy. */ +{ + uint3 val = inTex.Load (tid).rgb * 31; + + outTex[tid.xy] = (val.b << 10) | (val.g << 5) | val.r; +} +#endif + +#ifdef BUILDING_CSMain_RGBA_to_r210 +Texture2D<float4> inTex : register(t0); +RWTexture2D<uint> outTex : register(u0); + +void Execute (uint3 tid) /* Multiplies .rgb of the input texel by 1023 into a uint3, packs it as (r << 20) | (g << 10) | b, reverses the byte order of that word, and writes it to tid.xy. */ +{ + uint3 val = inTex.Load (tid).rgb * 1023; + uint packed = (val.r << 20) | (val.g << 10) | val.b; + uint packed_be = ((packed & 0xff) << 24) | ((packed & 0xff00) << 8) | + ((packed & 0xff0000) >> 8) | ((packed & 0xff000000) >> 24); + + outTex[tid.xy] = packed_be; +} +#endif + +#ifdef BUILDING_CSMain_RGBA_to_BGRA +Texture2D<float4> inTex : register(t0); +RWTexture2D<unorm float4> outTex : register(u0); + +void Execute (uint3 tid) /* Writes the .bgra swizzle of the input texel at tid to tid.xy, i.e. the first and third channels are exchanged. */ +{ + float4 val = inTex.Load (tid); + + outTex[tid.xy] = val.bgra; +} +#endif + +[numthreads(8, 8, 1)] +void ENTRY_POINT (uint3 tid : SV_DispatchThreadID) /* Shader entry point declared with numthreads(8, 8, 1); passes the dispatch thread ID unchanged to Execute(). NOTE(review): ENTRY_POINT is not defined in the visible part of this file - presumably supplied as a macro at compile time; confirm. */ +{ + Execute (tid); +} +#else /* BUILDING_HLSL not defined: the shader text is provided as a C string constant instead */ +static const char str_CSMain_converter[] = +"#ifdef BUILDING_CSMain_YUY2_to_AYUV\n" +"Texture2D<float4> inTex : register(t0);\n" +"RWTexture2D<unorm float4> outTex : register(u0);\n" +"\n" +"void Execute (uint3 tid)\n" +"{\n" +" float4 val = inTex.Load (tid);\n" +" float Y0 = val.r;\n" +" float U = val.g;\n" +" float Y1 = val.b;\n" +" float V = val.a;\n" +"\n" +" outTex[uint2(tid.x * 2, tid.y)] = float4 (1.0, Y0, U, V);\n" +" outTex[uint2(tid.x * 2 + 1, 
tid.y)] = float4 (1.0, Y1, U, V);\n" +"}\n" +"#endif\n" +"\n" +"#ifdef BUILDING_CSMain_UYVY_to_AYUV\n" +"Texture2D<float4> inTex : register(t0);\n" +"RWTexture2D<unorm float4> outTex : register(u0);\n" +"\n" +"void Execute (uint3 tid)\n" +"{\n" +" float4 val = inTex.Load (tid);\n" +" float Y0 = val.g;\n" +" float U = val.r;\n" +" float Y1 = val.a;\n" +" float V = val.b;\n" +"\n" +" outTex[uint2(tid.x * 2, tid.y)] = float4 (1.0, Y0, U, V);\n" +" outTex[uint2(tid.x * 2 + 1, tid.y)] = float4 (1.0, Y1, U, V);\n" +"}\n" +"#endif\n" +"\n" +"#ifdef BUILDING_CSMain_VYUY_to_AYUV\n" +"Texture2D<float4> inTex : register(t0);\n" +"RWTexture2D<unorm float4> outTex : register(u0);\n" +"\n" +"void Execute (uint3 tid)\n" +"{\n" +" float4 val = inTex.Load (tid);\n" +" float Y0 = val.g;\n" +" float U = val.b;\n" +" float Y1 = val.a;\n" +" float V = val.r;\n" +"\n" +" outTex[uint2(tid.x * 2, tid.y)] = float4 (1.0, Y0, U, V);\n" +" outTex[uint2(tid.x * 2 + 1, tid.y)] = float4 (1.0, Y1, U, V);\n" +"}\n" +"#endif\n" +"\n" +"#ifdef BUILDING_CSMain_YVYU_to_AYUV\n" +"Texture2D<float4> inTex : register(t0);\n" +"RWTexture2D<unorm float4> outTex : register(u0);\n" +"\n" +"void Execute (uint3 tid)\n" +"{\n" +" float4 val = inTex.Load (tid);\n" +" float Y0 = val.r;\n" +" float U = val.a;\n" +" float Y1 = val.b;\n" +" float V = val.g;\n" +"\n" +" outTex[uint2(tid.x * 2, tid.y)] = float4 (1.0, Y0, U, V);\n" +" outTex[uint2(tid.x * 2 + 1, tid.y)] = float4 (1.0, Y1, U, V);\n" +"}\n" +"#endif\n" +"\n" +"#ifdef BUILDING_CSMain_v210_to_AYUV\n" +"Texture2D<float4> inTex : register(t0);\n" +"RWTexture2D<unorm float4> outTex : register(u0);\n" +"\n" +"void Execute (uint3 tid)\n" +"{\n" +" uint xpos = tid.x * 4;\n" +" float3 val = inTex.Load (uint3 (xpos, tid.y, 0)).xyz;\n" +" float U0 = val.r;\n" +" float Y0 = val.g;\n" +" float V0 = val.b;\n" +"\n" +" val = inTex.Load (uint3 (xpos + 1, tid.y, 0)).xyz;\n" +" float Y1 = val.r;\n" +" float U2 = val.g;\n" +" float Y2 = val.b;\n" +"\n" +" val = inTex.Load 
(uint3 (xpos + 2, tid.y, 0)).xyz;\n" +" float V2 = val.r;\n" +" float Y3 = val.g;\n" +" float U4 = val.b;\n" +"\n" +" val = inTex.Load (uint3 (xpos + 3, tid.y, 0)).xyz;\n" +" float Y4 = val.r;\n" +" float V4 = val.g;\n" +" float Y5 = val.b;\n" +"\n" +" xpos = tid.x * 6;\n" +" outTex[uint2(xpos, tid.y)] = float4 (1.0, Y0, U0, V0);\n" +" outTex[uint2(xpos + 1, tid.y)] = float4 (1.0, Y1, U0, V0);\n" +" outTex[uint2(xpos + 2, tid.y)] = float4 (1.0, Y2, U2, V2);\n" +" outTex[uint2(xpos + 3, tid.y)] = float4 (1.0, Y3, U2, V2);\n" +" outTex[uint2(xpos + 4, tid.y)] = float4 (1.0, Y4, U4, V4);\n" +" outTex[uint2(xpos + 5, tid.y)] = float4 (1.0, Y5, U4, V4);\n" +"}\n" +"#endif\n" +"\n" +"#ifdef BUILDING_CSMain_v308_to_AYUV\n" +"Texture2D<float4> inTex : register(t0);\n" +"RWTexture2D<unorm float4> outTex : register(u0);\n" +"\n" +"void Execute (uint3 tid)\n" +"{\n" +" float4 val = inTex.Load (uint3 (tid.x * 3, tid.y, 0));\n" +" float Y0 = val.x;\n" +" float U0 = val.y;\n" +" float V0 = val.z;\n" +" float Y1 = val.w;\n" +"\n" +" val = inTex.Load (uint3 (tid.x * 3 + 1, tid.y, 0));\n" +" float U1 = val.x;\n" +" float V1 = val.y;\n" +" float Y2 = val.z;\n" +" float U2 = val.w;\n" +"\n" +" val = inTex.Load (uint3 (tid.x * 3 + 2, tid.y, 0));\n" +" float V2 = val.x;\n" +" float Y3 = val.y;\n" +" float U3 = val.z;\n" +" float V3 = val.w;\n" +"\n" +" outTex[uint2(tid.x * 4, tid.y)] = float4 (1.0, Y0, U0, V0);\n" +" outTex[uint2(tid.x * 4 + 1, tid.y)] = float4 (1.0, Y1, U1, V1);\n" +" outTex[uint2(tid.x * 4 + 2, tid.y)] = float4 (1.0, Y2, U2, V2);\n" +" outTex[uint2(tid.x * 4 + 3, tid.y)] = float4 (1.0, Y3, U3, V3);\n" +"}\n" +"#endif\n" +"\n" +"#ifdef BUILDING_CSMain_IYU2_to_AYUV\n" +"Texture2D<float4> inTex : register(t0);\n" +"RWTexture2D<unorm float4> outTex : register(u0);\n" +"\n" +"void Execute (uint3 tid)\n" +"{\n" +" float4 val = inTex.Load (uint3 (tid.x * 3, tid.y, 0));\n" +" float U0 = val.x;\n" +" float Y0 = val.y;\n" +" float V0 = val.z;\n" +" float U1 = val.w;\n" +"\n" 
+" val = inTex.Load (uint3 (tid.x * 3 + 1, tid.y, 0));\n" +" float Y1 = val.x;\n" +" float V1 = val.y;\n" +" float U2 = val.z;\n" +" float Y2 = val.w;\n" +"\n" +" val = inTex.Load (uint3 (tid.x * 3 + 2, tid.y, 0));\n" +" float V2 = val.x;\n" +" float U3 = val.y;\n" +" float Y3 = val.z;\n" +" float V3 = val.w;\n" +"\n" +" outTex[uint2(tid.x * 4, tid.y)] = float4 (1.0, Y0, U0, V0);\n" +" outTex[uint2(tid.x * 4 + 1, tid.y)] = float4 (1.0, Y1, U1, V1);\n" +" outTex[uint2(tid.x * 4 + 2, tid.y)] = float4 (1.0, Y2, U2, V2);\n" +" outTex[uint2(tid.x * 4 + 3, tid.y)] = float4 (1.0, Y3, U3, V3);\n" +"}\n" +"#endif\n" +"\n" +"#ifdef BUILDING_CSMain_AYUV_to_YUY2\n" +"Texture2D<float4> inTex : register(t0);\n" +"RWTexture2D<unorm float4> outTex : register(u0);\n" +"\n" +"void Execute (uint3 tid)\n" +"{\n" +" float3 val = inTex.Load (uint3(tid.x * 2, tid.y, 0)).yzw;\n" +" float Y0 = val.x;\n" +" float U = val.y;\n" +" float V = val.z;\n" +" float Y1 = inTex.Load (uint3(tid.x * 2 + 1, tid.y, 0)).y;\n" +"\n" +" outTex[tid.xy] = float4 (Y0, U, Y1, V);\n" +"}\n" +"#endif\n" +"\n" +"#ifdef BUILDING_CSMain_AYUV_to_UYVY\n" +"Texture2D<float4> inTex : register(t0);\n" +"RWTexture2D<unorm float4> outTex : register(u0);\n" +"\n" +"void Execute (uint3 tid)\n" +"{\n" +" float3 val = inTex.Load (uint3(tid.x * 2, tid.y, 0)).yzw;\n" +" float Y0 = val.x;\n" +" float U = val.y;\n" +" float V = val.z;\n" +" float Y1 = inTex.Load (uint3(tid.x * 2 + 1, tid.y, 0)).y;\n" +"\n" +" outTex[tid.xy] = float4 (U, Y0, V, Y1);\n" +"}\n" +"#endif\n" +"\n" +"#ifdef BUILDING_CSMain_AYUV_to_VYUY\n" +"Texture2D<float4> inTex : register(t0);\n" +"RWTexture2D<unorm float4> outTex : register(u0);\n" +"\n" +"void Execute (uint3 tid)\n" +"{\n" +" float3 val = inTex.Load (uint3(tid.x * 2, tid.y, 0)).yzw;\n" +" float Y0 = val.x;\n" +" float U = val.y;\n" +" float V = val.z;\n" +" float Y1 = inTex.Load (uint3(tid.x * 2 + 1, tid.y, 0)).y;\n" +"\n" +" outTex[tid.xy] = float4 (V, Y0, U, Y1);\n" +"}\n" +"#endif\n" +"\n" 
+"#ifdef BUILDING_CSMain_AYUV_to_YVYU\n" +"Texture2D<float4> inTex : register(t0);\n" +"RWTexture2D<unorm float4> outTex : register(u0);\n" +"\n" +"void Execute (uint3 tid)\n" +"{\n" +" float3 val = inTex.Load (uint3(tid.x * 2, tid.y, 0)).yzw;\n" +" float Y0 = val.x;\n" +" float U = val.y;\n" +" float V = val.z;\n" +" float Y1 = inTex.Load (uint3(tid.x * 2 + 1, tid.y, 0)).y;\n" +"\n" +" outTex[tid.xy] = float4 (Y0, V, Y1, U);\n" +"}\n" +"#endif\n" +"\n" +"#ifdef BUILDING_CSMain_AYUV_to_v210\n" +"Texture2D<float4> inTex : register(t0);\n" +"RWTexture2D<unorm float4> outTex : register(u0);\n" +"\n" +"void Execute (uint3 tid)\n" +"{\n" +" uint xpos = tid.x * 6;\n" +" float3 val = inTex.Load (uint3 (xpos, tid.y, 0)).yzw;\n" +" float Y0 = val.x;\n" +" float U0 = val.y;\n" +" float V0 = val.z;\n" +" float Y1 = inTex.Load (uint3 (xpos + 1, tid.y, 0)).y;\n" +"\n" +" val = inTex.Load (uint3 (xpos + 2, tid.y, 0)).yzw;\n" +" float Y2 = val.x;\n" +" float U2 = val.y;\n" +" float V2 = val.z;\n" +" float Y3 = inTex.Load (uint3 (xpos + 3, tid.y, 0)).y;\n" +"\n" +" val = inTex.Load (uint3 (xpos + 4, tid.y, 0)).yzw;\n" +" float Y4 = val.x;\n" +" float U4 = val.y;\n" +" float V4 = val.z;\n" +" float Y5 = inTex.Load (uint3 (xpos + 5, tid.y, 0)).y;\n" +"\n" +" xpos = tid.x * 4;\n" +" outTex[uint2(xpos, tid.y)] = float4 (U0, Y0, V0, 0);\n" +" outTex[uint2(xpos + 1, tid.y)] = float4 (Y1, U2, Y2, 0);\n" +" outTex[uint2(xpos + 2, tid.y)] = float4 (V2, Y3, U4, 0);\n" +" outTex[uint2(xpos + 3, tid.y)] = float4 (Y4, V4, Y5, 0);\n" +"}\n" +"#endif\n" +"\n" +"#ifdef BUILDING_CSMain_AYUV_to_v308\n" +"Texture2D<float4> inTex : register(t0);\n" +"RWTexture2D<unorm float4> outTex : register(u0);\n" +"\n" +"void Execute (uint3 tid)\n" +"{\n" +" float3 val0 = inTex.Load (uint3 (tid.x * 4, tid.y, 0)).yzw;\n" +" float3 val1 = inTex.Load (uint3 (tid.x * 4 + 1, tid.y, 0)).yzw;\n" +" float3 val2 = inTex.Load (uint3 (tid.x * 4 + 2, tid.y, 0)).yzw;\n" +" float3 val3 = inTex.Load (uint3 (tid.x * 4 + 3, 
tid.y, 0)).yzw;\n" +"\n" +" outTex[uint2(tid.x * 3, tid.y)] = float4 (val0.x, val0.y, val0.z, val1.x);\n" +" outTex[uint2(tid.x * 3 + 1, tid.y)] = float4 (val1.y, val1.z, val2.x, val2.y);\n" +" outTex[uint2(tid.x * 3 + 2, tid.y)] = float4 (val2.z, val3.x, val3.y, val3.z);\n" +"}\n" +"#endif\n" +"\n" +"#ifdef BUILDING_CSMain_AYUV_to_IYU2\n" +"Texture2D<float4> inTex : register(t0);\n" +"RWTexture2D<unorm float4> outTex : register(u0);\n" +"\n" +"void Execute (uint3 tid)\n" +"{\n" +" float3 val0 = inTex.Load (uint3 (tid.x * 4, tid.y, 0)).zyw;\n" +" float3 val1 = inTex.Load (uint3 (tid.x * 4 + 1, tid.y, 0)).zyw;\n" +" float3 val2 = inTex.Load (uint3 (tid.x * 4 + 2, tid.y, 0)).zyw;\n" +" float3 val3 = inTex.Load (uint3 (tid.x * 4 + 3, tid.y, 0)).zyw;\n" +"\n" +" outTex[uint2(tid.x * 3, tid.y)] = float4 (val0.x, val0.y, val0.z, val1.x);\n" +" outTex[uint2(tid.x * 3 + 1, tid.y)] = float4 (val1.y, val1.z, val2.x, val2.y);\n" +" outTex[uint2(tid.x * 3 + 2, tid.y)] = float4 (val2.z, val3.x, val3.y, val3.z);\n" +"}\n" +"#endif\n" +"\n" +"#ifdef BUILDING_CSMain_AYUV_to_Y410\n" +"Texture2D<float4> inTex : register(t0);\n" +"RWTexture2D<unorm float4> outTex : register(u0);\n" +"\n" +"void Execute (uint3 tid)\n" +"{\n" +" float4 val = inTex.Load (tid);\n" +" float Y = val.y;\n" +" float U = val.z;\n" +" float V = val.w;\n" +" float A = val.x;\n" +"\n" +" outTex[tid.xy] = float4 (U, Y, V, A);\n" +"}\n" +"#endif\n" +"\n" +"#ifdef BUILDING_CSMain_RGB_to_RGBA\n" +"Texture2D<float4> inTex : register(t0);\n" +"RWTexture2D<unorm float4> outTex : register(u0);\n" +"\n" +"void Execute (uint3 tid)\n" +"{\n" +" float4 val = inTex.Load (uint3 (tid.x * 3, tid.y, 0));\n" +" float R0 = val.r;\n" +" float G0 = val.g;\n" +" float B0 = val.b;\n" +" float R1 = val.a;\n" +"\n" +" val = inTex.Load (uint3 (tid.x * 3 + 1, tid.y, 0));\n" +" float G1 = val.r;\n" +" float B1 = val.g;\n" +" float R2 = val.b;\n" +" float G2 = val.a;\n" +"\n" +" val = inTex.Load (uint3 (tid.x * 3 + 2, tid.y, 0));\n" +" 
float B2 = val.r;\n" +" float R3 = val.g;\n" +" float G3 = val.b;\n" +" float B3 = val.a;\n" +"\n" +" outTex[uint2(tid.x * 4, tid.y)] = float4 (R0, G0, B0, 1.0);\n" +" outTex[uint2(tid.x * 4 + 1, tid.y)] = float4 (R1, G1, B1, 1.0);\n" +" outTex[uint2(tid.x * 4 + 2, tid.y)] = float4 (R2, G2, B2, 1.0);\n" +" outTex[uint2(tid.x * 4 + 3, tid.y)] = float4 (R3, G3, B3, 1.0);\n" +"}\n" +"#endif\n" +"\n" +"#ifdef BUILDING_CSMain_BGR_to_RGBA\n" +"Texture2D<float4> inTex : register(t0);\n" +"RWTexture2D<unorm float4> outTex : register(u0);\n" +"\n" +"void Execute (uint3 tid)\n" +"{\n" +" float4 val = inTex.Load (uint3 (tid.x * 3, tid.y, 0));\n" +" float B0 = val.r;\n" +" float G0 = val.g;\n" +" float R0 = val.b;\n" +" float B1 = val.a;\n" +"\n" +" val = inTex.Load (uint3 (tid.x * 3 + 1, tid.y, 0));\n" +" float G1 = val.r;\n" +" float R1 = val.g;\n" +" float B2 = val.b;\n" +" float G2 = val.a;\n" +"\n" +" val = inTex.Load (uint3 (tid.x * 3 + 2, tid.y, 0));\n" +" float R2 = val.r;\n" +" float B3 = val.g;\n" +" float G3 = val.b;\n" +" float R3 = val.a;\n" +"\n" +" outTex[uint2(tid.x * 4, tid.y)] = float4 (R0, G0, B0, 1.0);\n" +" outTex[uint2(tid.x * 4 + 1, tid.y)] = float4 (R1, G1, B1, 1.0);\n" +" outTex[uint2(tid.x * 4 + 2, tid.y)] = float4 (R2, G2, B2, 1.0);\n" +" outTex[uint2(tid.x * 4 + 3, tid.y)] = float4 (R3, G3, B3, 1.0);\n" +"}\n" +"#endif\n" +"\n" +"#ifdef BUILDING_CSMain_RGB16_to_RGBA\n" +"Texture2D<uint> inTex : register(t0);\n" +"RWTexture2D<unorm float4> outTex : register(u0);\n" +"\n" +"void Execute (uint3 tid)\n" +"{\n" +" uint val = inTex.Load (tid);\n" +" float R = float ((val & 0xf800) >> 11) / 31;\n" +" float G = float ((val & 0x7e0) >> 5) / 63;\n" +" float B = float ((val & 0x1f)) / 31;\n" +"\n" +" outTex[tid.xy] = float4 (R, G, B, 1.0);\n" +"}\n" +"#endif\n" +"\n" +"#ifdef BUILDING_CSMain_BGR16_to_RGBA\n" +"Texture2D<uint> inTex : register(t0);\n" +"RWTexture2D<unorm float4> outTex : register(u0);\n" +"\n" +"void Execute (uint3 tid)\n" +"{\n" +" uint val = 
inTex.Load (tid);\n" +" float B = float ((val & 0xf800) >> 11) / 31;\n" +" float G = float ((val & 0x7e0) >> 5) / 63;\n" +" float R = float ((val & 0x1f)) / 31;\n" +"\n" +" outTex[tid.xy] = float4 (R, G, B, 1.0);\n" +"}\n" +"#endif\n" +"\n" +"#ifdef BUILDING_CSMain_RGB15_to_RGBA\n" +"Texture2D<uint> inTex : register(t0);\n" +"RWTexture2D<unorm float4> outTex : register(u0);\n" +"\n" +"void Execute (uint3 tid)\n" +"{\n" +" uint val = inTex.Load (tid);\n" +" uint R = (val & 0x7c00) >> 10;\n" +" uint G = (val & 0x3e0) >> 5;\n" +" uint B = (val & 0x1f);\n" +"\n" +" outTex[tid.xy] = float4 (float3 (R, G, B) / 31, 1.0);\n" +"}\n" +"#endif\n" +"\n" +"#ifdef BUILDING_CSMain_BGR15_to_RGBA\n" +"Texture2D<uint> inTex : register(t0);\n" +"RWTexture2D<unorm float4> outTex : register(u0);\n" +"\n" +"void Execute (uint3 tid)\n" +"{\n" +" uint val = inTex.Load (tid);\n" +" uint B = (val & 0x7c00) >> 10;\n" +" uint G = (val & 0x3e0) >> 5;\n" +" uint R = (val & 0x1f);\n" +"\n" +" outTex[tid.xy] = float4 (float3 (R, G, B) / 31, 1.0);\n" +"}\n" +"#endif\n" +"\n" +"#ifdef BUILDING_CSMain_r210_to_RGBA\n" +"Texture2D<uint> inTex : register(t0);\n" +"RWTexture2D<unorm float4> outTex : register(u0);\n" +"\n" +"void Execute (uint3 tid)\n" +"{\n" +" uint val = inTex.Load (tid);\n" +" uint val_be = ((val & 0xff) << 24) | ((val & 0xff00) << 8) |\n" +" ((val & 0xff0000) >> 8) | ((val & 0xff000000) >> 24);\n" +" uint R = (val_be >> 20) & 0x3ff;\n" +" uint G = (val_be >> 10) & 0x3ff;\n" +" uint B = val_be & 0x3ff;\n" +"\n" +" outTex[tid.xy] = float4 (float3 (R, G, B) / 1023, 1.0);\n" +"}\n" +"#endif\n" +"\n" +"#ifdef BUILDING_CSMain_RGBA_to_RGB\n" +"Texture2D<float4> inTex : register(t0);\n" +"RWTexture2D<unorm float4> outTex : register(u0);\n" +"\n" +"void Execute (uint3 tid)\n" +"{\n" +" float3 val0 = inTex.Load (uint3 (tid.x * 4, tid.y, 0)).rgb;\n" +" float3 val1 = inTex.Load (uint3 (tid.x * 4 + 1, tid.y, 0)).rgb;\n" +" float3 val2 = inTex.Load (uint3 (tid.x * 4 + 2, tid.y, 0)).rgb;\n" +" 
float3 val3 = inTex.Load (uint3 (tid.x * 4 + 3, tid.y, 0)).rgb;\n" +"\n" +" outTex[uint2(tid.x * 3, tid.y)] = float4 (val0.r, val0.g, val0.b, val1.r);\n" +" outTex[uint2(tid.x * 3 + 1, tid.y)] = float4 (val1.g, val1.b, val2.r, val2.g);\n" +" outTex[uint2(tid.x * 3 + 2, tid.y)] = float4 (val2.b, val3.r, val3.g, val3.b);\n" +"}\n" +"#endif\n" +"\n" +"#ifdef BUILDING_CSMain_RGBA_to_BGR\n" +"Texture2D<float4> inTex : register(t0);\n" +"RWTexture2D<unorm float4> outTex : register(u0);\n" +"\n" +"void Execute (uint3 tid)\n" +"{\n" +" float3 val0 = inTex.Load (uint3 (tid.x * 4, tid.y, 0)).rgb;\n" +" float3 val1 = inTex.Load (uint3 (tid.x * 4 + 1, tid.y, 0)).rgb;\n" +" float3 val2 = inTex.Load (uint3 (tid.x * 4 + 2, tid.y, 0)).rgb;\n" +" float3 val3 = inTex.Load (uint3 (tid.x * 4 + 3, tid.y, 0)).rgb;\n" +"\n" +" outTex[uint2(tid.x * 3, tid.y)] = float4 (val0.b, val0.g, val0.r, val1.b);\n" +" outTex[uint2(tid.x * 3 + 1, tid.y)] = float4 (val1.g, val1.r, val2.b, val2.g);\n" +" outTex[uint2(tid.x * 3 + 2, tid.y)] = float4 (val2.r, val3.b, val3.g, val3.r);\n" +"}\n" +"#endif\n" +"\n" +"#ifdef BUILDING_CSMain_RGBA_to_RGB16\n" +"Texture2D<float4> inTex : register(t0);\n" +"RWTexture2D<uint> outTex : register(u0);\n" +"\n" +"void Execute (uint3 tid)\n" +"{\n" +" float3 val = inTex.Load (tid).rgb;\n" +" uint R = val.r * 31;\n" +" uint G = val.g * 63;\n" +" uint B = val.b * 31;\n" +"\n" +" outTex[tid.xy] = (R << 11) | (G << 5) | B;\n" +"}\n" +"#endif\n" +"\n" +"#ifdef BUILDING_CSMain_RGBA_to_BGR16\n" +"Texture2D<float4> inTex : register(t0);\n" +"RWTexture2D<uint> outTex : register(u0);\n" +"\n" +"void Execute (uint3 tid)\n" +"{\n" +" float3 val = inTex.Load (tid).rgb;\n" +" uint R = val.r * 31;\n" +" uint G = val.g * 63;\n" +" uint B = val.b * 31;\n" +"\n" +" outTex[tid.xy] = (B << 11) | (G << 5) | R;\n" +"}\n" +"#endif\n" +"\n" +"#ifdef BUILDING_CSMain_RGBA_to_RGB15\n" +"Texture2D<float4> inTex : register(t0);\n" +"RWTexture2D<uint> outTex : register(u0);\n" +"\n" +"void Execute 
(uint3 tid)\n" +"{\n" +" uint3 val = inTex.Load (tid).rgb * 31;\n" +"\n" +" outTex[tid.xy] = (val.r << 10) | (val.g << 5) | val.b;\n" +"}\n" +"#endif\n" +"\n" +"#ifdef BUILDING_CSMain_RGBA_to_BGR15\n" +"Texture2D<float4> inTex : register(t0);\n" +"RWTexture2D<uint> outTex : register(u0);\n" +"\n" +"void Execute (uint3 tid)\n" +"{\n" +" uint3 val = inTex.Load (tid).rgb * 31;\n" +"\n" +" outTex[tid.xy] = (val.b << 10) | (val.g << 5) | val.r;\n" +"}\n" +"#endif\n" +"\n" +"#ifdef BUILDING_CSMain_RGBA_to_r210\n" +"Texture2D<float4> inTex : register(t0);\n" +"RWTexture2D<uint> outTex : register(u0);\n" +"\n" +"void Execute (uint3 tid)\n" +"{\n" +" uint3 val = inTex.Load (tid).rgb * 1023;\n" +" uint packed = (val.r << 20) | (val.g << 10) | val.b;\n" +" uint packed_be = ((packed & 0xff) << 24) | ((packed & 0xff00) << 8) |\n" +" ((packed & 0xff0000) >> 8) | ((packed & 0xff000000) >> 24);\n" +"\n" +" outTex[tid.xy] = packed_be;\n" +"}\n" +"#endif\n" +"\n" +"#ifdef BUILDING_CSMain_RGBA_to_BGRA\n" +"Texture2D<float4> inTex : register(t0);\n" +"RWTexture2D<unorm float4> outTex : register(u0);\n" +"\n" +"void Execute (uint3 tid)\n" +"{\n" +" float4 val = inTex.Load (tid);\n" +"\n" +" outTex[tid.xy] = val.bgra;\n" +"}\n" +"#endif\n" +"\n" +"[numthreads(8, 8, 1)]\n" +"void ENTRY_POINT (uint3 tid : SV_DispatchThreadID)\n" +"{\n" +" Execute (tid);\n" +"}\n"; +#endif diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/converter-hlsl/PSMain_converter.hlsl b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/converter-hlsl/PSMain_converter.hlsl new file mode 100644 index 0000000000..55fdcc01af --- /dev/null +++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/converter-hlsl/PSMain_converter.hlsl @@ -0,0 +1,2257 @@ +/* GStreamer + * Copyright (C) 2023 Seungha Yang <seungha@centricular.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software 
Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#ifdef BUILDING_HLSL +cbuffer PsAlphaFactor : register(b1) +{ + float alphaFactor; +}; + +struct PSColorSpace +{ + float3 CoeffX; + float3 CoeffY; + float3 CoeffZ; + float3 Offset; + float3 Min; + float3 Max; + float padding; +}; + +cbuffer PsConstBuffer : register(b2) +{ + PSColorSpace preCoeff; + PSColorSpace postCoeff; + PSColorSpace primariesCoeff; +}; + +Texture2D shaderTexture_0 : register(t0); +Texture2D shaderTexture_1 : register(t1); +Texture2D shaderTexture_2 : register(t2); +Texture2D shaderTexture_3 : register(t3); +Texture1D<float> gammaDecLUT : register(t4); +Texture1D<float> gammaEncLUT: register(t5); + +SamplerState samplerState : register(s0); +SamplerState lutSamplerState : register(s1); + +struct PS_INPUT +{ + float4 Position: SV_POSITION; + float2 Texture: TEXCOORD; +}; + +struct PS_OUTPUT_LUMA +{ + float4 Plane0: SV_TARGET0; +}; + +struct PS_OUTPUT_CHROMA +{ + float4 Plane0: SV_TARGET0; +}; + +struct PS_OUTPUT_CHROMA_PLANAR +{ + float4 Plane0: SV_TARGET0; + float4 Plane1: SV_TARGET1; +}; + +struct PS_OUTPUT_PLANAR +{ + float4 Plane0: SV_TARGET0; + float4 Plane1: SV_TARGET1; + float4 Plane2: SV_TARGET2; +}; + +struct PS_OUTPUT_PLANAR_FULL +{ + float4 Plane0: SV_TARGET0; + float4 Plane1: SV_TARGET1; + float4 Plane2: SV_TARGET2; + float4 Plane3: SV_TARGET3; +}; + +struct PS_OUTPUT_PACKED +{ + float4 Plane0: SV_TARGET0; +}; + +float4 DoAlphaPremul (float4 sample) +{ + 
float4 premul_tex;
+  premul_tex.rgb = sample.rgb * sample.a;
+  premul_tex.a = sample.a;
+  return premul_tex;
+}
+
+/* Undoes premultiplied alpha: divides the colour channels by alpha and
+ * clamps the result to [0, 1]. Samples whose alpha is exactly 0 (division
+ * by zero) or exactly 1 (division is a no-op) are passed through unchanged.
+ * NOTE: the stringified copy of this shader in the non-BUILDING_HLSL branch
+ * has to be kept in sync with this function. */
+float4 DoAlphaUnpremul (float4 sample)
+{
+  float4 unpremul_tex;
+  if (sample.a == 0 || sample.a == 1) {
+    unpremul_tex = sample;
+  } else {
+    unpremul_tex.rgb = saturate (sample.rgb / sample.a);
+    unpremul_tex.a = sample.a;
+  }
+
+  /* Return the computed value; returning "sample" here would silently
+   * discard the un-premultiplication done above. */
+  return unpremul_tex;
+}
+
+/* Common interface of all input samplers: fetch one texel at "uv" and
+ * return it with the components reordered into the layout the converter
+ * stage consumes. */
+interface ISampler
+{
+  float4 Execute (float2 uv);
+};
+
+/* Single plane: x is read from texture 0, y and z are fixed to 0.5 and
+ * alpha to 1.0. */
+class SamplerGRAY : ISampler
+{
+  float4 Execute (float2 uv)
+  {
+    float4 sample;
+    sample.x = shaderTexture_0.Sample(samplerState, uv).x;
+    sample.y = 0.5;
+    sample.z = 0.5;
+    sample.a = 1.0;
+    return sample;
+  }
+};
+
+/* Two planes: x from texture 0, yz from the xy channels of texture 1. */
+class SamplerNV12 : ISampler
+{
+  float4 Execute (float2 uv)
+  {
+    float4 sample;
+    sample.x = shaderTexture_0.Sample(samplerState, uv).x;
+    sample.yz = shaderTexture_1.Sample(samplerState, uv).xy;
+    sample.a = 1.0;
+    return sample;
+  }
+};
+
+/* Same as SamplerNV12 but with the two channels of texture 1 swapped. */
+class SamplerNV21 : ISampler
+{
+  float4 Execute (float2 uv)
+  {
+    float4 sample;
+    sample.x = shaderTexture_0.Sample(samplerState, uv).x;
+    sample.yz = shaderTexture_1.Sample(samplerState, uv).yx;
+    sample.a = 1.0;
+    return sample;
+  }
+};
+
+/* Three planes: x, y, z from textures 0, 1, 2 respectively. */
+class SamplerI420 : ISampler
+{
+  float4 Execute (float2 uv)
+  {
+    float4 sample;
+    sample.x = shaderTexture_0.Sample(samplerState, uv).x;
+    sample.y = shaderTexture_1.Sample(samplerState, uv).x;
+    sample.z = shaderTexture_2.Sample(samplerState, uv).x;
+    sample.a = 1.0;
+    return sample;
+  }
+};
+
+/* Same as SamplerI420 but y is read from texture 2 and z from texture 1. */
+class SamplerYV12 : ISampler
+{
+  float4 Execute (float2 uv)
+  {
+    float4 sample;
+    sample.x = shaderTexture_0.Sample(samplerState, uv).x;
+    sample.z = shaderTexture_1.Sample(samplerState, uv).x;
+    sample.y = shaderTexture_2.Sample(samplerState, uv).x;
+    sample.a = 1.0;
+    return sample;
+  }
+};
+
+/* Three planes like SamplerI420; the sampled values are scaled by 64 and
+ * clamped to [0, 1] before being returned. */
+class SamplerI420_10 : ISampler
+{
+  float4 Execute (float2 uv)
+  {
+    float3 sample;
+    sample.x = shaderTexture_0.Sample(samplerState, uv).x;
+    sample.y = shaderTexture_1.Sample(samplerState, uv).x;
+    sample.z = shaderTexture_2.Sample(samplerState, uv).x;
+    return 
float4 (saturate (sample * 64.0), 1.0); + } +}; + +class SamplerI420_12 : ISampler +{ + float4 Execute (float2 uv) + { + float3 sample; + sample.x = shaderTexture_0.Sample(samplerState, uv).x; + sample.y = shaderTexture_1.Sample(samplerState, uv).x; + sample.z = shaderTexture_2.Sample(samplerState, uv).x; + return float4 (saturate (sample * 16.0), 1.0); + } +}; + +class SamplerVUYA : ISampler +{ + float4 Execute (float2 uv) + { + return shaderTexture_0.Sample(samplerState, uv).zyxw; + } +}; + +class SamplerVUYAPremul : ISampler +{ + float4 Execute (float2 uv) + { + return DoAlphaUnpremul (shaderTexture_0.Sample(samplerState, uv).zyxw); + } +}; + +class SamplerY410 : ISampler +{ + float4 Execute (float2 uv) + { + return float4 (shaderTexture_0.Sample(samplerState, uv).yxz, 1.0); + } +}; + +class SamplerY412 : ISampler +{ + float4 Execute (float2 uv) + { + return shaderTexture_0.Sample(samplerState, uv).grba; + } +}; + +class SamplerY412Premul : ISampler +{ + float4 Execute (float2 uv) + { + return DoAlphaUnpremul (shaderTexture_0.Sample(samplerState, uv).grba); + } +}; + +class SamplerAYUV : ISampler +{ + float4 Execute (float2 uv) + { + return shaderTexture_0.Sample(samplerState, uv).yzwx; + } +}; + +class SamplerAYUVPremul : ISampler +{ + float4 Execute (float2 uv) + { + return DoAlphaUnpremul (shaderTexture_0.Sample(samplerState, uv).yzwx); + } +}; + +class SamplerRGBA : ISampler +{ + float4 Execute (float2 uv) + { + return shaderTexture_0.Sample(samplerState, uv); + } +}; + +class SamplerRGBAPremul : ISampler +{ + float4 Execute (float2 uv) + { + return DoAlphaUnpremul (shaderTexture_0.Sample(samplerState, uv)); + } +}; + +class SamplerRGBx : ISampler +{ + float4 Execute (float2 uv) + { + return float4 (shaderTexture_0.Sample(samplerState, uv).rgb, 1.0); + } +}; + +class SamplerxRGB : ISampler +{ + float4 Execute (float2 uv) + { + return float4 (shaderTexture_0.Sample(samplerState, uv).gba, 1.0); + } +}; + +class SamplerARGB : ISampler +{ + float4 Execute 
(float2 uv) + { + return shaderTexture_0.Sample(samplerState, uv).gbar; + } +}; + +class SamplerARGBPremul : ISampler +{ + float4 Execute (float2 uv) + { + return DoAlphaUnpremul (shaderTexture_0.Sample(samplerState, uv).gbar); + } +}; + +class SamplerxBGR : ISampler +{ + float4 Execute (float2 uv) + { + return float4 (shaderTexture_0.Sample(samplerState, uv).abg, 1.0); + } +}; + +class SamplerABGR : ISampler +{ + float4 Execute (float2 uv) + { + return shaderTexture_0.Sample(samplerState, uv).abgr; + } +}; + +class SamplerABGRPremul : ISampler +{ + float4 Execute (float2 uv) + { + return DoAlphaUnpremul (shaderTexture_0.Sample(samplerState, uv).abgr); + } +}; + +class SamplerBGR10A2 : ISampler +{ + float4 Execute (float2 uv) + { + return float4 (shaderTexture_0.Sample(samplerState, uv).zyx, 1.0); + } +}; + +class SamplerBGRA64 : ISampler +{ + float4 Execute (float2 uv) + { + return shaderTexture_0.Sample(samplerState, uv).bgra; + } +}; + +class SamplerBGRA64Premul : ISampler +{ + float4 Execute (float2 uv) + { + return DoAlphaUnpremul (shaderTexture_0.Sample(samplerState, uv).bgra); + } +}; + +class SamplerGBR : ISampler +{ + float4 Execute (float2 uv) + { + float4 sample; + sample.g = shaderTexture_0.Sample(samplerState, uv).x; + sample.b = shaderTexture_1.Sample(samplerState, uv).x; + sample.r = shaderTexture_2.Sample(samplerState, uv).x; + sample.a = 1.0; + return sample; + } +}; + +class SamplerGBR_10 : ISampler +{ + float4 Execute (float2 uv) + { + float3 sample; + sample.g = shaderTexture_0.Sample(samplerState, uv).x; + sample.b = shaderTexture_1.Sample(samplerState, uv).x; + sample.r = shaderTexture_2.Sample(samplerState, uv).x; + return float4 (saturate (sample * 64.0), 1.0); + } +}; + +class SamplerGBR_12 : ISampler +{ + float4 Execute (float2 uv) + { + float3 sample; + sample.g = shaderTexture_0.Sample(samplerState, uv).x; + sample.b = shaderTexture_1.Sample(samplerState, uv).x; + sample.r = shaderTexture_2.Sample(samplerState, uv).x; + return float4 
(saturate (sample * 16.0), 1.0); + } +}; + +class SamplerGBRA : ISampler +{ + float4 Execute (float2 uv) + { + float4 sample; + sample.g = shaderTexture_0.Sample(samplerState, uv).x; + sample.b = shaderTexture_1.Sample(samplerState, uv).x; + sample.r = shaderTexture_2.Sample(samplerState, uv).x; + sample.a = shaderTexture_3.Sample(samplerState, uv).x; + return sample; + } +}; + +class SamplerGBRAPremul : ISampler +{ + float4 Execute (float2 uv) + { + float4 sample; + sample.g = shaderTexture_0.Sample(samplerState, uv).x; + sample.b = shaderTexture_1.Sample(samplerState, uv).x; + sample.r = shaderTexture_2.Sample(samplerState, uv).x; + sample.a = shaderTexture_3.Sample(samplerState, uv).x; + return DoAlphaUnpremul (sample); + } +}; + +class SamplerGBRA_10 : ISampler +{ + float4 Execute (float2 uv) + { + float4 sample; + sample.g = shaderTexture_0.Sample(samplerState, uv).x; + sample.b = shaderTexture_1.Sample(samplerState, uv).x; + sample.r = shaderTexture_2.Sample(samplerState, uv).x; + sample.a = shaderTexture_3.Sample(samplerState, uv).x; + return saturate (sample * 64.0); + } +}; + +class SamplerGBRAPremul_10 : ISampler +{ + float4 Execute (float2 uv) + { + float4 sample; + sample.g = shaderTexture_0.Sample(samplerState, uv).x; + sample.b = shaderTexture_1.Sample(samplerState, uv).x; + sample.r = shaderTexture_2.Sample(samplerState, uv).x; + sample.a = shaderTexture_3.Sample(samplerState, uv).x; + return DoAlphaUnpremul (saturate (sample * 64.0)); + } +}; + +class SamplerGBRA_12 : ISampler +{ + float4 Execute (float2 uv) + { + float4 sample; + sample.g = shaderTexture_0.Sample(samplerState, uv).x; + sample.b = shaderTexture_1.Sample(samplerState, uv).x; + sample.r = shaderTexture_2.Sample(samplerState, uv).x; + sample.a = shaderTexture_3.Sample(samplerState, uv).x; + return saturate (sample * 16.0); + } +}; + +class SamplerGBRAPremul_12 : ISampler +{ + float4 Execute (float2 uv) + { + float4 sample; + sample.g = shaderTexture_0.Sample(samplerState, uv).x; + 
sample.b = shaderTexture_1.Sample(samplerState, uv).x; + sample.r = shaderTexture_2.Sample(samplerState, uv).x; + sample.a = shaderTexture_3.Sample(samplerState, uv).x; + return DoAlphaUnpremul (saturate (sample * 16.0)); + } +}; + +class SamplerRGBP : ISampler +{ + float4 Execute (float2 uv) + { + float4 sample; + sample.r = shaderTexture_0.Sample(samplerState, uv).x; + sample.g = shaderTexture_1.Sample(samplerState, uv).x; + sample.b = shaderTexture_2.Sample(samplerState, uv).x; + sample.a = 1.0; + return sample; + } +}; + +class SamplerBGRP : ISampler +{ + float4 Execute (float2 uv) + { + float4 sample; + sample.b = shaderTexture_0.Sample(samplerState, uv).x; + sample.g = shaderTexture_1.Sample(samplerState, uv).x; + sample.r = shaderTexture_2.Sample(samplerState, uv).x; + sample.a = 1.0; + return sample; + } +}; + +class SamplerRBGA : ISampler +{ + float4 Execute (float2 uv) + { + return shaderTexture_0.Sample(samplerState, uv).rbga; + } +}; + +class SamplerRBGAPremul : ISampler +{ + float4 Execute (float2 uv) + { + return DoAlphaUnpremul (shaderTexture_0.Sample(samplerState, uv).rbga); + } +}; + +interface IConverter +{ + float4 Execute (float4 sample); +}; + +class ConverterIdentity : IConverter +{ + float4 Execute (float4 sample) + { + return sample; + } +}; + +class ConverterRange : IConverter +{ + float4 Execute (float4 sample) + { + float3 out_space; + out_space.x = postCoeff.CoeffX.x * sample.x; + out_space.y = postCoeff.CoeffY.y * sample.y; + out_space.z = postCoeff.CoeffZ.z * sample.z; + out_space += postCoeff.Offset; + return float4 (clamp (out_space, postCoeff.Min, postCoeff.Max), sample.a); + } +}; + +class ConverterSimple : IConverter +{ + float4 Execute (float4 sample) + { + float3 out_space; + out_space.x = dot (postCoeff.CoeffX, sample.xyz); + out_space.y = dot (postCoeff.CoeffY, sample.xyz); + out_space.z = dot (postCoeff.CoeffZ, sample.xyz); + out_space += postCoeff.Offset; + return float4 (clamp (out_space, postCoeff.Min, postCoeff.Max), 
sample.a);
+  }
+};
+
+/* Colour conversion with gamma remapping:
+ *   1. preCoeff matrix + offset, clamped to [preCoeff.Min, preCoeff.Max]
+ *   2. per-channel lookup in gammaDecLUT, then in gammaEncLUT
+ *   3. postCoeff matrix + offset, clamped to [postCoeff.Min, postCoeff.Max]
+ * The input alpha is passed through untouched. */
+class ConverterGamma : IConverter
+{
+  float4 Execute (float4 sample)
+  {
+    float3 out_space;
+    float3 encoded;
+    out_space.x = dot (preCoeff.CoeffX, sample.xyz);
+    out_space.y = dot (preCoeff.CoeffY, sample.xyz);
+    out_space.z = dot (preCoeff.CoeffZ, sample.xyz);
+    out_space += preCoeff.Offset;
+    out_space = clamp (out_space, preCoeff.Min, preCoeff.Max);
+
+    out_space.x = gammaDecLUT.Sample (lutSamplerState, out_space.x);
+    out_space.y = gammaDecLUT.Sample (lutSamplerState, out_space.y);
+    out_space.z = gammaDecLUT.Sample (lutSamplerState, out_space.z);
+
+    encoded.x = gammaEncLUT.Sample (lutSamplerState, out_space.x);
+    encoded.y = gammaEncLUT.Sample (lutSamplerState, out_space.y);
+    encoded.z = gammaEncLUT.Sample (lutSamplerState, out_space.z);
+
+    /* The matrix multiply must read from a vector that is not being
+     * written: doing it in place would feed the already-converted x into
+     * the y row, and the converted x and y into the z row. */
+    out_space.x = dot (postCoeff.CoeffX, encoded);
+    out_space.y = dot (postCoeff.CoeffY, encoded);
+    out_space.z = dot (postCoeff.CoeffZ, encoded);
+    out_space += postCoeff.Offset;
+    return float4 (clamp (out_space, postCoeff.Min, postCoeff.Max), sample.a);
+  }
+};
+
+/* Same pipeline as ConverterGamma with one extra step: between the
+ * gammaDecLUT and gammaEncLUT lookups the value is multiplied by the
+ * primariesCoeff matrix. */
+class ConverterPrimary : IConverter
+{
+  float4 Execute (float4 sample)
+  {
+    float3 out_space;
+    float3 tmp;
+    float3 encoded;
+    out_space.x = dot (preCoeff.CoeffX, sample.xyz);
+    out_space.y = dot (preCoeff.CoeffY, sample.xyz);
+    out_space.z = dot (preCoeff.CoeffZ, sample.xyz);
+    out_space += preCoeff.Offset;
+    out_space = clamp (out_space, preCoeff.Min, preCoeff.Max);
+
+    out_space.x = gammaDecLUT.Sample (lutSamplerState, out_space.x);
+    out_space.y = gammaDecLUT.Sample (lutSamplerState, out_space.y);
+    out_space.z = gammaDecLUT.Sample (lutSamplerState, out_space.z);
+
+    tmp.x = dot (primariesCoeff.CoeffX, out_space);
+    tmp.y = dot (primariesCoeff.CoeffY, out_space);
+    tmp.z = dot (primariesCoeff.CoeffZ, out_space);
+
+    encoded.x = gammaEncLUT.Sample (lutSamplerState, tmp.x);
+    encoded.y = gammaEncLUT.Sample (lutSamplerState, tmp.y);
+    encoded.z = gammaEncLUT.Sample (lutSamplerState, tmp.z);
+
+    /* Read from "encoded", write to "out_space": an in-place multiply
+     * would corrupt the y and z rows with partially converted data. */
+    out_space.x = dot (postCoeff.CoeffX, encoded);
+    out_space.y = dot (postCoeff.CoeffY, encoded);
+    out_space.z = dot (postCoeff.CoeffZ, encoded);
+    out_space += postCoeff.Offset;
+    return float4 (clamp (out_space, postCoeff.Min, postCoeff.Max), sample.a);
+  }
+};
+
+/* UnormTo10bit: scales the value by 1023 / 65535 (overloads for 1 to 4
+ * components). */
+float UnormTo10bit (float sample)
+{
+  return sample * 1023.0 / 65535.0;
+}
+
+float2 UnormTo10bit (float2 sample)
+{
+  return sample * 1023.0 / 65535.0;
+}
+
+float3 UnormTo10bit (float3 sample)
+{
+  return sample * 1023.0 / 65535.0;
+}
+
+float4 UnormTo10bit (float4 sample)
+{
+  return sample * 1023.0 / 65535.0;
+}
+
+/* UnormTo12bit: scales the value by 4095 / 65535 (overloads for 1 to 4
+ * components). */
+float UnormTo12bit (float sample)
+{
+  return sample * 4095.0 / 65535.0;
+}
+
+float2 UnormTo12bit (float2 sample)
+{
+  return sample * 4095.0 / 65535.0;
+}
+
+float3 UnormTo12bit (float3 sample)
+{
+  return sample * 4095.0 / 65535.0;
+}
+
+float4 UnormTo12bit (float4 sample)
+{
+  return sample * 4095.0 / 65535.0;
+}
+
+/* Output builders that write only the x component to a single target. */
+interface IOutputLuma
+{
+  PS_OUTPUT_LUMA Build (float4 sample);
+};
+
+class OutputLuma : IOutputLuma
+{
+  PS_OUTPUT_LUMA Build (float4 sample)
+  {
+    PS_OUTPUT_LUMA output;
+    output.Plane0 = float4 (sample.x, 0, 0, 0);
+    return output;
+  }
+};
+
+class OutputLuma_10 : IOutputLuma
+{
+  PS_OUTPUT_LUMA Build (float4 sample)
+  {
+    PS_OUTPUT_LUMA output;
+    output.Plane0 = float4 (UnormTo10bit (sample.x), 0, 0, 0);
+    return output;
+  }
+};
+
+class OutputLuma_12 : IOutputLuma
+{
+  PS_OUTPUT_LUMA Build (float4 sample)
+  {
+    PS_OUTPUT_LUMA output;
+    output.Plane0 = float4 (UnormTo12bit (sample.x), 0, 0, 0);
+    return output;
+  }
+};
+
+/* Output builders that write the y and z components, interleaved, to a
+ * single target. */
+interface IOutputChroma
+{
+  PS_OUTPUT_CHROMA Build (float4 sample);
+};
+
+class OutputChromaNV12 : IOutputChroma
+{
+  PS_OUTPUT_CHROMA Build (float4 sample)
+  {
+    PS_OUTPUT_CHROMA output;
+    output.Plane0 = float4 (sample.yz, 0, 0);
+    return output;
+  }
+};
+
+class OutputChromaNV21 : IOutputChroma
+{
+  PS_OUTPUT_CHROMA Build (float4 sample)
+  {
+    PS_OUTPUT_CHROMA output;
+    output.Plane0 = float4 (sample.zy, 0, 0);
+    return output;
+  }
+};
+
+interface IOutputChromaPlanar
+{
+  
PS_OUTPUT_CHROMA_PLANAR Build (float4 sample); +}; + +class OutputChromaI420 : IOutputChromaPlanar +{ + PS_OUTPUT_CHROMA_PLANAR Build (float4 sample) + { + PS_OUTPUT_CHROMA_PLANAR output; + output.Plane0 = float4 (sample.y, 0, 0, 0); + output.Plane1 = float4 (sample.z, 0, 0, 0); + return output; + } +}; + +class OutputChromaYV12 : IOutputChromaPlanar +{ + PS_OUTPUT_CHROMA_PLANAR Build (float4 sample) + { + PS_OUTPUT_CHROMA_PLANAR output; + output.Plane0 = float4 (sample.z, 0, 0, 0); + output.Plane1 = float4 (sample.y, 0, 0, 0); + return output; + } +}; + +class OutputChromaI420_10 : IOutputChromaPlanar +{ + PS_OUTPUT_CHROMA_PLANAR Build (float4 sample) + { + PS_OUTPUT_CHROMA_PLANAR output; + float2 scaled = UnormTo10bit (sample.yz); + output.Plane0 = float4 (scaled.x, 0, 0, 0); + output.Plane1 = float4 (scaled.y, 0, 0, 0); + return output; + } +}; + +class OutputChromaI420_12 : IOutputChromaPlanar +{ + PS_OUTPUT_CHROMA_PLANAR Build (float4 sample) + { + PS_OUTPUT_CHROMA_PLANAR output; + float2 scaled = UnormTo12bit (sample.yz); + output.Plane0 = float4 (scaled.x, 0, 0, 0); + output.Plane1 = float4 (scaled.y, 0, 0, 0); + return output; + } +}; + +interface IOutputPlanar +{ + PS_OUTPUT_PLANAR Build (float4 sample); +}; + +class OutputY444 : IOutputPlanar +{ + PS_OUTPUT_PLANAR Build (float4 sample) + { + PS_OUTPUT_PLANAR output; + output.Plane0 = float4 (sample.x, 0, 0, 0); + output.Plane1 = float4 (sample.y, 0, 0, 0); + output.Plane2 = float4 (sample.z, 0, 0, 0); + return output; + } +}; + +class OutputY444_10 : IOutputPlanar +{ + PS_OUTPUT_PLANAR Build (float4 sample) + { + PS_OUTPUT_PLANAR output; + float3 scaled = UnormTo10bit (sample.xyz); + output.Plane0 = float4 (scaled.x, 0, 0, 0); + output.Plane1 = float4 (scaled.y, 0, 0, 0); + output.Plane2 = float4 (scaled.z, 0, 0, 0); + return output; + } +}; + +class OutputY444_12 : IOutputPlanar +{ + PS_OUTPUT_PLANAR Build (float4 sample) + { + PS_OUTPUT_PLANAR output; + float3 scaled = UnormTo12bit (sample.xyz); + 
output.Plane0 = float4 (scaled.x, 0, 0, 0); + output.Plane1 = float4 (scaled.y, 0, 0, 0); + output.Plane2 = float4 (scaled.z, 0, 0, 0); + return output; + } +}; + +class OutputGBR : IOutputPlanar +{ + PS_OUTPUT_PLANAR Build (float4 sample) + { + PS_OUTPUT_PLANAR output; + output.Plane0 = float4 (sample.g, 0, 0, 0); + output.Plane1 = float4 (sample.b, 0, 0, 0); + output.Plane2 = float4 (sample.r, 0, 0, 0); + return output; + } +}; + +class OutputGBR_10 : IOutputPlanar +{ + PS_OUTPUT_PLANAR Build (float4 sample) + { + PS_OUTPUT_PLANAR output; + float3 scaled = UnormTo10bit (sample.rgb); + output.Plane0 = float4 (scaled.g, 0, 0, 0); + output.Plane1 = float4 (scaled.b, 0, 0, 0); + output.Plane2 = float4 (scaled.r, 0, 0, 0); + return output; + } +}; + +class OutputGBR_12 : IOutputPlanar +{ + PS_OUTPUT_PLANAR Build (float4 sample) + { + PS_OUTPUT_PLANAR output; + float3 scaled = UnormTo12bit (sample.rgb); + output.Plane0 = float4 (scaled.g, 0, 0, 0); + output.Plane1 = float4 (scaled.b, 0, 0, 0); + output.Plane2 = float4 (scaled.r, 0, 0, 0); + return output; + } +}; + +class OutputRGBP : IOutputPlanar +{ + PS_OUTPUT_PLANAR Build (float4 sample) + { + PS_OUTPUT_PLANAR output; + output.Plane0 = float4 (sample.r, 0, 0, 0); + output.Plane1 = float4 (sample.g, 0, 0, 0); + output.Plane2 = float4 (sample.b, 0, 0, 0); + return output; + } +}; + +class OutputBGRP : IOutputPlanar +{ + PS_OUTPUT_PLANAR Build (float4 sample) + { + PS_OUTPUT_PLANAR output; + output.Plane0 = float4 (sample.b, 0, 0, 0); + output.Plane1 = float4 (sample.g, 0, 0, 0); + output.Plane2 = float4 (sample.r, 0, 0, 0); + return output; + } +}; + +interface IOutputPlanarFull +{ + PS_OUTPUT_PLANAR_FULL Build (float4 sample); +}; + +class OutputGBRA : IOutputPlanarFull +{ + PS_OUTPUT_PLANAR_FULL Build (float4 sample) + { + PS_OUTPUT_PLANAR_FULL output; + output.Plane0 = float4 (sample.g, 0, 0, 0); + output.Plane1 = float4 (sample.b, 0, 0, 0); + output.Plane2 = float4 (sample.r, 0, 0, 0); + output.Plane3 = float4 
(sample.a * alphaFactor, 0, 0, 0); + return output; + } +}; + +class OutputGBRAPremul : IOutputPlanarFull +{ + PS_OUTPUT_PLANAR_FULL Build (float4 sample) + { + PS_OUTPUT_PLANAR_FULL output; + float4 premul; + sample.a *= alphaFactor; + premul = DoAlphaPremul (sample); + output.Plane0 = float4 (premul.g, 0, 0, 0); + output.Plane1 = float4 (premul.b, 0, 0, 0); + output.Plane2 = float4 (premul.r, 0, 0, 0); + output.Plane3 = float4 (premul.a, 0, 0, 0); + return output; + } +}; + +class OutputGBRA_10 : IOutputPlanarFull +{ + PS_OUTPUT_PLANAR_FULL Build (float4 sample) + { + PS_OUTPUT_PLANAR_FULL output; + float4 scaled; + sample.a *= alphaFactor; + scaled = UnormTo10bit (sample); + output.Plane0 = float4 (scaled.g, 0, 0, 0); + output.Plane1 = float4 (scaled.b, 0, 0, 0); + output.Plane2 = float4 (scaled.r, 0, 0, 0); + output.Plane3 = float4 (scaled.a, 0, 0, 0); + return output; + } +}; + +class OutputGBRAPremul_10 : IOutputPlanarFull +{ + PS_OUTPUT_PLANAR_FULL Build (float4 sample) + { + PS_OUTPUT_PLANAR_FULL output; + float4 scaled; + sample.a *= alphaFactor; + scaled = UnormTo10bit (DoAlphaPremul (sample)); + output.Plane0 = float4 (scaled.g, 0, 0, 0); + output.Plane1 = float4 (scaled.b, 0, 0, 0); + output.Plane2 = float4 (scaled.r, 0, 0, 0); + output.Plane3 = float4 (scaled.a, 0, 0, 0); + return output; + } +}; + +class OutputGBRA_12 : IOutputPlanarFull +{ + PS_OUTPUT_PLANAR_FULL Build (float4 sample) + { + PS_OUTPUT_PLANAR_FULL output; + float4 scaled; + sample.a *= alphaFactor; + scaled = UnormTo12bit (sample); + output.Plane0 = float4 (scaled.g, 0, 0, 0); + output.Plane1 = float4 (scaled.b, 0, 0, 0); + output.Plane2 = float4 (scaled.r, 0, 0, 0); + output.Plane3 = float4 (scaled.a, 0, 0, 0); + return output; + } +}; + +class OutputGBRAPremul_12 : IOutputPlanarFull +{ + PS_OUTPUT_PLANAR_FULL Build (float4 sample) + { + PS_OUTPUT_PLANAR_FULL output; + float4 scaled; + sample.a *= alphaFactor; + scaled = UnormTo12bit (DoAlphaPremul (sample)); + output.Plane0 = float4 
(scaled.g, 0, 0, 0);
+    output.Plane1 = float4 (scaled.b, 0, 0, 0);
+    output.Plane2 = float4 (scaled.r, 0, 0, 0);
+    output.Plane3 = float4 (scaled.a, 0, 0, 0);
+    return output;
+  }
+};
+
+/* Output builders for single-plane packed formats. Builders that emit an
+ * alpha channel scale it by alphaFactor first; the "Premul" variants then
+ * premultiply the colour channels, and only afterwards are the components
+ * swizzled into the target's memory order. */
+interface IOutputPacked
+{
+  PS_OUTPUT_PACKED Build (float4 sample);
+};
+
+class OutputRGBA : IOutputPacked
+{
+  PS_OUTPUT_PACKED Build (float4 sample)
+  {
+    PS_OUTPUT_PACKED output;
+    output.Plane0 = float4 (sample.rgb, sample.a * alphaFactor);
+    return output;
+  }
+};
+
+class OutputRGBAPremul : IOutputPacked
+{
+  PS_OUTPUT_PACKED Build (float4 sample)
+  {
+    PS_OUTPUT_PACKED output;
+    sample.a *= alphaFactor;
+    output.Plane0 = DoAlphaPremul (sample);
+    return output;
+  }
+};
+
+/* No alpha in the target: the fourth component is written as 1.0. */
+class OutputRGBx : IOutputPacked
+{
+  PS_OUTPUT_PACKED Build (float4 sample)
+  {
+    PS_OUTPUT_PACKED output;
+    output.Plane0 = float4 (sample.rgb, 1.0);
+    return output;
+  }
+};
+
+/* No alpha in the target: the first component is written as 0.0. */
+class OutputxRGB : IOutputPacked
+{
+  PS_OUTPUT_PACKED Build (float4 sample)
+  {
+    PS_OUTPUT_PACKED output;
+    output.Plane0 = float4 (0.0, sample.rgb);
+    return output;
+  }
+};
+
+class OutputARGB : IOutputPacked
+{
+  PS_OUTPUT_PACKED Build (float4 sample)
+  {
+    PS_OUTPUT_PACKED output;
+    /* Apply alphaFactor like OutputRGBA does, so the factor is honoured
+     * regardless of the component order of the target. */
+    sample.a *= alphaFactor;
+    output.Plane0 = sample.argb;
+    return output;
+  }
+};
+
+class OutputARGBPremul : IOutputPacked
+{
+  PS_OUTPUT_PACKED Build (float4 sample)
+  {
+    PS_OUTPUT_PACKED output;
+    sample.a *= alphaFactor;
+    output.Plane0 = DoAlphaPremul (sample).argb;
+    return output;
+  }
+};
+
+/* No alpha in the target: the first component is written as 0.0. */
+class OutputxBGR : IOutputPacked
+{
+  PS_OUTPUT_PACKED Build (float4 sample)
+  {
+    PS_OUTPUT_PACKED output;
+    output.Plane0 = float4 (0.0, sample.bgr);
+    return output;
+  }
+};
+
+class OutputABGR : IOutputPacked
+{
+  PS_OUTPUT_PACKED Build (float4 sample)
+  {
+    PS_OUTPUT_PACKED output;
+    sample.a *= alphaFactor;
+    output.Plane0 = sample.abgr;
+    return output;
+  }
+};
+
+class OutputABGRPremul : IOutputPacked
+{
+  PS_OUTPUT_PACKED Build (float4 sample)
+  {
+    PS_OUTPUT_PACKED output;
+    sample.a *= alphaFactor;
+    /* Premultiply first, swizzle second. DoAlphaPremul() reads alpha from
+     * the fourth component, so handing it an already swizzled vector would
+     * multiply A, B and G by R instead of multiplying R, G and B by A. */
+    output.Plane0 = DoAlphaPremul (sample).abgr;
+    return output;
+  }
+};
+
+class OutputVUYA : IOutputPacked
+{
+  PS_OUTPUT_PACKED Build (float4
sample) + { + PS_OUTPUT_PACKED output; + sample.a *= alphaFactor; + output.Plane0 = sample.zyxw; + return output; + } +}; + +class OutputVUYAPremul : IOutputPacked +{ + PS_OUTPUT_PACKED Build (float4 sample) + { + PS_OUTPUT_PACKED output; + sample.a *= alphaFactor; + output.Plane0 = DoAlphaPremul (sample).zyxw; + return output; + } +}; + +class OutputAYUV : IOutputPacked +{ + PS_OUTPUT_PACKED Build (float4 sample) + { + PS_OUTPUT_PACKED output; + sample.a *= alphaFactor; + output.Plane0 = sample.wxyz; + return output; + } +}; + +class OutputAYUVPremul : IOutputPacked +{ + PS_OUTPUT_PACKED Build (float4 sample) + { + PS_OUTPUT_PACKED output; + sample.a *= alphaFactor; + output.Plane0 = DoAlphaPremul (sample).wxyz; + return output; + } +}; + +class OutputRBGA : IOutputPacked +{ + PS_OUTPUT_PACKED Build (float4 sample) + { + PS_OUTPUT_PACKED output; + sample.a *= alphaFactor; + output.Plane0 = sample.rbga; + return output; + } +}; + +class OutputRBGAPremul : IOutputPacked +{ + PS_OUTPUT_PACKED Build (float4 sample) + { + PS_OUTPUT_PACKED output; + sample.a *= alphaFactor; + output.Plane0 = DoAlphaPremul (sample).rbga; + return output; + } +}; + +OUTPUT_TYPE ENTRY_POINT (PS_INPUT input) +{ + SAMPLER g_sampler; + CONVERTER g_converter; + OUTPUT_BUILDER g_builder; + return g_builder.Build (g_converter.Execute (g_sampler.Execute (input.Texture))); +} +#else /* BUILDING_HLSL */ +static const char str_PSMain_converter[] = +"cbuffer PsAlphaFactor : register(b1)\n" +"{\n" +" float alphaFactor;\n" +"};\n" +"\n" +"struct PSColorSpace\n" +"{\n" +" float3 CoeffX;\n" +" float3 CoeffY;\n" +" float3 CoeffZ;\n" +" float3 Offset;\n" +" float3 Min;\n" +" float3 Max;\n" +" float padding;\n" +"};\n" +"\n" +"cbuffer PsConstBuffer : register(b2)\n" +"{\n" +" PSColorSpace preCoeff;\n" +" PSColorSpace postCoeff;\n" +" PSColorSpace primariesCoeff;\n" +"};\n" +"\n" +"Texture2D shaderTexture_0 : register(t0);\n" +"Texture2D shaderTexture_1 : register(t1);\n" +"Texture2D shaderTexture_2 : 
register(t2);\n" +"Texture2D shaderTexture_3 : register(t3);\n" +"Texture1D<float> gammaDecLUT : register(t4);\n" +"Texture1D<float> gammaEncLUT: register(t5);\n" +"\n" +"SamplerState samplerState : register(s0);\n" +"SamplerState lutSamplerState : register(s1);\n" +"\n" +"struct PS_INPUT\n" +"{\n" +" float4 Position: SV_POSITION;\n" +" float2 Texture: TEXCOORD;\n" +"};\n" +"\n" +"struct PS_OUTPUT_LUMA\n" +"{\n" +" float4 Plane0: SV_TARGET0;\n" +"};\n" +"\n" +"struct PS_OUTPUT_CHROMA\n" +"{\n" +" float4 Plane0: SV_TARGET0;\n" +"};\n" +"\n" +"struct PS_OUTPUT_CHROMA_PLANAR\n" +"{\n" +" float4 Plane0: SV_TARGET0;\n" +" float4 Plane1: SV_TARGET1;\n" +"};\n" +"\n" +"struct PS_OUTPUT_PLANAR\n" +"{\n" +" float4 Plane0: SV_TARGET0;\n" +" float4 Plane1: SV_TARGET1;\n" +" float4 Plane2: SV_TARGET2;\n" +"};\n" +"\n" +"struct PS_OUTPUT_PLANAR_FULL\n" +"{\n" +" float4 Plane0: SV_TARGET0;\n" +" float4 Plane1: SV_TARGET1;\n" +" float4 Plane2: SV_TARGET2;\n" +" float4 Plane3: SV_TARGET3;\n" +"};\n" +"\n" +"struct PS_OUTPUT_PACKED\n" +"{\n" +" float4 Plane0: SV_TARGET0;\n" +"};\n" +"\n" +"float4 DoAlphaPremul (float4 sample)\n" +"{\n" +" float4 premul_tex;\n" +" premul_tex.rgb = sample.rgb * sample.a;\n" +" premul_tex.a = sample.a;\n" +" return premul_tex;\n" +"}\n" +"\n" +"float4 DoAlphaUnpremul (float4 sample)\n" +"{\n" +" float4 unpremul_tex;\n" +" if (sample.a == 0 || sample.a == 1) {\n" +" unpremul_tex = sample;\n" +" } else {\n" +" unpremul_tex.rgb = saturate (sample.rgb / sample.a);\n" +" unpremul_tex.a = sample.a;\n" +" }\n" +"\n" +" return unpremul_tex;\n" /* return the divided colour; returning the input made this function a no-op */ +"}\n" +"\n" +"interface ISampler\n" +"{\n" +" float4 Execute (float2 uv);\n" +"};\n" +"\n" +"class SamplerGRAY : ISampler\n" +"{\n" +" float4 Execute (float2 uv)\n" +" {\n" +" float4 sample;\n" +" sample.x = shaderTexture_0.Sample(samplerState, uv).x;\n" +" sample.y = 0.5;\n" +" sample.z = 0.5;\n" +" sample.a = 1.0;\n" +" return sample;\n" +" }\n" +"};\n" +"\n" +"class SamplerNV12 : ISampler\n" +"{\n" +" float4 Execute
(float2 uv)\n" +" {\n" +" float4 sample;\n" +" sample.x = shaderTexture_0.Sample(samplerState, uv).x;\n" +" sample.yz = shaderTexture_1.Sample(samplerState, uv).xy;\n" +" sample.a = 1.0;\n" +" return sample;\n" +" }\n" +"};\n" +"\n" +"class SamplerNV21 : ISampler\n" +"{\n" +" float4 Execute (float2 uv)\n" +" {\n" +" float4 sample;\n" +" sample.x = shaderTexture_0.Sample(samplerState, uv).x;\n" +" sample.yz = shaderTexture_1.Sample(samplerState, uv).yx;\n" +" sample.a = 1.0;\n" +" return sample;\n" +" }\n" +"};\n" +"\n" +"class SamplerI420 : ISampler\n" +"{\n" +" float4 Execute (float2 uv)\n" +" {\n" +" float4 sample;\n" +" sample.x = shaderTexture_0.Sample(samplerState, uv).x;\n" +" sample.y = shaderTexture_1.Sample(samplerState, uv).x;\n" +" sample.z = shaderTexture_2.Sample(samplerState, uv).x;\n" +" sample.a = 1.0;\n" +" return sample;\n" +" }\n" +"};\n" +"\n" +"class SamplerYV12 : ISampler\n" +"{\n" +" float4 Execute (float2 uv)\n" +" {\n" +" float4 sample;\n" +" sample.x = shaderTexture_0.Sample(samplerState, uv).x;\n" +" sample.z = shaderTexture_1.Sample(samplerState, uv).x;\n" +" sample.y = shaderTexture_2.Sample(samplerState, uv).x;\n" +" sample.a = 1.0;\n" +" return sample;\n" +" }\n" +"};\n" +"\n" +"class SamplerI420_10 : ISampler\n" +"{\n" +" float4 Execute (float2 uv)\n" +" {\n" +" float3 sample;\n" +" sample.x = shaderTexture_0.Sample(samplerState, uv).x;\n" +" sample.y = shaderTexture_1.Sample(samplerState, uv).x;\n" +" sample.z = shaderTexture_2.Sample(samplerState, uv).x;\n" +" return float4 (saturate (sample * 64.0), 1.0);\n" +" }\n" +"};\n" +"\n" +"class SamplerI420_12 : ISampler\n" +"{\n" +" float4 Execute (float2 uv)\n" +" {\n" +" float3 sample;\n" +" sample.x = shaderTexture_0.Sample(samplerState, uv).x;\n" +" sample.y = shaderTexture_1.Sample(samplerState, uv).x;\n" +" sample.z = shaderTexture_2.Sample(samplerState, uv).x;\n" +" return float4 (saturate (sample * 16.0), 1.0);\n" +" }\n" +"};\n" +"\n" +"class SamplerVUYA : ISampler\n" +"{\n" +" 
float4 Execute (float2 uv)\n" +" {\n" +" return shaderTexture_0.Sample(samplerState, uv).zyxw;\n" +" }\n" +"};\n" +"\n" +"class SamplerVUYAPremul : ISampler\n" +"{\n" +" float4 Execute (float2 uv)\n" +" {\n" +" return DoAlphaUnpremul (shaderTexture_0.Sample(samplerState, uv).zyxw);\n" +" }\n" +"};\n" +"\n" +"class SamplerY410 : ISampler\n" +"{\n" +" float4 Execute (float2 uv)\n" +" {\n" +" return float4 (shaderTexture_0.Sample(samplerState, uv).yxz, 1.0);\n" +" }\n" +"};\n" +"\n" +"class SamplerY412 : ISampler\n" +"{\n" +" float4 Execute (float2 uv)\n" +" {\n" +" return shaderTexture_0.Sample(samplerState, uv).grba;\n" +" }\n" +"};\n" +"\n" +"class SamplerY412Premul : ISampler\n" +"{\n" +" float4 Execute (float2 uv)\n" +" {\n" +" return DoAlphaUnpremul (shaderTexture_0.Sample(samplerState, uv).grba);\n" +" }\n" +"};\n" +"\n" +"class SamplerAYUV : ISampler\n" +"{\n" +" float4 Execute (float2 uv)\n" +" {\n" +" return shaderTexture_0.Sample(samplerState, uv).yzwx;\n" +" }\n" +"};\n" +"\n" +"class SamplerAYUVPremul : ISampler\n" +"{\n" +" float4 Execute (float2 uv)\n" +" {\n" +" return DoAlphaUnpremul (shaderTexture_0.Sample(samplerState, uv).yzwx);\n" +" }\n" +"};\n" +"\n" +"class SamplerRGBA : ISampler\n" +"{\n" +" float4 Execute (float2 uv)\n" +" {\n" +" return shaderTexture_0.Sample(samplerState, uv);\n" +" }\n" +"};\n" +"\n" +"class SamplerRGBAPremul : ISampler\n" +"{\n" +" float4 Execute (float2 uv)\n" +" {\n" +" return DoAlphaUnpremul (shaderTexture_0.Sample(samplerState, uv));\n" +" }\n" +"};\n" +"\n" +"class SamplerRGBx : ISampler\n" +"{\n" +" float4 Execute (float2 uv)\n" +" {\n" +" return float4 (shaderTexture_0.Sample(samplerState, uv).rgb, 1.0);\n" +" }\n" +"};\n" +"\n" +"class SamplerxRGB : ISampler\n" +"{\n" +" float4 Execute (float2 uv)\n" +" {\n" +" return float4 (shaderTexture_0.Sample(samplerState, uv).gba, 1.0);\n" +" }\n" +"};\n" +"\n" +"class SamplerARGB : ISampler\n" +"{\n" +" float4 Execute (float2 uv)\n" +" {\n" +" return 
shaderTexture_0.Sample(samplerState, uv).gbar;\n" +" }\n" +"};\n" +"\n" +"class SamplerARGBPremul : ISampler\n" +"{\n" +" float4 Execute (float2 uv)\n" +" {\n" +" return DoAlphaUnpremul (shaderTexture_0.Sample(samplerState, uv).gbar);\n" +" }\n" +"};\n" +"\n" +"class SamplerxBGR : ISampler\n" +"{\n" +" float4 Execute (float2 uv)\n" +" {\n" +" return float4 (shaderTexture_0.Sample(samplerState, uv).abg, 1.0);\n" +" }\n" +"};\n" +"\n" +"class SamplerABGR : ISampler\n" +"{\n" +" float4 Execute (float2 uv)\n" +" {\n" +" return shaderTexture_0.Sample(samplerState, uv).abgr;\n" +" }\n" +"};\n" +"\n" +"class SamplerABGRPremul : ISampler\n" +"{\n" +" float4 Execute (float2 uv)\n" +" {\n" +" return DoAlphaUnpremul (shaderTexture_0.Sample(samplerState, uv).abgr);\n" +" }\n" +"};\n" +"\n" +"class SamplerBGR10A2 : ISampler\n" +"{\n" +" float4 Execute (float2 uv)\n" +" {\n" +" return float4 (shaderTexture_0.Sample(samplerState, uv).zyx, 1.0);\n" +" }\n" +"};\n" +"\n" +"class SamplerBGRA64 : ISampler\n" +"{\n" +" float4 Execute (float2 uv)\n" +" {\n" +" return shaderTexture_0.Sample(samplerState, uv).bgra;\n" +" }\n" +"};\n" +"\n" +"class SamplerBGRA64Premul : ISampler\n" +"{\n" +" float4 Execute (float2 uv)\n" +" {\n" +" return DoAlphaUnpremul (shaderTexture_0.Sample(samplerState, uv).bgra);\n" +" }\n" +"};\n" +"\n" +"class SamplerGBR : ISampler\n" +"{\n" +" float4 Execute (float2 uv)\n" +" {\n" +" float4 sample;\n" +" sample.g = shaderTexture_0.Sample(samplerState, uv).x;\n" +" sample.b = shaderTexture_1.Sample(samplerState, uv).x;\n" +" sample.r = shaderTexture_2.Sample(samplerState, uv).x;\n" +" sample.a = 1.0;\n" +" return sample;\n" +" }\n" +"};\n" +"\n" +"class SamplerGBR_10 : ISampler\n" +"{\n" +" float4 Execute (float2 uv)\n" +" {\n" +" float3 sample;\n" +" sample.g = shaderTexture_0.Sample(samplerState, uv).x;\n" +" sample.b = shaderTexture_1.Sample(samplerState, uv).x;\n" +" sample.r = shaderTexture_2.Sample(samplerState, uv).x;\n" +" return float4 (saturate (sample * 
64.0), 1.0);\n" +" }\n" +"};\n" +"\n" +"class SamplerGBR_12 : ISampler\n" +"{\n" +" float4 Execute (float2 uv)\n" +" {\n" +" float3 sample;\n" +" sample.g = shaderTexture_0.Sample(samplerState, uv).x;\n" +" sample.b = shaderTexture_1.Sample(samplerState, uv).x;\n" +" sample.r = shaderTexture_2.Sample(samplerState, uv).x;\n" +" return float4 (saturate (sample * 16.0), 1.0);\n" +" }\n" +"};\n" +"\n" +"class SamplerGBRA : ISampler\n" +"{\n" +" float4 Execute (float2 uv)\n" +" {\n" +" float4 sample;\n" +" sample.g = shaderTexture_0.Sample(samplerState, uv).x;\n" +" sample.b = shaderTexture_1.Sample(samplerState, uv).x;\n" +" sample.r = shaderTexture_2.Sample(samplerState, uv).x;\n" +" sample.a = shaderTexture_3.Sample(samplerState, uv).x;\n" +" return sample;\n" +" }\n" +"};\n" +"\n" +"class SamplerGBRAPremul : ISampler\n" +"{\n" +" float4 Execute (float2 uv)\n" +" {\n" +" float4 sample;\n" +" sample.g = shaderTexture_0.Sample(samplerState, uv).x;\n" +" sample.b = shaderTexture_1.Sample(samplerState, uv).x;\n" +" sample.r = shaderTexture_2.Sample(samplerState, uv).x;\n" +" sample.a = shaderTexture_3.Sample(samplerState, uv).x;\n" +" return DoAlphaUnpremul (sample);\n" +" }\n" +"};\n" +"\n" +"class SamplerGBRA_10 : ISampler\n" +"{\n" +" float4 Execute (float2 uv)\n" +" {\n" +" float4 sample;\n" +" sample.g = shaderTexture_0.Sample(samplerState, uv).x;\n" +" sample.b = shaderTexture_1.Sample(samplerState, uv).x;\n" +" sample.r = shaderTexture_2.Sample(samplerState, uv).x;\n" +" sample.a = shaderTexture_3.Sample(samplerState, uv).x;\n" +" return saturate (sample * 64.0);\n" +" }\n" +"};\n" +"\n" +"class SamplerGBRAPremul_10 : ISampler\n" +"{\n" +" float4 Execute (float2 uv)\n" +" {\n" +" float4 sample;\n" +" sample.g = shaderTexture_0.Sample(samplerState, uv).x;\n" +" sample.b = shaderTexture_1.Sample(samplerState, uv).x;\n" +" sample.r = shaderTexture_2.Sample(samplerState, uv).x;\n" +" sample.a = shaderTexture_3.Sample(samplerState, uv).x;\n" +" return DoAlphaUnpremul 
(saturate (sample * 64.0));\n" +" }\n" +"};\n" +"\n" +"class SamplerGBRA_12 : ISampler\n" +"{\n" +" float4 Execute (float2 uv)\n" +" {\n" +" float4 sample;\n" +" sample.g = shaderTexture_0.Sample(samplerState, uv).x;\n" +" sample.b = shaderTexture_1.Sample(samplerState, uv).x;\n" +" sample.r = shaderTexture_2.Sample(samplerState, uv).x;\n" +" sample.a = shaderTexture_3.Sample(samplerState, uv).x;\n" +" return saturate (sample * 16.0);\n" +" }\n" +"};\n" +"\n" +"class SamplerGBRAPremul_12 : ISampler\n" +"{\n" +" float4 Execute (float2 uv)\n" +" {\n" +" float4 sample;\n" +" sample.g = shaderTexture_0.Sample(samplerState, uv).x;\n" +" sample.b = shaderTexture_1.Sample(samplerState, uv).x;\n" +" sample.r = shaderTexture_2.Sample(samplerState, uv).x;\n" +" sample.a = shaderTexture_3.Sample(samplerState, uv).x;\n" +" return DoAlphaUnpremul (saturate (sample * 16.0));\n" +" }\n" +"};\n" +"\n" +"class SamplerRGBP : ISampler\n" +"{\n" +" float4 Execute (float2 uv)\n" +" {\n" +" float4 sample;\n" +" sample.r = shaderTexture_0.Sample(samplerState, uv).x;\n" +" sample.g = shaderTexture_1.Sample(samplerState, uv).x;\n" +" sample.b = shaderTexture_2.Sample(samplerState, uv).x;\n" +" sample.a = 1.0;\n" +" return sample;\n" +" }\n" +"};\n" +"\n" +"class SamplerBGRP : ISampler\n" +"{\n" +" float4 Execute (float2 uv)\n" +" {\n" +" float4 sample;\n" +" sample.b = shaderTexture_0.Sample(samplerState, uv).x;\n" +" sample.g = shaderTexture_1.Sample(samplerState, uv).x;\n" +" sample.r = shaderTexture_2.Sample(samplerState, uv).x;\n" +" sample.a = 1.0;\n" +" return sample;\n" +" }\n" +"};\n" +"\n" +"class SamplerRBGA : ISampler\n" +"{\n" +" float4 Execute (float2 uv)\n" +" {\n" +" return shaderTexture_0.Sample(samplerState, uv).rbga;\n" +" }\n" +"};\n" +"\n" +"class SamplerRBGAPremul : ISampler\n" +"{\n" +" float4 Execute (float2 uv)\n" +" {\n" +" return DoAlphaUnpremul (shaderTexture_0.Sample(samplerState, uv).rbga);\n" +" }\n" +"};\n" +"\n" +"interface IConverter\n" +"{\n" +" float4 
Execute (float4 sample);\n" +"};\n" +"\n" +"class ConverterIdentity : IConverter\n" +"{\n" +" float4 Execute (float4 sample)\n" +" {\n" +" return sample;\n" +" }\n" +"};\n" +"\n" +"class ConverterRange : IConverter\n" +"{\n" +" float4 Execute (float4 sample)\n" +" {\n" +" float3 out_space;\n" +" out_space.x = postCoeff.CoeffX.x * sample.x;\n" +" out_space.y = postCoeff.CoeffY.y * sample.y;\n" +" out_space.z = postCoeff.CoeffZ.z * sample.z;\n" +" out_space += postCoeff.Offset;\n" +" return float4 (clamp (out_space, postCoeff.Min, postCoeff.Max), sample.a);\n" +" }\n" +"};\n" +"\n" +"class ConverterSimple : IConverter\n" +"{\n" +" float4 Execute (float4 sample)\n" +" {\n" +" float3 out_space;\n" +" out_space.x = dot (postCoeff.CoeffX, sample.xyz);\n" +" out_space.y = dot (postCoeff.CoeffY, sample.xyz);\n" +" out_space.z = dot (postCoeff.CoeffZ, sample.xyz);\n" +" out_space += postCoeff.Offset;\n" +" return float4 (clamp (out_space, postCoeff.Min, postCoeff.Max), sample.a);\n" +" }\n" +"};\n" +"\n" +"class ConverterGamma : IConverter\n" +"{\n" +" float4 Execute (float4 sample)\n" +" {\n" +" float3 out_space;\n" +" out_space.x = dot (preCoeff.CoeffX, sample.xyz);\n" +" out_space.y = dot (preCoeff.CoeffY, sample.xyz);\n" +" out_space.z = dot (preCoeff.CoeffZ, sample.xyz);\n" +" out_space += preCoeff.Offset;\n" +" out_space = clamp (out_space, preCoeff.Min, preCoeff.Max);\n" +"\n" +" out_space.x = gammaDecLUT.Sample (lutSamplerState, out_space.x);\n" +" out_space.y = gammaDecLUT.Sample (lutSamplerState, out_space.y);\n" +" out_space.z = gammaDecLUT.Sample (lutSamplerState, out_space.z);\n" +"\n" +" out_space.x = gammaEncLUT.Sample (lutSamplerState, out_space.x);\n" +" out_space.y = gammaEncLUT.Sample (lutSamplerState, out_space.y);\n" +" out_space.z = gammaEncLUT.Sample (lutSamplerState, out_space.z);\n" +"\n" +" out_space.x = dot (postCoeff.CoeffX, out_space);\n" +" out_space.y = dot (postCoeff.CoeffY, out_space);\n" +" out_space.z = dot (postCoeff.CoeffZ, 
out_space);\n" +" out_space += postCoeff.Offset;\n" +" return float4 (clamp (out_space, postCoeff.Min, postCoeff.Max), sample.a);\n" +" }\n" +"};\n" +"\n" +"class ConverterPrimary : IConverter\n" +"{\n" +" float4 Execute (float4 sample)\n" +" {\n" +" float3 out_space;\n" +" float3 tmp;\n" +" out_space.x = dot (preCoeff.CoeffX, sample.xyz);\n" +" out_space.y = dot (preCoeff.CoeffY, sample.xyz);\n" +" out_space.z = dot (preCoeff.CoeffZ, sample.xyz);\n" +" out_space += preCoeff.Offset;\n" +" out_space = clamp (out_space, preCoeff.Min, preCoeff.Max);\n" +"\n" +" out_space.x = gammaDecLUT.Sample (lutSamplerState, out_space.x);\n" +" out_space.y = gammaDecLUT.Sample (lutSamplerState, out_space.y);\n" +" out_space.z = gammaDecLUT.Sample (lutSamplerState, out_space.z);\n" +"\n" +" tmp.x = dot (primariesCoeff.CoeffX, out_space);\n" +" tmp.y = dot (primariesCoeff.CoeffY, out_space);\n" +" tmp.z = dot (primariesCoeff.CoeffZ, out_space);\n" +"\n" +" out_space.x = gammaEncLUT.Sample (lutSamplerState, tmp.x);\n" +" out_space.y = gammaEncLUT.Sample (lutSamplerState, tmp.y);\n" +" out_space.z = gammaEncLUT.Sample (lutSamplerState, tmp.z);\n" +"\n" +" out_space.x = dot (postCoeff.CoeffX, out_space);\n" +" out_space.y = dot (postCoeff.CoeffY, out_space);\n" +" out_space.z = dot (postCoeff.CoeffZ, out_space);\n" +" out_space += postCoeff.Offset;\n" +" return float4 (clamp (out_space, postCoeff.Min, postCoeff.Max), sample.a);\n" +" }\n" +"};\n" +"\n" +"float UnormTo10bit (float sample)\n" +"{\n" +" return sample * 1023.0 / 65535.0;\n" +"}\n" +"\n" +"float2 UnormTo10bit (float2 sample)\n" +"{\n" +" return sample * 1023.0 / 65535.0;\n" +"}\n" +"\n" +"float3 UnormTo10bit (float3 sample)\n" +"{\n" +" return sample * 1023.0 / 65535.0;\n" +"}\n" +"\n" +"float4 UnormTo10bit (float4 sample)\n" +"{\n" +" return sample * 1023.0 / 65535.0;\n" +"}\n" +"\n" +"float UnormTo12bit (float sample)\n" +"{\n" +" return sample * 4095.0 / 65535.0;\n" +"}\n" +"\n" +"float2 UnormTo12bit (float2 sample)\n" 
+"{\n" +" return sample * 4095.0 / 65535.0;\n" +"}\n" +"\n" +"float3 UnormTo12bit (float3 sample)\n" +"{\n" +" return sample * 4095.0 / 65535.0;\n" +"}\n" +"\n" +"float4 UnormTo12bit (float4 sample)\n" +"{\n" +" return sample * 4095.0 / 65535.0;\n" +"}\n" +"\n" +"interface IOutputLuma\n" +"{\n" +" PS_OUTPUT_LUMA Build (float4 sample);\n" +"};\n" +"\n" +"class OutputLuma : IOutputLuma\n" +"{\n" +" PS_OUTPUT_LUMA Build (float4 sample)\n" +" {\n" +" PS_OUTPUT_LUMA output;\n" +" output.Plane0 = float4 (sample.x, 0, 0, 0);\n" +" return output;\n" +" }\n" +"};\n" +"\n" +"class OutputLuma_10 : IOutputLuma\n" +"{\n" +" PS_OUTPUT_LUMA Build (float4 sample)\n" +" {\n" +" PS_OUTPUT_LUMA output;\n" +" output.Plane0 = float4 (UnormTo10bit (sample.x), 0, 0, 0);\n" +" return output;\n" +" }\n" +"};\n" +"\n" +"class OutputLuma_12 : IOutputLuma\n" +"{\n" +" PS_OUTPUT_LUMA Build (float4 sample)\n" +" {\n" +" PS_OUTPUT_LUMA output;\n" +" output.Plane0 = float4 (UnormTo12bit (sample.x), 0, 0, 0);\n" +" return output;\n" +" }\n" +"};\n" +"\n" +"interface IOutputChroma\n" +"{\n" +" PS_OUTPUT_CHROMA Build (float4 sample);\n" +"};\n" +"\n" +"class OutputChromaNV12 : IOutputChroma\n" +"{\n" +" PS_OUTPUT_CHROMA Build (float4 sample)\n" +" {\n" +" PS_OUTPUT_CHROMA output;\n" +" output.Plane0 = float4 (sample.yz, 0, 0);\n" +" return output;\n" +" }\n" +"};\n" +"\n" +"class OutputChromaNV21 : IOutputChroma\n" +"{\n" +" PS_OUTPUT_CHROMA Build (float4 sample)\n" +" {\n" +" PS_OUTPUT_CHROMA output;\n" +" output.Plane0 = float4 (sample.zy, 0, 0);\n" +" return output;\n" +" }\n" +"};\n" +"\n" +"interface IOutputChromaPlanar\n" +"{ PS_OUTPUT_CHROMA_PLANAR Build (float4 sample);\n" +"};\n" +"\n" +"class OutputChromaI420 : IOutputChromaPlanar\n" +"{\n" +" PS_OUTPUT_CHROMA_PLANAR Build (float4 sample)\n" +" {\n" +" PS_OUTPUT_CHROMA_PLANAR output;\n" +" output.Plane0 = float4 (sample.y, 0, 0, 0);\n" +" output.Plane1 = float4 (sample.z, 0, 0, 0);\n" +" return output;\n" +" }\n" +"};\n" +"\n" +"class 
OutputChromaYV12 : IOutputChromaPlanar\n" +"{\n" +" PS_OUTPUT_CHROMA_PLANAR Build (float4 sample)\n" +" {\n" +" PS_OUTPUT_CHROMA_PLANAR output;\n" +" output.Plane0 = float4 (sample.z, 0, 0, 0);\n" +" output.Plane1 = float4 (sample.y, 0, 0, 0);\n" +" return output;\n" +" }\n" +"};\n" +"\n" +"class OutputChromaI420_10 : IOutputChromaPlanar\n" +"{\n" +" PS_OUTPUT_CHROMA_PLANAR Build (float4 sample)\n" +" {\n" +" PS_OUTPUT_CHROMA_PLANAR output;\n" +" float2 scaled = UnormTo10bit (sample.yz);\n" +" output.Plane0 = float4 (scaled.x, 0, 0, 0);\n" +" output.Plane1 = float4 (scaled.y, 0, 0, 0);\n" +" return output;\n" +" }\n" +"};\n" +"\n" +"class OutputChromaI420_12 : IOutputChromaPlanar\n" +"{\n" +" PS_OUTPUT_CHROMA_PLANAR Build (float4 sample)\n" +" {\n" +" PS_OUTPUT_CHROMA_PLANAR output;\n" +" float2 scaled = UnormTo12bit (sample.yz);\n" +" output.Plane0 = float4 (scaled.x, 0, 0, 0);\n" +" output.Plane1 = float4 (scaled.y, 0, 0, 0);\n" +" return output;\n" +" }\n" +"};\n" +"\n" +"interface IOutputPlanar\n" +"{\n" +" PS_OUTPUT_PLANAR Build (float4 sample);\n" +"};\n" +"\n" +"class OutputY444 : IOutputPlanar\n" +"{\n" +" PS_OUTPUT_PLANAR Build (float4 sample)\n" +" {\n" +" PS_OUTPUT_PLANAR output;\n" +" output.Plane0 = float4 (sample.x, 0, 0, 0);\n" +" output.Plane1 = float4 (sample.y, 0, 0, 0);\n" +" output.Plane2 = float4 (sample.z, 0, 0, 0);\n" +" return output;\n" +" }\n" +"};\n" +"\n" +"class OutputY444_10 : IOutputPlanar\n" +"{\n" +" PS_OUTPUT_PLANAR Build (float4 sample)\n" +" {\n" +" PS_OUTPUT_PLANAR output;\n" +" float3 scaled = UnormTo10bit (sample.xyz);\n" +" output.Plane0 = float4 (scaled.x, 0, 0, 0);\n" +" output.Plane1 = float4 (scaled.y, 0, 0, 0);\n" +" output.Plane2 = float4 (scaled.z, 0, 0, 0);\n" +" return output;\n" +" }\n" +"};\n" +"\n" +"class OutputY444_12 : IOutputPlanar\n" +"{\n" +" PS_OUTPUT_PLANAR Build (float4 sample)\n" +" {\n" +" PS_OUTPUT_PLANAR output;\n" +" float3 scaled = UnormTo12bit (sample.xyz);\n" +" output.Plane0 = float4 (scaled.x, 
0, 0, 0);\n" +" output.Plane1 = float4 (scaled.y, 0, 0, 0);\n" +" output.Plane2 = float4 (scaled.z, 0, 0, 0);\n" +" return output;\n" +" }\n" +"};\n" +"\n" +"class OutputGBR : IOutputPlanar\n" +"{\n" +" PS_OUTPUT_PLANAR Build (float4 sample)\n" +" {\n" +" PS_OUTPUT_PLANAR output;\n" +" output.Plane0 = float4 (sample.g, 0, 0, 0);\n" +" output.Plane1 = float4 (sample.b, 0, 0, 0);\n" +" output.Plane2 = float4 (sample.r, 0, 0, 0);\n" +" return output;\n" +" }\n" +"};\n" +"\n" +"class OutputGBR_10 : IOutputPlanar\n" +"{\n" +" PS_OUTPUT_PLANAR Build (float4 sample)\n" +" {\n" +" PS_OUTPUT_PLANAR output;\n" +" float3 scaled = UnormTo10bit (sample.rgb);\n" +" output.Plane0 = float4 (scaled.g, 0, 0, 0);\n" +" output.Plane1 = float4 (scaled.b, 0, 0, 0);\n" +" output.Plane2 = float4 (scaled.r, 0, 0, 0);\n" +" return output;\n" +" }\n" +"};\n" +"\n" +"class OutputGBR_12 : IOutputPlanar\n" +"{\n" +" PS_OUTPUT_PLANAR Build (float4 sample)\n" +" {\n" +" PS_OUTPUT_PLANAR output;\n" +" float3 scaled = UnormTo12bit (sample.rgb);\n" +" output.Plane0 = float4 (scaled.g, 0, 0, 0);\n" +" output.Plane1 = float4 (scaled.b, 0, 0, 0);\n" +" output.Plane2 = float4 (scaled.r, 0, 0, 0);\n" +" return output;\n" +" }\n" +"};\n" +"\n" +"class OutputRGBP : IOutputPlanar\n" +"{\n" +" PS_OUTPUT_PLANAR Build (float4 sample)\n" +" {\n" +" PS_OUTPUT_PLANAR output;\n" +" output.Plane0 = float4 (sample.r, 0, 0, 0);\n" +" output.Plane1 = float4 (sample.g, 0, 0, 0);\n" +" output.Plane2 = float4 (sample.b, 0, 0, 0);\n" +" return output;\n" +" }\n" +"};\n" +"\n" +"class OutputBGRP : IOutputPlanar\n" +"{\n" +" PS_OUTPUT_PLANAR Build (float4 sample)\n" +" {\n" +" PS_OUTPUT_PLANAR output;\n" +" output.Plane0 = float4 (sample.b, 0, 0, 0);\n" +" output.Plane1 = float4 (sample.g, 0, 0, 0);\n" +" output.Plane2 = float4 (sample.r, 0, 0, 0);\n" +" return output;\n" +" }\n" +"};\n" +"\n" +"interface IOutputPlanarFull\n" +"{\n" +" PS_OUTPUT_PLANAR_FULL Build (float4 sample);\n" +"};\n" +"\n" +"class OutputGBRA : 
IOutputPlanarFull\n" +"{\n" +" PS_OUTPUT_PLANAR_FULL Build (float4 sample)\n" +" {\n" +" PS_OUTPUT_PLANAR_FULL output;\n" +" output.Plane0 = float4 (sample.g, 0, 0, 0);\n" +" output.Plane1 = float4 (sample.b, 0, 0, 0);\n" +" output.Plane2 = float4 (sample.r, 0, 0, 0);\n" +" output.Plane3 = float4 (sample.a * alphaFactor, 0, 0, 0);\n" +" return output;\n" +" }\n" +"};\n" +"\n" +"class OutputGBRAPremul : IOutputPlanarFull\n" +"{\n" +" PS_OUTPUT_PLANAR_FULL Build (float4 sample)\n" +" {\n" +" PS_OUTPUT_PLANAR_FULL output;\n" +" float4 premul;\n" +" sample.a *= alphaFactor;\n" +" premul = DoAlphaPremul (sample);\n" +" output.Plane0 = float4 (premul.g, 0, 0, 0);\n" +" output.Plane1 = float4 (premul.b, 0, 0, 0);\n" +" output.Plane2 = float4 (premul.r, 0, 0, 0);\n" +" output.Plane3 = float4 (premul.a, 0, 0, 0);\n" +" return output;\n" +" }\n" +"};\n" +"\n" +"class OutputGBRA_10 : IOutputPlanarFull\n" +"{\n" +" PS_OUTPUT_PLANAR_FULL Build (float4 sample)\n" +" {\n" +" PS_OUTPUT_PLANAR_FULL output;\n" +" float4 scaled;\n" +" sample.a *= alphaFactor;\n" +" scaled = UnormTo10bit (sample);\n" +" output.Plane0 = float4 (scaled.g, 0, 0, 0);\n" +" output.Plane1 = float4 (scaled.b, 0, 0, 0);\n" +" output.Plane2 = float4 (scaled.r, 0, 0, 0);\n" +" output.Plane3 = float4 (scaled.a, 0, 0, 0);\n" +" return output;\n" +" }\n" +"};\n" +"\n" +"class OutputGBRAPremul_10 : IOutputPlanarFull\n" +"{\n" +" PS_OUTPUT_PLANAR_FULL Build (float4 sample)\n" +" {\n" +" PS_OUTPUT_PLANAR_FULL output;\n" +" float4 scaled;\n" +" sample.a *= alphaFactor;\n" +" scaled = UnormTo10bit (DoAlphaPremul (sample));\n" +" output.Plane0 = float4 (scaled.g, 0, 0, 0);\n" +" output.Plane1 = float4 (scaled.b, 0, 0, 0);\n" +" output.Plane2 = float4 (scaled.r, 0, 0, 0);\n" +" output.Plane3 = float4 (scaled.a, 0, 0, 0);\n" +" return output;\n" +" }\n" +"};\n" +"\n" +"class OutputGBRA_12 : IOutputPlanarFull\n" +"{\n" +" PS_OUTPUT_PLANAR_FULL Build (float4 sample)\n" +" {\n" +" PS_OUTPUT_PLANAR_FULL output;\n" +" float4 
scaled;\n" +" sample.a *= alphaFactor;\n" +" scaled = UnormTo12bit (sample);\n" +" output.Plane0 = float4 (scaled.g, 0, 0, 0);\n" +" output.Plane1 = float4 (scaled.b, 0, 0, 0);\n" +" output.Plane2 = float4 (scaled.r, 0, 0, 0);\n" +" output.Plane3 = float4 (scaled.a, 0, 0, 0);\n" +" return output;\n" +" }\n" +"};\n" +"\n" +"class OutputGBRAPremul_12 : IOutputPlanarFull\n" +"{\n" +" PS_OUTPUT_PLANAR_FULL Build (float4 sample)\n" +" {\n" +" PS_OUTPUT_PLANAR_FULL output;\n" +" float4 scaled;\n" +" sample.a *= alphaFactor;\n" +" scaled = UnormTo12bit (DoAlphaPremul (sample));\n" +" output.Plane0 = float4 (scaled.g, 0, 0, 0);\n" +" output.Plane1 = float4 (scaled.b, 0, 0, 0);\n" +" output.Plane2 = float4 (scaled.r, 0, 0, 0);\n" +" output.Plane3 = float4 (scaled.a, 0, 0, 0);\n" +" return output;\n" +" }\n" +"};\n" +"\n" +"interface IOutputPacked\n" +"{\n" +" PS_OUTPUT_PACKED Build (float4 sample);\n" +"};\n" +"\n" +"class OutputRGBA : IOutputPacked\n" +"{\n" +" PS_OUTPUT_PACKED Build (float4 sample)\n" +" {\n" +" PS_OUTPUT_PACKED output;\n" +" output.Plane0 = float4 (sample.rgb, sample.a * alphaFactor);\n" +" return output;\n" +" }\n" +"};\n" +"\n" +"class OutputRGBAPremul : IOutputPacked\n" +"{\n" +" PS_OUTPUT_PACKED Build (float4 sample)\n" +" {\n" +" PS_OUTPUT_PACKED output;\n" +" sample.a *= alphaFactor;\n" +" output.Plane0 = DoAlphaPremul (sample);\n" +" return output;\n" +" }\n" +"};\n" +"\n" +"class OutputRGBx : IOutputPacked\n" +"{\n" +" PS_OUTPUT_PACKED Build (float4 sample)\n" +" {\n" +" PS_OUTPUT_PACKED output;\n" +" output.Plane0 = float4 (sample.rgb, 1.0);\n" +" return output;\n" +" }\n" +"};\n" +"\n" +"class OutputxRGB : IOutputPacked\n" +"{\n" +" PS_OUTPUT_PACKED Build (float4 sample)\n" +" {\n" +" PS_OUTPUT_PACKED output;\n" +" output.Plane0 = float4 (0.0, sample.rgb);\n" +" return output;\n" +" }\n" +"};\n" +"\n" +"class OutputARGB : IOutputPacked\n" +"{\n" +" PS_OUTPUT_PACKED Build (float4 sample)\n" +" {\n" +" PS_OUTPUT_PACKED output;\n" +" output.Plane0 
= sample.argb;\n" +" return output;\n" +" }\n" +"};\n" +"\n" +"class OutputARGBPremul : IOutputPacked\n" +"{\n" +" PS_OUTPUT_PACKED Build (float4 sample)\n" +" {\n" +" PS_OUTPUT_PACKED output;\n" +" output.Plane0 = DoAlphaPremul (sample).argb;\n" +" return output;\n" +" }\n" +"};\n" +"\n" +"class OutputxBGR : IOutputPacked\n" +"{\n" +" PS_OUTPUT_PACKED Build (float4 sample)\n" +" {\n" +" PS_OUTPUT_PACKED output;\n" +" output.Plane0 = float4 (0.0, sample.bgr);\n" +" return output;\n" +" }\n" +"};\n" +"\n" +"class OutputABGR : IOutputPacked\n" +"{\n" +" PS_OUTPUT_PACKED Build (float4 sample)\n" +" {\n" +" PS_OUTPUT_PACKED output;\n" +" output.Plane0 = sample.abgr;\n" +" return output;\n" +" }\n" +"};\n" +"\n" +"class OutputABGRPremul : IOutputPacked\n" +"{\n" +" PS_OUTPUT_PACKED Build (float4 sample)\n" +" {\n" +" PS_OUTPUT_PACKED output;\n" +" output.Plane0 = DoAlphaPremul (sample).abgr;\n" /* premultiply while alpha is still in .a, then reorder to A,B,G,R */ +" return output;\n" +" }\n" +"};\n" +"\n" +"class OutputVUYA : IOutputPacked\n" +"{\n" +" PS_OUTPUT_PACKED Build (float4 sample)\n" +" {\n" +" PS_OUTPUT_PACKED output;\n" +" sample.a *= alphaFactor;\n" +" output.Plane0 = sample.zyxw;\n" +" return output;\n" +" }\n" +"};\n" +"\n" +"class OutputVUYAPremul : IOutputPacked\n" +"{\n" +" PS_OUTPUT_PACKED Build (float4 sample)\n" +" {\n" +" PS_OUTPUT_PACKED output;\n" +" sample.a *= alphaFactor;\n" +" output.Plane0 = DoAlphaPremul (sample).zyxw;\n" +" return output;\n" +" }\n" +"};\n" +"\n" +"class OutputAYUV : IOutputPacked\n" +"{\n" +" PS_OUTPUT_PACKED Build (float4 sample)\n" +" {\n" +" PS_OUTPUT_PACKED output;\n" +" sample.a *= alphaFactor;\n" +" output.Plane0 = sample.wxyz;\n" +" return output;\n" +" }\n" +"};\n" +"\n" +"class OutputAYUVPremul : IOutputPacked\n" +"{\n" +" PS_OUTPUT_PACKED Build (float4 sample)\n" +" {\n" +" PS_OUTPUT_PACKED output;\n" +" sample.a *= alphaFactor;\n" +" output.Plane0 = DoAlphaPremul (sample).wxyz;\n" +" return output;\n" +" }\n" +"};\n" +"\n" +"class OutputRBGA : IOutputPacked\n" +"{\n" +"
PS_OUTPUT_PACKED Build (float4 sample)\n" +" {\n" +" PS_OUTPUT_PACKED output;\n" +" sample.a *= alphaFactor;\n" +" output.Plane0 = sample.rbga;\n" +" return output;\n" +" }\n" +"};\n" +"\n" +"class OutputRBGAPremul : IOutputPacked\n" +"{\n" +" PS_OUTPUT_PACKED Build (float4 sample)\n" +" {\n" +" PS_OUTPUT_PACKED output;\n" +" sample.a *= alphaFactor;\n" +" output.Plane0 = DoAlphaPremul (sample).rbga;\n" +" return output;\n" +" }\n" +"};\n" +"\n" +"OUTPUT_TYPE ENTRY_POINT (PS_INPUT input)\n" +"{\n" +" SAMPLER g_sampler;\n" +" CONVERTER g_converter;\n" +" OUTPUT_BUILDER g_builder;\n" +" return g_builder.Build (g_converter.Execute (g_sampler.Execute (input.Texture)));\n" +"}\n"; +#endif diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/converter-hlsl/VSMain_converter.hlsl b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/converter-hlsl/VSMain_converter.hlsl new file mode 100644 index 0000000000..6ece80da3f --- /dev/null +++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/converter-hlsl/VSMain_converter.hlsl @@ -0,0 +1,75 @@ +/* GStreamer + * Copyright (C) 2023 Seungha Yang <seungha@centricular.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ */ + +#ifdef BUILDING_HLSL +cbuffer VsConstBuffer : register(b0) +{ + matrix Transform; +}; + +struct VS_INPUT +{ + float4 Position : POSITION; + float2 Texture : TEXCOORD; +}; + +struct VS_OUTPUT +{ + float4 Position : SV_POSITION; + float2 Texture : TEXCOORD; +}; + +VS_OUTPUT ENTRY_POINT (VS_INPUT input) +{ + VS_OUTPUT output; + + output.Position = mul (Transform, input.Position); + output.Texture = input.Texture; + + return output; +} +#else +static const char str_VSMain_converter[] = +"cbuffer VsConstBuffer : register(b0)\n" +"{\n" +" matrix Transform;\n" +"};\n" +"\n" +"struct VS_INPUT\n" +"{\n" +" float4 Position : POSITION;\n" +" float2 Texture : TEXCOORD;\n" +"};\n" +"\n" +"struct VS_OUTPUT\n" +"{\n" +" float4 Position : SV_POSITION;\n" +" float2 Texture : TEXCOORD;\n" +"};\n" +"\n" +"VS_OUTPUT ENTRY_POINT (VS_INPUT input)\n" +"{\n" +" VS_OUTPUT output;\n" +"\n" +" output.Position = mul (Transform, input.Position);\n" +" output.Texture = input.Texture;\n" +"\n" +" return output;\n" +"}\n"; +#endif diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/converter-hlsl/collect_hlsl_headers.py b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/converter-hlsl/collect_hlsl_headers.py new file mode 100644 index 0000000000..a6f13950b8 --- /dev/null +++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/converter-hlsl/collect_hlsl_headers.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python3 +# GStreamer +# Copyright (C) 2023 Seungha Yang <seungha@centricular.com> +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Library General Public +# License as published by the Free Software Foundation; either +# version 2 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +# Library General Public License for more details. +# +# You should have received a copy of the GNU Library General Public +# License along with this library; if not, write to the +# Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, +# Boston, MA 02110-1301, USA. + +import sys +import os +import argparse + +start_header = """/* + * This file is autogenerated by collect_hlsl_headers.py + */ +#pragma once + +""" + +start_map = """ +#define MAKE_BYTECODE(name) { G_STRINGIFY (name), { g_##name, sizeof (g_##name)} } +static std::unordered_map<std::string, std::pair<const BYTE *, SIZE_T>> +""" + +end_map = """}; +#undef MAKE_BYTECODE +""" + +def main(args): + parser = argparse.ArgumentParser(description='Read precompiled HLSL headers from directory and make single header') + parser.add_argument("--input", help="the precompiled HLSL header directory") + parser.add_argument("--output", help="output header file location") + parser.add_argument("--prefix", help="HLSL header filename prefix") + parser.add_argument("--name", help="Hash map variable name") + args = parser.parse_args(args) + + # Scan precompiled <prefix>*.h headers in the input directory; sort them because + # os.listdir() order is arbitrary and the generated header must be reproducible + hlsl_headers = sorted(os.path.basename(file) for file in os.listdir(args.input) if file.startswith(args.prefix) and file.endswith(".h")) + + with open(args.output, 'w', newline='\n', encoding='utf8') as f: + f.write(start_header) + for file in hlsl_headers: + f.write("#include \"") + f.write(file) + f.write("\"\n") + f.write(start_map) + f.write(args.name) + f.write(" = {\n") + for file in hlsl_headers: + f.write(" MAKE_BYTECODE ({}),\n".format(os.path.splitext(file)[0])) + f.write(end_map) + + +if __name__ == "__main__": + sys.exit(main(sys.argv[1:])) diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/converter-hlsl/hlsl.h b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/converter-hlsl/hlsl.h new file mode 100644 index 0000000000..5fabd0b852 --- /dev/null +++
b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/converter-hlsl/hlsl.h
@@ -0,0 +1,24 @@
+/* GStreamer
+ * Copyright (C) 2023 Seungha Yang <seungha@centricular.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#pragma once
+/* NOTE(review): presumably included from C++ without BUILDING_HLSL so each file yields its str_* source string - confirm for all three */
+#include "CSMain_converter.hlsl"
+#include "PSMain_converter.hlsl"
+#include "VSMain_converter.hlsl"
diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/converter-hlsl/meson.build b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/converter-hlsl/meson.build
new file mode 100644
index 0000000000..b501a9715b
--- /dev/null
+++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/converter-hlsl/meson.build
@@ -0,0 +1,230 @@
+hlsl_ps_source = files('PSMain_converter.hlsl')
+hlsl_vs_source = files('VSMain_converter.hlsl')
+hlsl_cs_source = files('CSMain_converter.hlsl')
+
+hlsl_ps_input_formats = [ # [sampler suffix, flag]; flag is compared with the output flag below (presumably true = RGB - confirm)
+  ['NV12', false],
+  ['NV21', false],
+  ['I420', false],
+  ['YV12', false],
+  ['I420_10', false],
+  ['I420_12', false],
+  ['VUYA', false],
+  ['VUYAPremul', false],
+  ['Y410', false],
+  ['AYUV', false],
+  ['AYUVPremul', false],
+  ['Y412', false],
+  ['Y412Premul', false],
+  ['RGBA', true],
+  ['RGBAPremul', true],
+  ['RGBx', true],
+  ['GBR', true],
+  ['GBR_10', true],
+  ['GBR_12', true],
+  ['GBRA', true],
+  ['GBRAPremul', true],
+  ['GBRA_10', true],
+  ['GBRAPremul_10', true],
+  ['GBRA_12', true],
+  ['GBRAPremul_12', true],
+  ['RGBP', true],
+  ['BGRP', true],
+  ['xRGB', true],
+  ['ARGB', true],
+  ['ARGBPremul', true],
+  ['xBGR', true],
+  ['ABGR', true],
+  ['ABGRPremul', true],
+  ['BGR10A2', true],
+  ['BGRA64', true],
+  ['BGRA64Premul', true],
+  ['RBGA', true],
+  ['RBGAPremul', true],
+]
+
+hlsl_ps_output_formats = [ # [OUTPUT_TYPE define, output builder suffix, flag compared with the input flag]
+  ['PS_OUTPUT_LUMA', 'Luma', false],
+  ['PS_OUTPUT_LUMA', 'Luma_10', false],
+  ['PS_OUTPUT_LUMA', 'Luma_12', false],
+  ['PS_OUTPUT_CHROMA', 'ChromaNV12', false],
+  ['PS_OUTPUT_CHROMA', 'ChromaNV21', false],
+  ['PS_OUTPUT_CHROMA_PLANAR', 'ChromaI420', false],
+  ['PS_OUTPUT_CHROMA_PLANAR', 'ChromaYV12', false],
+  ['PS_OUTPUT_CHROMA_PLANAR', 'ChromaI420_10', false],
+  ['PS_OUTPUT_CHROMA_PLANAR', 'ChromaI420_12', false],
+  ['PS_OUTPUT_PLANAR', 'Y444', false],
+  ['PS_OUTPUT_PLANAR', 'Y444_10', false],
+  ['PS_OUTPUT_PLANAR', 'Y444_12', false],
+  ['PS_OUTPUT_PLANAR', 'GBR', true],
+  ['PS_OUTPUT_PLANAR', 'GBR_10', true],
+  ['PS_OUTPUT_PLANAR', 'GBR_12', true],
+  ['PS_OUTPUT_PLANAR', 'RGBP', true],
+  ['PS_OUTPUT_PLANAR', 'BGRP', true],
+  ['PS_OUTPUT_PLANAR_FULL', 'GBRA', true],
+  ['PS_OUTPUT_PLANAR_FULL', 'GBRAPremul', true],
+  ['PS_OUTPUT_PLANAR_FULL', 'GBRA_10', true],
+  ['PS_OUTPUT_PLANAR_FULL', 'GBRAPremul_10', true],
+  ['PS_OUTPUT_PLANAR_FULL', 'GBRA_12', true],
+  ['PS_OUTPUT_PLANAR_FULL', 'GBRAPremul_12', true],
+  ['PS_OUTPUT_PACKED', 'RGBA', true],
+  ['PS_OUTPUT_PACKED', 'RGBAPremul', true],
+  ['PS_OUTPUT_PACKED', 'RBGA', true],
+  ['PS_OUTPUT_PACKED', 'RBGAPremul', true],
+  ['PS_OUTPUT_PACKED', 'RGBx', true],
+  ['PS_OUTPUT_PACKED', 'VUYA', false],
+  ['PS_OUTPUT_PACKED', 'VUYAPremul', false],
+  ['PS_OUTPUT_PACKED', 'AYUV', false],
+  ['PS_OUTPUT_PACKED', 'AYUVPremul', false],
+  ['PS_OUTPUT_PACKED', 'xRGB', true],
+  ['PS_OUTPUT_PACKED', 'ARGB', true],
+  ['PS_OUTPUT_PACKED', 'ARGBPremul', true],
+  ['PS_OUTPUT_PACKED', 'xBGR', true],
+  ['PS_OUTPUT_PACKED', 'ABGR', true],
+  ['PS_OUTPUT_PACKED', 'ABGRPremul', true],
+]
+
+shader_model = '5_0' # only this shader model is precompiled here; it is appended to every entry point / header name
+
+hlsl_cs_entry_points = [ # each name is also passed to fxc as the define BUILDING_<name>=1
+  'CSMain_YUY2_to_AYUV',
+  'CSMain_UYVY_to_AYUV',
+  'CSMain_VYUY_to_AYUV',
+  'CSMain_YVYU_to_AYUV',
+  'CSMain_v210_to_AYUV',
+  'CSMain_v308_to_AYUV',
+  'CSMain_IYU2_to_AYUV',
+  'CSMain_AYUV_to_YUY2',
+  'CSMain_AYUV_to_UYVY',
+  'CSMain_AYUV_to_VYUY',
+  'CSMain_AYUV_to_YVYU',
+  'CSMain_AYUV_to_v210',
+  'CSMain_AYUV_to_v308',
+  'CSMain_AYUV_to_IYU2',
+  'CSMain_AYUV_to_Y410',
+  'CSMain_RGB_to_RGBA',
+  'CSMain_BGR_to_RGBA',
+  'CSMain_RGB16_to_RGBA',
+  'CSMain_BGR16_to_RGBA',
+  'CSMain_RGB15_to_RGBA',
+  'CSMain_BGR15_to_RGBA',
+  'CSMain_r210_to_RGBA',
+  'CSMain_RGBA_to_RGB',
+  'CSMain_RGBA_to_BGR',
+  'CSMain_RGBA_to_RGB16',
+  'CSMain_RGBA_to_BGR16',
+  'CSMain_RGBA_to_RGB15',
+  'CSMain_RGBA_to_BGR15',
+  'CSMain_RGBA_to_r210',
+  'CSMain_RGBA_to_BGRA',
+]
+
+conv_ps_precompiled = []
+conv_vs_precompiled = []
+conv_cs_precompiled = []
+
+header_collector = find_program('collect_hlsl_headers.py')
+# One pixel shader header per (input format, output format) pair, named PSMain_<in>_<converter>_<out>_<shader model>.h
+foreach input_format : hlsl_ps_input_formats
+  in_format = input_format.get(0)
+  foreach output_format : hlsl_ps_output_formats
+    converter = ''
+    if input_format.get(1) != output_format.get(2) # flags differ -> 'Simple' converter, equal -> 'Identity'
+      converter = 'Simple'
+    else
+      converter = 'Identity'
+    endif
+    output_type = output_format.get(0)
+    output_builder = output_format.get(1)
+    entry_point = 'PSMain_@0@_@1@_@2@_@3@'.format(in_format, converter, output_builder, shader_model)
+    header = '@0@.h'.format(entry_point)
+    sm_target = 'ps_@0@'.format(shader_model)
+    compiled_shader = custom_target(header,
+        input : hlsl_ps_source,
+        output : header,
+        command : [fxc, '/Fh', '@OUTPUT@',
+                   '/E', entry_point,
+                   '/T', sm_target,
+                   '/D', 'BUILDING_HLSL=1',
+                   '/D', 'OUTPUT_TYPE=@0@'.format(output_type),
+                   '/D', 'ENTRY_POINT=@0@'.format(entry_point),
+                   '/D', 'SAMPLER=Sampler@0@'.format(in_format),
+                   '/D', 'CONVERTER=Converter@0@'.format(converter),
+                   '/D', 'OUTPUT_BUILDER=Output@0@'.format(output_builder),
+                   '/nologo',
+                   '@INPUT@'])
+    conv_ps_precompiled += [compiled_shader]
+  endforeach
+endforeach
+# The collector scans the build directory for PSMain_*.h and writes one header defining g_converter_ps_table
+conv_ps_collection = custom_target('converter_hlsl_ps',
+    input : conv_ps_precompiled,
+    output : 'converter_hlsl_ps.h',
+    command : [header_collector,
+               '--input', meson.current_build_dir(),
+               '--prefix', 'PSMain_',
+               '--name', 'g_converter_ps_table',
+               '--output', '@OUTPUT@'
+              ])
+# Single vertex shader: VSMain_converter_<shader model>.h
+entry_point = 'VSMain_converter_@0@'.format(shader_model)
+header = '@0@.h'.format(entry_point)
+sm_target = 'vs_@0@'.format(shader_model)
+compiled_shader = custom_target(header,
+    input : hlsl_vs_source,
+    output : header,
+    command : [fxc, '/Fh', '@OUTPUT@',
+               '/E', entry_point,
+               '/T', sm_target,
+               '/D', 'BUILDING_HLSL=1',
+               '/D', 'ENTRY_POINT=@0@'.format(entry_point),
+               '/nologo',
+               '@INPUT@'])
+conv_vs_precompiled += [compiled_shader]
+
+conv_vs_collection = custom_target('converter_hlsl_vs',
+    input : conv_vs_precompiled,
+    output : 'converter_hlsl_vs.h',
+    command : [header_collector,
+               '--input', meson.current_build_dir(),
+               '--prefix', 'VSMain_',
+               '--name', 'g_converter_vs_table',
+               '--output', '@OUTPUT@'
+              ])
+# One compute shader header per entry in hlsl_cs_entry_points; BUILDING_<shader>=1 is passed alongside ENTRY_POINT
+foreach shader : hlsl_cs_entry_points
+  entry_point = '@0@_@1@'.format(shader, shader_model)
+  header = '@0@.h'.format(entry_point)
+  sm_target = 'cs_@0@'.format(shader_model)
+  compiled_shader = custom_target(header,
+      input : hlsl_cs_source,
+      output : header,
+      command : [fxc, '/Fh', '@OUTPUT@',
+                 '/E', entry_point,
+                 '/T', sm_target,
+                 '/D', 'BUILDING_HLSL=1',
+                 '/D', 'ENTRY_POINT=@0@'.format(entry_point),
+                 '/D', 'BUILDING_@0@=1'.format(shader),
+                 '/nologo',
+                 '@INPUT@'])
+  conv_cs_precompiled += [compiled_shader]
+endforeach
+
+conv_cs_collection = custom_target('converter_hlsl_cs',
+    input : conv_cs_precompiled,
+    output : 'converter_hlsl_cs.h',
+    command : [header_collector,
+               '--input', meson.current_build_dir(),
+               '--prefix', 'CSMain_',
+               '--name', 'g_converter_cs_table',
+               '--output', '@OUTPUT@'
+              ])
+
+hlsl_precompiled += [ # hlsl_precompiled itself is defined outside this file
+  conv_ps_precompiled,
+  conv_vs_precompiled,
+  conv_cs_precompiled,
+  conv_ps_collection,
+  conv_vs_collection,
+  conv_cs_collection,
+]
diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/d3dshader-prelude.h b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/d3dshader-prelude.h
new file mode 100644
index 0000000000..3d2c685f92
--- /dev/null
+++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/d3dshader-prelude.h
@@ -0,0 +1,31 @@
+/* GStreamer
+ * Copyright (C) 2024 GStreamer developers
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */ + +#pragma once + +#include <gst/gst.h> + +#ifndef GST_D3D_SHADER_API +# ifdef BUILDING_GST_D3D_SHADER +# define GST_D3D_SHADER_API GST_API_EXPORT /* from config.h */ +# else +# define GST_D3D_SHADER_API GST_API_IMPORT +# endif +#endif + diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/gstd3dcompile.cpp b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/gstd3dcompile.cpp new file mode 100644 index 0000000000..d47f8dc33c --- /dev/null +++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/gstd3dcompile.cpp @@ -0,0 +1,140 @@ +/* GStreamer + * Copyright (C) 2024 Seungha Yang <seungha@centricular.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "gstd3dcompile.h"
+#include <gmodule.h>
+#include <mutex>
+
+/**
+ * SECTION:gstd3dcompile
+ * @title: GstD3DCompile
+ * @short_description: HLSL compiler and utility
+ *
+ * A set of HLSL compile helper methods
+ *
+ * Since: 1.26
+ */
+/* Debug category "d3dcompile": GST_CAT_DEFAULT expands to a call that creates it exactly once (std::call_once) */
+#ifndef GST_DISABLE_GST_DEBUG
+#define GST_CAT_DEFAULT ensure_debug_category()
+static GstDebugCategory *
+ensure_debug_category (void)
+{
+  static GstDebugCategory *cat = nullptr;
+  static std::once_flag cat_once;
+
+  std::call_once (cat_once, [&]() {
+      cat = _gst_debug_category_new ("d3dcompile", 0, "d3dcompile");
+    });
+
+  return cat;
+}
+#endif /* GST_DISABLE_GST_DEBUG */
+/* Module handle of the loaded compiler library and its resolved D3DCompile entry; both stay nullptr when nothing usable was found */
+static GModule *d3d_compiler_module = nullptr;
+static pD3DCompile GstD3DCompileFunc = nullptr;
+
+/**
+ * gst_d3d_compile_init:
+ *
+ * Loads HLSL compiler library. The search runs only once per process; later calls just report the cached result
+ *
+ * Returns: %TRUE if HLSL compiler library is available
+ *
+ * Since: 1.26
+ */
+gboolean
+gst_d3d_compile_init (void)
+{
+  static std::once_flag init_once;
+  std::call_once (init_once, [&]() {
+      static const gchar *d3d_compiler_names[] = { /* tried in this order, first usable one wins */
+        "d3dcompiler_47.dll",
+        "d3dcompiler_46.dll",
+        "d3dcompiler_45.dll",
+        "d3dcompiler_44.dll",
+        "d3dcompiler_43.dll",
+      };
+
+      for (guint i = 0; i < G_N_ELEMENTS (d3d_compiler_names); i++) {
+        d3d_compiler_module =
+            g_module_open (d3d_compiler_names[i], G_MODULE_BIND_LAZY);
+
+        if (d3d_compiler_module) {
+          GST_INFO ("D3D compiler %s is available", d3d_compiler_names[i]);
+          if (!g_module_symbol (d3d_compiler_module, "D3DCompile",
+                  (gpointer *) & GstD3DCompileFunc)) {
+            GST_ERROR ("Cannot load D3DCompile symbol from %s",
+                d3d_compiler_names[i]);
+            g_module_close (d3d_compiler_module); /* library opened but lacks the symbol: drop it and try the next name */
+            d3d_compiler_module = nullptr;
+            GstD3DCompileFunc = nullptr;
+          } else {
+            break;
+          }
+        }
+      }
+
+      if (!GstD3DCompileFunc)
+        GST_WARNING ("D3D compiler library is unavailable");
+    });
+
+  if (!GstD3DCompileFunc)
+    return FALSE;
+
+  return TRUE;
+}
+
+/**
+ * gst_d3d_compile:
+ * @src_data: source data to compile
+ * @src_data_size: length of src_data
+ * @source_name: (nullable): used for strings that specify error messages
+ * @defines: (nullable): null-terminated array of D3D_SHADER_MACRO struct that defines shader macros
+ * @include: (nullable): a ID3DInclude
+ * @entry_point: (nullable): the name of entry point function
+ * @target: a string specifies the shader target
+ * @flags1: flags defined by D3DCOMPILE constants
+ * @flags2: flags defined by D3DCOMPILE_EFFECT constants
+ * @code: (out) (optional): a compiled code
+ * @error_msgs: (out) (optional) (nullable): compiler error messages
+ *
+ * Compiles HLSL code or an effect file into bytecode for a given target.
+ * All arguments are forwarded unchanged to the D3DCompile symbol resolved by gst_d3d_compile_init()
+ * Returns: HRESULT return code of D3DCompile, or E_FAIL if the compiler library is unavailable
+ *
+ * Since: 1.26
+ */
+HRESULT
+gst_d3d_compile (LPCVOID src_data, SIZE_T src_data_size, LPCSTR source_name,
+    CONST D3D_SHADER_MACRO * defines, ID3DInclude * include, LPCSTR entry_point,
+    LPCSTR target, UINT flags1, UINT flags2, ID3DBlob ** code,
+    ID3DBlob ** error_msgs)
+{
+  if (!gst_d3d_compile_init ()) /* loads the library on first use, so callers need no explicit init */
+    return E_FAIL;
+
+  return GstD3DCompileFunc (src_data, src_data_size, source_name, defines,
+      include, entry_point, target, flags1, flags2, code, error_msgs);
+}
+
diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/gstd3dcompile.h b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/gstd3dcompile.h
new file mode 100644
index 0000000000..878594d59d
--- /dev/null
+++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/gstd3dcompile.h
@@ -0,0 +1,44 @@
+/* GStreamer
+ * Copyright (C) 2024 Seungha Yang <seungha@centricular.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#pragma once + +#include <gst/gst.h> +#include <gst/d3dshader/d3dshader-prelude.h> +#include <d3dcompiler.h> + +G_BEGIN_DECLS + +GST_D3D_SHADER_API +gboolean gst_d3d_compile_init (void); + +GST_D3D_SHADER_API +HRESULT gst_d3d_compile (LPCVOID src_data, + SIZE_T src_data_size, + LPCSTR source_name, + CONST D3D_SHADER_MACRO * defines, + ID3DInclude * include, + LPCSTR entry_point, + LPCSTR target, + UINT flags1, + UINT flags2, + ID3DBlob ** code, + ID3DBlob ** error_msgs); + +G_END_DECLS diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/gstd3dshader.h b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/gstd3dshader.h new file mode 100644 index 0000000000..bd0726d997 --- /dev/null +++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/gstd3dshader.h @@ -0,0 +1,30 @@ +/* GStreamer + * Copyright (C) 2024 Seungha Yang <seungha@centricular.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. 
+ * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#pragma once + +#ifndef GST_USE_UNSTABLE_API +#pragma message ("The d3dshader library from gst-plugins-bad is unstable API and may change in future.") +#pragma message ("You can define GST_USE_UNSTABLE_API to avoid this warning.") +#endif + +#include <gst/gst.h> +#include <gst/d3dshader/gstd3dcompile.h> +#include <gst/d3dshader/gstd3dshadercache.h> + diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/gstd3dshadercache.cpp b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/gstd3dshadercache.cpp new file mode 100644 index 0000000000..fb8245e5ef --- /dev/null +++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/gstd3dshadercache.cpp @@ -0,0 +1,950 @@ +/* GStreamer + * Copyright (C) 2024 Seungha Yang <seungha@centricular.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "gstd3dshadercache.h"
+#include "gstd3dcompile.h"
+#include <mutex>
+#include <unordered_map>
+#include <vector>
+#include <string>
+#include <utility>
+#include <wrl.h>
+#include "converter-hlsl/hlsl.h"
+#include "plugin-hlsl/hlsl.h"
+
+/* *INDENT-OFF* */
+using namespace Microsoft::WRL;
+/* Shader name -> (bytecode pointer, size). Taken from the generated headers when HLSL_PRECOMPILED is defined, otherwise empty until a getter below compiles and inserts an entry */
+#ifdef HLSL_PRECOMPILED
+#include "converter_hlsl_ps.h"
+#include "converter_hlsl_vs.h"
+#include "converter_hlsl_cs.h"
+#include "plugin_hlsl_ps.h"
+#include "plugin_hlsl_vs.h"
+#else
+static std::unordered_map<std::string, std::pair<const BYTE *, SIZE_T>> g_converter_ps_table;
+static std::unordered_map<std::string, std::pair<const BYTE *, SIZE_T>> g_converter_vs_table;
+static std::unordered_map<std::string, std::pair<const BYTE *, SIZE_T>> g_converter_cs_table;
+static std::unordered_map<std::string, std::pair<const BYTE *, SIZE_T>> g_plugin_ps_table;
+static std::unordered_map<std::string, std::pair<const BYTE *, SIZE_T>> g_plugin_vs_table;
+#endif
+/* Blobs produced by runtime compilation; nothing here releases them because table entries and returned byte code point into their buffers */
+static std::vector<std::pair<std::string, ID3DBlob *>> g_compiled_blobs;
+static std::mutex g_blob_lock;
+
+/* *INDENT-ON* */
+/* One runtime-compilable plugin shader: enum value, base name and HLSL source text with its sizeof */
+struct ShaderItem
+{
+  guint type;
+  const gchar *name;
+  const gchar *source;
+  gsize source_size;
+};
+
+#define BUILD_SOURCE(name) G_STRINGIFY (name), str_ ##name, sizeof (str_##name)
+/* Indexed directly by GstD3DPluginPS (g_ps_map[type]), so the array order has to follow the enum */
+static const ShaderItem g_ps_map[] = {
+  {GST_D3D_PLUGIN_PS_CHECKER_LUMA, BUILD_SOURCE (PSMain_checker_luma)},
+  {GST_D3D_PLUGIN_PS_CHECKER_RGB, BUILD_SOURCE (PSMain_checker_rgb)},
+  {GST_D3D_PLUGIN_PS_CHECKER_VUYA, BUILD_SOURCE (PSMain_checker_vuya)},
+  {GST_D3D_PLUGIN_PS_CHECKER, BUILD_SOURCE (PSMain_checker)},
+  {GST_D3D_PLUGIN_PS_COLOR, BUILD_SOURCE (PSMain_color)},
+  {GST_D3D_PLUGIN_PS_SAMPLE_PREMULT, BUILD_SOURCE (PSMain_sample_premul)},
+  {GST_D3D_PLUGIN_PS_SAMPLE, BUILD_SOURCE (PSMain_sample)},
+  {GST_D3D_PLUGIN_PS_SNOW, BUILD_SOURCE (PSMain_snow)},
+};
+/* Indexed directly by GstD3DPluginVS (g_vs_map[type]) */
+static const ShaderItem g_vs_map[] = {
+  {GST_D3D_PLUGIN_VS_COLOR, BUILD_SOURCE (VSMain_color)},
+  {GST_D3D_PLUGIN_VS_COORD, BUILD_SOURCE (VSMain_coord)},
+  {GST_D3D_PLUGIN_VS_POS, BUILD_SOURCE (VSMain_pos)},
+};
+
+#undef BUILD_SOURCE
+/* Shader model suffixes, indexed by GstD3DShaderModel; appended to shader names and to the vs_/ps_/cs_ compile target */
+static const gchar * g_sm_map[] = {
+  "4_0",
+  "5_0",
+  "5_1",
+};
+/* Plugin vertex shader getter: returns the cached "<name>_<shader model>" bytecode or compiles and caches it; TRUE on success, FALSE on bad arguments or compile failure */
+gboolean
+gst_d3d_plugin_shader_get_vs_blob (GstD3DPluginVS type,
+    GstD3DShaderModel shader_model, GstD3DShaderByteCode * byte_code)
+{
+  g_return_val_if_fail (type < GST_D3D_PLUGIN_VS_LAST, FALSE);
+  g_return_val_if_fail (shader_model < GST_D3D_SM_LAST, FALSE);
+  g_return_val_if_fail (byte_code, FALSE);
+
+  static std::mutex cache_lock; /* serializes lookup, compile and insert for this table */
+
+  auto shader_name = std::string (g_vs_map[type].name) + "_" +
+      std::string (g_sm_map[shader_model]);
+
+  std::lock_guard <std::mutex> lk (cache_lock);
+  auto it = g_plugin_vs_table.find (shader_name);
+  if (it != g_plugin_vs_table.end ()) {
+    byte_code->byte_code = it->second.first;
+    byte_code->byte_code_len = it->second.second;
+
+    return TRUE;
+  }
+
+  auto target = std::string ("vs_") + g_sm_map[shader_model];
+
+  ID3DBlob *blob = nullptr;
+  ComPtr<ID3DBlob> error_msg;
+
+  auto hr = gst_d3d_compile (g_vs_map[type].source, g_vs_map[type].source_size,
+      nullptr, nullptr, nullptr, "ENTRY_POINT", target.c_str (), 0, 0,
+      &blob, &error_msg);
+  if (FAILED (hr)) {
+    const gchar *err = nullptr;
+    if (error_msg)
+      err = (const gchar *) error_msg->GetBufferPointer ();
+    /* log the vertex shader source that failed (this used to print the g_ps_map entry) */
+    GST_ERROR ("Couldn't compile code, hr: 0x%x, error detail: %s, "
+        "source code: \n%s", (guint) hr, GST_STR_NULL (err),
+        g_vs_map[type].source);
+    return FALSE;
+  }
+
+  byte_code->byte_code = blob->GetBufferPointer ();
+  byte_code->byte_code_len = blob->GetBufferSize ();
+
+  g_plugin_vs_table[shader_name] = { (const BYTE *) blob->GetBufferPointer (),
+    blob->GetBufferSize ()};
+
+  std::lock_guard <std::mutex> blk (g_blob_lock);
+  g_compiled_blobs.push_back ({ shader_name, blob });
+
+  return TRUE;
+}
+/* Plugin pixel shader getter: same contract as the vertex shader getter above, backed by g_ps_map / g_plugin_ps_table */
+gboolean
+gst_d3d_plugin_shader_get_ps_blob (GstD3DPluginPS type,
+    GstD3DShaderModel shader_model, GstD3DShaderByteCode * byte_code)
+{
+  g_return_val_if_fail (type < GST_D3D_PLUGIN_PS_LAST, FALSE);
+  g_return_val_if_fail (shader_model < GST_D3D_SM_LAST, FALSE);
+  g_return_val_if_fail (byte_code, FALSE);
+
+  static std::mutex cache_lock;
+
+  auto shader_name = std::string (g_ps_map[type].name) + "_" +
+      std::string (g_sm_map[shader_model]);
+
+  std::lock_guard <std::mutex> lk (cache_lock);
+  auto it = g_plugin_ps_table.find (shader_name);
+  if (it != g_plugin_ps_table.end ()) {
+    byte_code->byte_code = it->second.first;
+    byte_code->byte_code_len = it->second.second;
+
+    return TRUE;
+  }
+
+  auto target = std::string ("ps_") + g_sm_map[shader_model];
+
+  ID3DBlob *blob = nullptr;
+  ComPtr<ID3DBlob> error_msg;
+
+  auto hr = gst_d3d_compile (g_ps_map[type].source, g_ps_map[type].source_size,
+      nullptr, nullptr, nullptr, "ENTRY_POINT", target.c_str (), 0, 0,
+      &blob, &error_msg);
+  if (FAILED (hr)) {
+    const gchar *err = nullptr;
+    if (error_msg)
+      err = (const gchar *) error_msg->GetBufferPointer ();
+
+    GST_ERROR ("Couldn't compile code, hr: 0x%x, error detail: %s, "
+        "source code: \n%s", (guint) hr, GST_STR_NULL (err),
+        g_ps_map[type].source);
+    return FALSE;
+  }
+
+  byte_code->byte_code = blob->GetBufferPointer ();
+  byte_code->byte_code_len = blob->GetBufferSize ();
+
+  g_plugin_ps_table[shader_name] = { (const BYTE *) blob->GetBufferPointer (),
+    blob->GetBufferSize ()};
+
+  std::lock_guard <std::mutex> blk (g_blob_lock);
+  g_compiled_blobs.push_back ({ shader_name, blob });
+
+  return TRUE;
+}
+/* Converter vertex shader getter: looks up "VSMain_converter_<shader model>", compiling str_VSMain_converter on a miss */
+gboolean
+gst_d3d_converter_shader_get_vs_blob (GstD3DShaderModel shader_model,
+    GstD3DShaderByteCode * byte_code)
+{
+  g_return_val_if_fail (shader_model < GST_D3D_SM_LAST, FALSE);
+  g_return_val_if_fail (byte_code, FALSE);
+
+  static std::mutex cache_lock;
+
+  auto shader_name = std::string ("VSMain_converter_") +
+      std::string (g_sm_map[shader_model]);
+
+  std::lock_guard <std::mutex> lk (cache_lock);
+  auto it = g_converter_vs_table.find (shader_name);
+  if (it != g_converter_vs_table.end ()) {
+    byte_code->byte_code = it->second.first;
+    byte_code->byte_code_len = it->second.second;
+
+    return TRUE;
+  }
+
+  auto target = std::string ("vs_") + g_sm_map[shader_model];
+
+  ID3DBlob *blob = nullptr;
+  ComPtr<ID3DBlob> error_msg;
+
+  auto hr = gst_d3d_compile (str_VSMain_converter,
+      sizeof (str_VSMain_converter),
+      nullptr, nullptr, nullptr, "ENTRY_POINT", target.c_str (), 0, 0,
+      &blob, &error_msg);
+  if (FAILED (hr)) {
+    const gchar *err = nullptr;
+    if (error_msg)
+      err = (const gchar *) error_msg->GetBufferPointer ();
+
+    GST_ERROR ("Couldn't compile code, hr: 0x%x, error detail: %s, "
+        "source code: \n%s", (guint) hr, GST_STR_NULL (err),
+        str_VSMain_converter);
+    return FALSE;
+  }
+
+  byte_code->byte_code = blob->GetBufferPointer ();
+  byte_code->byte_code_len = blob->GetBufferSize ();
+
+  g_converter_vs_table[shader_name] = {(const BYTE *) blob->GetBufferPointer (),
+    blob->GetBufferSize ()};
+
+  std::lock_guard <std::mutex> blk (g_blob_lock);
+  g_compiled_blobs.push_back ({ shader_name, blob });
+
+  return TRUE;
+}
+/* Converter compute shader getter: maps the format pair to "CSMain_<in>_to_<out>", fills SRV/UAV formats and x/y units, then returns cached or freshly compiled bytecode; FALSE for unsupported formats or compile failure */
+gboolean
+gst_d3d_converter_shader_get_cs_blob (GstVideoFormat in_format,
+    GstVideoFormat out_format, GstD3DShaderModel shader_model,
+    GstD3DConverterCSByteCode * byte_code)
+{
+  g_return_val_if_fail (shader_model < GST_D3D_SM_LAST, FALSE);
+  g_return_val_if_fail (byte_code, FALSE);
+
+  static std::mutex cache_lock;
+
+  DXGI_FORMAT srv_format = DXGI_FORMAT_UNKNOWN;
+  DXGI_FORMAT uav_format = DXGI_FORMAT_UNKNOWN;
+  std::string in_format_str;
+  std::string out_format_str;
+  guint x_unit = 8; /* NOTE(review): presumably pixels covered per thread group in x / y - confirm with the dispatch code */
+  guint y_unit = 8;
+  /* Input side: several GStreamer formats share one shader name and differ only in the SRV format */
+  switch (in_format) {
+    case GST_VIDEO_FORMAT_YUY2:
+      srv_format = DXGI_FORMAT_R8G8B8A8_UNORM;
+      in_format_str = "YUY2";
+      x_unit = 16;
+      break;
+    case GST_VIDEO_FORMAT_UYVY:
+      srv_format = DXGI_FORMAT_R8G8B8A8_UNORM;
+      in_format_str = "UYVY";
+      x_unit = 16;
+      break;
+    case GST_VIDEO_FORMAT_VYUY:
+      srv_format = DXGI_FORMAT_R8G8B8A8_UNORM;
+      in_format_str = "VYUY";
+      x_unit = 16;
+      break;
+    case GST_VIDEO_FORMAT_YVYU:
+      srv_format = DXGI_FORMAT_R8G8B8A8_UNORM;
+      in_format_str = "YVYU";
+      x_unit = 16;
+      break;
+    case GST_VIDEO_FORMAT_Y210:
+    case GST_VIDEO_FORMAT_Y212_LE:
+      srv_format = DXGI_FORMAT_R16G16B16A16_UNORM;
+      in_format_str = "YUY2";
+      x_unit = 16;
+      break;
+    case GST_VIDEO_FORMAT_v210:
+      srv_format = DXGI_FORMAT_R10G10B10A2_UNORM;
+      in_format_str = "v210";
+      x_unit = 48;
+      break;
+    case GST_VIDEO_FORMAT_v216:
+      srv_format = DXGI_FORMAT_R16G16B16A16_UNORM;
+      in_format_str = "UYVY";
+      x_unit = 16;
+      break;
+    case GST_VIDEO_FORMAT_v308:
+      srv_format = DXGI_FORMAT_R8G8B8A8_UNORM;
+      in_format_str = "v308";
+      x_unit = 32;
+      break;
+    case GST_VIDEO_FORMAT_IYU2:
+      srv_format = DXGI_FORMAT_R8G8B8A8_UNORM;
+      in_format_str = "IYU2";
+      x_unit = 32;
+      break;
+    case GST_VIDEO_FORMAT_RGB:
+      srv_format = DXGI_FORMAT_R8G8B8A8_UNORM;
+      in_format_str = "RGB";
+      x_unit = 32;
+      break;
+    case GST_VIDEO_FORMAT_BGR:
+      srv_format = DXGI_FORMAT_R8G8B8A8_UNORM;
+      in_format_str = "BGR";
+      x_unit = 32;
+      break;
+    case GST_VIDEO_FORMAT_RGB16:
+      srv_format = DXGI_FORMAT_R16_UINT;
+      in_format_str = "RGB16";
+      x_unit = 8;
+      break;
+    case GST_VIDEO_FORMAT_BGR16:
+      srv_format = DXGI_FORMAT_R16_UINT;
+      in_format_str = "BGR16";
+      x_unit = 8;
+      break;
+    case GST_VIDEO_FORMAT_RGB15:
+      srv_format = DXGI_FORMAT_R16_UINT;
+      in_format_str = "RGB15";
+      x_unit = 8;
+      break;
+    case GST_VIDEO_FORMAT_BGR15:
+      srv_format = DXGI_FORMAT_R16_UINT;
+      in_format_str = "BGR15";
+      x_unit = 8;
+      break;
+    case GST_VIDEO_FORMAT_r210:
+      srv_format = DXGI_FORMAT_R32_UINT;
+      in_format_str = "r210";
+      x_unit = 8;
+      break;
+    case GST_VIDEO_FORMAT_AYUV:
+      srv_format = DXGI_FORMAT_R8G8B8A8_UNORM;
+      in_format_str = "AYUV";
+      break;
+    case GST_VIDEO_FORMAT_AYUV64:
+      srv_format = DXGI_FORMAT_R16G16B16A16_UNORM;
+      in_format_str = "AYUV";
+      break;
+    case GST_VIDEO_FORMAT_RGBA:
+      srv_format = DXGI_FORMAT_R8G8B8A8_UNORM;
+      in_format_str = "RGBA";
+      break;
+    case GST_VIDEO_FORMAT_RGB10A2_LE:
+      srv_format = DXGI_FORMAT_R10G10B10A2_UNORM;
+      in_format_str = "RGBA";
+      break;
+    case GST_VIDEO_FORMAT_RGBA64_LE:
+      srv_format = DXGI_FORMAT_R16G16B16A16_UNORM;
+      in_format_str = "RGBA";
+      break;
+    default:
+      return FALSE;
+  }
+  /* Output side: every case that assigns x_unit here overrides the value chosen from the input format */
+  switch (out_format) {
+    case GST_VIDEO_FORMAT_YUY2:
+      uav_format = DXGI_FORMAT_R8G8B8A8_UNORM;
+      out_format_str = "YUY2";
+      x_unit = 16;
+      break;
+    case GST_VIDEO_FORMAT_UYVY:
+      uav_format = DXGI_FORMAT_R8G8B8A8_UNORM;
+      out_format_str = "UYVY";
+      x_unit = 16;
+      break;
+    case GST_VIDEO_FORMAT_VYUY:
+      uav_format = DXGI_FORMAT_R8G8B8A8_UNORM;
+      out_format_str = "VYUY";
+      x_unit = 16;
+      break;
+    case GST_VIDEO_FORMAT_YVYU:
+      uav_format = DXGI_FORMAT_R8G8B8A8_UNORM;
+      out_format_str = "YVYU";
+      x_unit = 16;
+      break;
+    case GST_VIDEO_FORMAT_Y210:
+    case GST_VIDEO_FORMAT_Y212_LE:
+      uav_format = DXGI_FORMAT_R16G16B16A16_UNORM;
+      out_format_str = "YUY2";
+      x_unit = 16;
+      break;
+    case GST_VIDEO_FORMAT_v210:
+      uav_format = DXGI_FORMAT_R10G10B10A2_UNORM;
+      out_format_str = "v210";
+      x_unit = 48;
+      break;
+    case GST_VIDEO_FORMAT_v216:
+      uav_format = DXGI_FORMAT_R16G16B16A16_UNORM;
+      out_format_str = "UYVY";
+      x_unit = 16;
+      break;
+    case GST_VIDEO_FORMAT_v308:
+      uav_format = DXGI_FORMAT_R8G8B8A8_UNORM;
+      out_format_str = "v308";
+      x_unit = 32;
+      break;
+    case GST_VIDEO_FORMAT_IYU2:
+      uav_format = DXGI_FORMAT_R8G8B8A8_UNORM;
+      out_format_str = "IYU2";
+      x_unit = 32;
+      break;
+    case GST_VIDEO_FORMAT_Y410:
+      uav_format = DXGI_FORMAT_R10G10B10A2_UNORM;
+      out_format_str = "Y410";
+      x_unit = 8;
+      break;
+    case GST_VIDEO_FORMAT_Y412_LE:
+      uav_format = DXGI_FORMAT_R16G16B16A16_UNORM;
+      out_format_str = "Y410";
+      x_unit = 8;
+      break;
+    case GST_VIDEO_FORMAT_RGB:
+      uav_format = DXGI_FORMAT_R8G8B8A8_UNORM;
+      out_format_str = "RGB";
+      x_unit = 32;
+      break;
+    case GST_VIDEO_FORMAT_BGR:
+      uav_format = DXGI_FORMAT_R8G8B8A8_UNORM;
+      out_format_str = "BGR";
+      x_unit = 32;
+      break;
+    case GST_VIDEO_FORMAT_RGB16:
+      uav_format = DXGI_FORMAT_R16_UINT;
+      out_format_str = "RGB16";
+      x_unit = 8;
+      break;
+    case GST_VIDEO_FORMAT_BGR16:
+      uav_format = DXGI_FORMAT_R16_UINT;
+      out_format_str = "BGR16";
+      x_unit = 8;
+      break;
+    case GST_VIDEO_FORMAT_RGB15:
+      uav_format = DXGI_FORMAT_R16_UINT;
+      out_format_str = "RGB15";
+      x_unit = 8;
+      break;
+    case GST_VIDEO_FORMAT_BGR15:
+      uav_format = DXGI_FORMAT_R16_UINT;
+      out_format_str = "BGR15";
+      x_unit = 8;
+      break;
+    case GST_VIDEO_FORMAT_r210:
+      uav_format = DXGI_FORMAT_R32_UINT;
+      out_format_str = "r210";
+      x_unit = 8;
+      break;
+    case GST_VIDEO_FORMAT_BGRA64_LE:
+      uav_format = DXGI_FORMAT_R16G16B16A16_UNORM;
+      out_format_str = "BGRA";
+      x_unit = 8;
+      break;
+    case GST_VIDEO_FORMAT_BGR10A2_LE:
+      uav_format = DXGI_FORMAT_R10G10B10A2_UNORM;
+      out_format_str = "BGRA";
+      x_unit = 8;
+      break;
+    case GST_VIDEO_FORMAT_AYUV:
+      uav_format = DXGI_FORMAT_R8G8B8A8_UNORM;
+      out_format_str = "AYUV";
+      break;
+    case GST_VIDEO_FORMAT_AYUV64:
+      uav_format = DXGI_FORMAT_R16G16B16A16_UNORM;
+      out_format_str = "AYUV";
+      break;
+    case GST_VIDEO_FORMAT_RGBA:
+      uav_format = DXGI_FORMAT_R8G8B8A8_UNORM;
+      out_format_str = "RGBA";
+      break;
+    case GST_VIDEO_FORMAT_RGB10A2_LE:
+      uav_format = DXGI_FORMAT_R10G10B10A2_UNORM;
+      out_format_str = "RGBA";
+      break;
+    default:
+      return FALSE;
+  }
+  /* These fields are written before the lookup, so they are set even when the later compile fails */
+  byte_code->x_unit = x_unit;
+  byte_code->y_unit = y_unit;
+  byte_code->srv_format = srv_format;
+  byte_code->uav_format = uav_format;
+
+  auto shader_def = "CSMain_" + in_format_str + "_to_" + out_format_str;
+  auto shader_name = shader_def + "_" + std::string (g_sm_map[shader_model]);
+
+  std::lock_guard <std::mutex> lk (cache_lock);
+  auto it = g_converter_cs_table.find (shader_name);
+  if (it != g_converter_cs_table.end ()) {
+    byte_code->byte_code.byte_code = it->second.first;
+    byte_code->byte_code.byte_code_len = it->second.second;
+
+    return TRUE;
+  }
+
+  auto target = std::string ("cs_") + g_sm_map[shader_model];
+  std::vector<std::pair<std::string,std::string>> macro_str_pairs;
+  std::vector<D3D_SHADER_MACRO> macros;
+  /* BUILDING_<shader_def>=1 is the only define; it selects the matching #ifdef section of the compute shader source */
+  macro_str_pairs.push_back ({"BUILDING_" + shader_def, "1"});
+
+  for (const auto & def : macro_str_pairs)
+    macros.push_back({def.first.c_str (), def.second.c_str ()});
+
+  macros.push_back({nullptr, nullptr});
+
+  ID3DBlob *blob = nullptr;
+  ComPtr<ID3DBlob> error_msg;
+
+  auto hr = gst_d3d_compile (str_CSMain_converter,
+      sizeof (str_CSMain_converter),
+      nullptr, macros.data (), nullptr, "ENTRY_POINT", target.c_str (), 0, 0,
+      &blob, &error_msg);
+  if (FAILED (hr)) {
+    const gchar *err = nullptr;
+    if (error_msg)
+      err = (const gchar *) error_msg->GetBufferPointer ();
+    /* log the compute shader source that failed (this used to print str_VSMain_converter) */
+    GST_ERROR ("Couldn't compile code, hr: 0x%x, error detail: %s, "
+        "source code: \n%s", (guint) hr, GST_STR_NULL (err),
+        str_CSMain_converter);
+    return FALSE;
+  }
+
+  byte_code->byte_code.byte_code = blob->GetBufferPointer ();
+  byte_code->byte_code.byte_code_len = blob->GetBufferSize ();
+
+  g_converter_cs_table[shader_name] = {(const BYTE *) blob->GetBufferPointer (),
+    blob->GetBufferSize ()};
+
+  std::lock_guard <std::mutex> blk (g_blob_lock);
+  g_compiled_blobs.push_back ({ shader_name, blob });
+
+  return TRUE;
+}
+/* Pixel shader output layouts; ps_output_to_string() maps each value to its "PS_OUTPUT_*" name */
+enum class PS_OUTPUT
+{
+  PACKED,
+  LUMA,
+  CHROMA,
+  CHROMA_PLANAR,
+  PLANAR,
+  PLANAR_FULL,
+};
+
+static const std::string
+ps_output_to_string (PS_OUTPUT output)
+{
+  switch (output) {
+    case PS_OUTPUT::PACKED:
+      return "PS_OUTPUT_PACKED";
+    case PS_OUTPUT::LUMA:
+      return "PS_OUTPUT_LUMA";
+    case PS_OUTPUT::CHROMA:
+      return "PS_OUTPUT_CHROMA";
+    case PS_OUTPUT::CHROMA_PLANAR:
+      return "PS_OUTPUT_CHROMA_PLANAR";
+    case PS_OUTPUT::PLANAR:
+      return "PS_OUTPUT_PLANAR";
+    case PS_OUTPUT::PLANAR_FULL:
+      return "PS_OUTPUT_PLANAR_FULL";
+    default:
+      g_assert_not_reached ();
+      break;
+  }
+
+  return "";
+}
+
+static guint
+ps_output_get_num_rtv (PS_OUTPUT output)
+{
+  switch (output) {
+    case PS_OUTPUT::PACKED:
+    case PS_OUTPUT::LUMA:
+    case PS_OUTPUT::CHROMA:
+      return 1;
+    case PS_OUTPUT::CHROMA_PLANAR:
+      return 2;
+    case PS_OUTPUT::PLANAR:
+      return 3;
+    case PS_OUTPUT::PLANAR_FULL:
+      return 4;
+    default:
g_assert_not_reached (); + break; + } + + return 0; +} + +static std::string +conv_ps_make_input (GstVideoFormat format, gboolean premul) +{ + switch (format) { + case GST_VIDEO_FORMAT_RGBA: + case GST_VIDEO_FORMAT_RGBA64_LE: + case GST_VIDEO_FORMAT_RGB10A2_LE: + case GST_VIDEO_FORMAT_BGRA: + if (premul) + return "RGBAPremul"; + return "RGBA"; + case GST_VIDEO_FORMAT_RGBx: + case GST_VIDEO_FORMAT_BGRx: + return "RGBx"; + case GST_VIDEO_FORMAT_ARGB: + if (premul) + return "ARGBPremul"; + return "ARGB"; + case GST_VIDEO_FORMAT_xRGB: + return "xRGB"; + case GST_VIDEO_FORMAT_ABGR: + if (premul) + return "ABGRPremul"; + return "ABGR"; + case GST_VIDEO_FORMAT_xBGR: + return "xBGR"; + case GST_VIDEO_FORMAT_VUYA: + if (premul) + return "VUYAPremul"; + return "VUYA"; + case GST_VIDEO_FORMAT_AYUV: + case GST_VIDEO_FORMAT_AYUV64: + return "AYUV"; + case GST_VIDEO_FORMAT_NV12: + case GST_VIDEO_FORMAT_P010_10LE: + case GST_VIDEO_FORMAT_P012_LE: + case GST_VIDEO_FORMAT_P016_LE: + return "NV12"; + case GST_VIDEO_FORMAT_NV21: + return "NV21"; + case GST_VIDEO_FORMAT_I420: + case GST_VIDEO_FORMAT_Y42B: + case GST_VIDEO_FORMAT_Y444: + case GST_VIDEO_FORMAT_Y444_16LE: + return "I420"; + case GST_VIDEO_FORMAT_YV12: + return "YV12"; + case GST_VIDEO_FORMAT_I420_10LE: + case GST_VIDEO_FORMAT_I422_10LE: + case GST_VIDEO_FORMAT_Y444_10LE: + return "I420_10"; + case GST_VIDEO_FORMAT_I420_12LE: + case GST_VIDEO_FORMAT_I422_12LE: + case GST_VIDEO_FORMAT_Y444_12LE: + return "I420_12"; + case GST_VIDEO_FORMAT_Y410: + return "Y410"; + case GST_VIDEO_FORMAT_GRAY8: + case GST_VIDEO_FORMAT_GRAY16_LE: + return "GRAY"; + case GST_VIDEO_FORMAT_RGBP: + return "RGBP"; + case GST_VIDEO_FORMAT_BGRP: + return "BGRP"; + case GST_VIDEO_FORMAT_GBR: + case GST_VIDEO_FORMAT_GBR_16LE: + return "GBR"; + case GST_VIDEO_FORMAT_GBR_10LE: + return "GBR_10"; + case GST_VIDEO_FORMAT_GBR_12LE: + return "GBR_12"; + case GST_VIDEO_FORMAT_GBRA: + if (premul) + return "GBRAPremul"; + return "GBRA"; + case 
GST_VIDEO_FORMAT_GBRA_10LE: + if (premul) + return "GBRAPremul_10"; + return "GBRA_10"; + case GST_VIDEO_FORMAT_GBRA_12LE: + if (premul) + return "GBRAPremul_12"; + return "GBRA_12"; + case GST_VIDEO_FORMAT_Y412_LE: + if (premul) + return "Y412Premul"; + return "Y412"; + case GST_VIDEO_FORMAT_BGR10A2_LE: + return "BGR10A2"; + case GST_VIDEO_FORMAT_BGRA64_LE: + if (premul) + return "BGRA64Premul"; + return "BGRA64"; + case GST_VIDEO_FORMAT_RBGA: + if (premul) + return "RBGAPremul"; + return "RBGA"; + default: + g_assert_not_reached (); + break; + } + + return ""; +} + +static std::vector<std::pair<PS_OUTPUT, std::string>> +conv_ps_make_output (GstVideoFormat format, gboolean premul) +{ + std::vector<std::pair<PS_OUTPUT, std::string>> ret; + + switch (format) { + case GST_VIDEO_FORMAT_RGBA: + case GST_VIDEO_FORMAT_RGBA64_LE: + case GST_VIDEO_FORMAT_RGB10A2_LE: + case GST_VIDEO_FORMAT_BGRA: + if (premul) + ret.push_back({PS_OUTPUT::PACKED, "RGBAPremul"}); + else + ret.push_back({PS_OUTPUT::PACKED, "RGBA"}); + break; + case GST_VIDEO_FORMAT_RGBx: + case GST_VIDEO_FORMAT_BGRx: + ret.push_back({PS_OUTPUT::PACKED, "RGBx"}); + break; + case GST_VIDEO_FORMAT_ARGB: + if (premul) + ret.push_back({PS_OUTPUT::PACKED, "ARGBPremul"}); + else + ret.push_back({PS_OUTPUT::PACKED, "ARGB"}); + break; + case GST_VIDEO_FORMAT_xRGB: + ret.push_back({PS_OUTPUT::PACKED, "xRGB"}); + break; + case GST_VIDEO_FORMAT_ABGR: + if (premul) + ret.push_back({PS_OUTPUT::PACKED, "ABGRPremul"}); + else + ret.push_back({PS_OUTPUT::PACKED, "ABGR"}); + break; + case GST_VIDEO_FORMAT_xBGR: + ret.push_back({PS_OUTPUT::PACKED, "xBGR"}); + break; + case GST_VIDEO_FORMAT_VUYA: + if (premul) + ret.push_back({PS_OUTPUT::PACKED, "VUYAPremul"}); + else + ret.push_back({PS_OUTPUT::PACKED, "VUYA"}); + break; + case GST_VIDEO_FORMAT_AYUV: + case GST_VIDEO_FORMAT_AYUV64: + ret.push_back({PS_OUTPUT::PACKED, "AYUV"}); + break; + case GST_VIDEO_FORMAT_NV12: + case GST_VIDEO_FORMAT_P010_10LE: + case 
GST_VIDEO_FORMAT_P012_LE: + case GST_VIDEO_FORMAT_P016_LE: + ret.push_back({PS_OUTPUT::LUMA, "Luma"}); + ret.push_back({PS_OUTPUT::CHROMA, "ChromaNV12"}); + break; + case GST_VIDEO_FORMAT_NV21: + ret.push_back({PS_OUTPUT::LUMA, "Luma"}); + ret.push_back({PS_OUTPUT::CHROMA, "ChromaNV21"}); + break; + case GST_VIDEO_FORMAT_I420: + case GST_VIDEO_FORMAT_Y42B: + ret.push_back({PS_OUTPUT::LUMA, "Luma"}); + ret.push_back({PS_OUTPUT::CHROMA_PLANAR, "ChromaI420"}); + break; + case GST_VIDEO_FORMAT_Y444: + case GST_VIDEO_FORMAT_Y444_16LE: + ret.push_back({PS_OUTPUT::PLANAR, "Y444"}); + break; + case GST_VIDEO_FORMAT_YV12: + ret.push_back({PS_OUTPUT::LUMA, "Luma"}); + ret.push_back({PS_OUTPUT::CHROMA_PLANAR, "ChromaYV12"}); + break; + case GST_VIDEO_FORMAT_I420_10LE: + case GST_VIDEO_FORMAT_I422_10LE: + ret.push_back({PS_OUTPUT::LUMA, "Luma_10"}); + ret.push_back({PS_OUTPUT::CHROMA_PLANAR, "ChromaI420_10"}); + break; + case GST_VIDEO_FORMAT_Y444_10LE: + ret.push_back({PS_OUTPUT::PLANAR, "Y444_10"}); + break; + case GST_VIDEO_FORMAT_I420_12LE: + case GST_VIDEO_FORMAT_I422_12LE: + ret.push_back({PS_OUTPUT::LUMA, "Luma_12"}); + ret.push_back({PS_OUTPUT::CHROMA_PLANAR, "ChromaI420_12"}); + break; + case GST_VIDEO_FORMAT_Y444_12LE: + ret.push_back({PS_OUTPUT::PLANAR, "Y444_12"}); + break; + case GST_VIDEO_FORMAT_GRAY8: + case GST_VIDEO_FORMAT_GRAY16_LE: + ret.push_back({PS_OUTPUT::LUMA, "Luma"}); + break; + case GST_VIDEO_FORMAT_RGBP: + ret.push_back({PS_OUTPUT::PLANAR, "RGBP"}); + break; + case GST_VIDEO_FORMAT_BGRP: + ret.push_back({PS_OUTPUT::PLANAR, "BGRP"}); + break; + case GST_VIDEO_FORMAT_GBR: + case GST_VIDEO_FORMAT_GBR_16LE: + ret.push_back({PS_OUTPUT::PLANAR, "GBR"}); + break; + case GST_VIDEO_FORMAT_GBR_10LE: + ret.push_back({PS_OUTPUT::PLANAR, "GBR_10"}); + break; + case GST_VIDEO_FORMAT_GBR_12LE: + ret.push_back({PS_OUTPUT::PLANAR, "GBR_12"}); + break; + case GST_VIDEO_FORMAT_GBRA: + if (premul) + ret.push_back({PS_OUTPUT::PLANAR_FULL, "GBRAPremul"}); + else + 
ret.push_back({PS_OUTPUT::PLANAR_FULL, "GBRA"}); + break; + case GST_VIDEO_FORMAT_GBRA_10LE: + if (premul) + ret.push_back({PS_OUTPUT::PLANAR_FULL, "GBRAPremul_10"}); + else + ret.push_back({PS_OUTPUT::PLANAR_FULL, "GBRA_10"}); + break; + case GST_VIDEO_FORMAT_GBRA_12LE: + if (premul) + ret.push_back({PS_OUTPUT::PLANAR_FULL, "GBRAPremul_12"}); + else + ret.push_back({PS_OUTPUT::PLANAR_FULL, "GBRA_12"}); + break; + case GST_VIDEO_FORMAT_RBGA: + if (premul) + ret.push_back({PS_OUTPUT::PACKED, "RBGAPremul"}); + else + ret.push_back({PS_OUTPUT::PACKED, "RBGA"}); + break; + default: + g_assert_not_reached (); + break; + } + + return ret; +} + +guint +gst_d3d_converter_shader_get_ps_blob (GstVideoFormat in_format, + GstVideoFormat out_format, gboolean in_premul, gboolean out_premul, + GstD3DConverterType conv_type, GstD3DShaderModel shader_model, + GstD3DConverterPSByteCode byte_code[4]) +{ + static std::mutex cache_lock; + + auto input = conv_ps_make_input (in_format, in_premul); + auto output = conv_ps_make_output (out_format, out_premul); + std::string conv_type_str; + std::string sm_target; + guint ret = 0; + + switch (conv_type) { + case GST_D3D_CONVERTER_IDENTITY: + conv_type_str = "Identity"; + break; + case GST_D3D_CONVERTER_SIMPLE: + conv_type_str = "Simple"; + break; + case GST_D3D_CONVERTER_RANGE: + conv_type_str = "Range"; + break; + case GST_D3D_CONVERTER_GAMMA: + conv_type_str = "Gamma"; + break; + case GST_D3D_CONVERTER_PRIMARY: + conv_type_str = "Primary"; + break; + default: + g_assert_not_reached (); + return 0; + } + + sm_target = std::string ("ps_") + g_sm_map[shader_model]; + + std::lock_guard <std::mutex> lk (cache_lock); + for (const auto & it : output) { + auto output_builder = it.second; + std::string shader_name = "PSMain_" + input + "_" + conv_type_str + "_" + + output_builder + "_" + g_sm_map[shader_model]; + GstD3DConverterPSByteCode *ps_blob = &byte_code[ret]; + ps_blob->num_rtv = ps_output_get_num_rtv (it.first); + + auto cached = 
g_converter_ps_table.find (shader_name); + if (cached != g_converter_ps_table.end ()) { + ps_blob->byte_code.byte_code = cached->second.first; + ps_blob->byte_code.byte_code_len = cached->second.second; + } else { + std::vector<std::pair<std::string,std::string>> macro_str_pairs; + std::vector<D3D_SHADER_MACRO> macros; + auto output_type = ps_output_to_string (it.first); + + macro_str_pairs.push_back ({"ENTRY_POINT", shader_name}); + macro_str_pairs.push_back ({"SAMPLER", "Sampler" + input}); + macro_str_pairs.push_back ({"CONVERTER", "Converter" + conv_type_str}); + macro_str_pairs.push_back ({"OUTPUT_TYPE", output_type }); + macro_str_pairs.push_back ({"OUTPUT_BUILDER", "Output" + output_builder}); + + for (const auto & def : macro_str_pairs) + macros.push_back({def.first.c_str (), def.second.c_str ()}); + + macros.push_back({nullptr, nullptr}); + + ID3DBlob *blob = nullptr; + ComPtr<ID3DBlob> error_msg; + + auto hr = gst_d3d_compile (str_PSMain_converter, + sizeof (str_PSMain_converter), nullptr, macros.data (), nullptr, + shader_name.c_str (), sm_target.c_str (), 0, 0, &blob, &error_msg); + if (FAILED (hr)) { + const gchar *err = nullptr; + if (error_msg) + err = (const gchar *) error_msg->GetBufferPointer (); + + GST_ERROR ("Couldn't compile code, hr: 0x%x, error detail: %s", + (guint) hr, GST_STR_NULL (err)); + return 0; + } + + ps_blob->byte_code.byte_code = blob->GetBufferPointer (); + ps_blob->byte_code.byte_code_len = blob->GetBufferSize (); + + g_converter_ps_table[shader_name] = { + (const BYTE *) blob->GetBufferPointer (), + blob->GetBufferSize () }; + + std::lock_guard <std::mutex> blk (g_blob_lock); + g_compiled_blobs.push_back ({ shader_name, blob }); + } + + ret++; + } + + return ret; +} diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/gstd3dshadercache.h b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/gstd3dshadercache.h new file mode 100644 index 0000000000..e6958e41eb --- /dev/null +++ 
b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/gstd3dshadercache.h @@ -0,0 +1,121 @@ +/* GStreamer + * Copyright (C) 2024 Seungha Yang <seungha@centricular.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#pragma once + +#include <gst/gst.h> +#include <gst/d3dshader/d3dshader-prelude.h> +#include <gst/video/video.h> +#include <dxgi.h> + +G_BEGIN_DECLS + +typedef enum +{ + GST_D3D_PLUGIN_PS_CHECKER_LUMA, + GST_D3D_PLUGIN_PS_CHECKER_RGB, + GST_D3D_PLUGIN_PS_CHECKER_VUYA, + GST_D3D_PLUGIN_PS_CHECKER, + GST_D3D_PLUGIN_PS_COLOR, + GST_D3D_PLUGIN_PS_SAMPLE_PREMULT, + GST_D3D_PLUGIN_PS_SAMPLE, + GST_D3D_PLUGIN_PS_SNOW, + + GST_D3D_PLUGIN_PS_LAST +} GstD3DPluginPS; + +typedef enum +{ + GST_D3D_PLUGIN_VS_COLOR, + GST_D3D_PLUGIN_VS_COORD, + GST_D3D_PLUGIN_VS_POS, + + GST_D3D_PLUGIN_VS_LAST, +} GstD3DPluginVS; + + +typedef enum +{ + GST_D3D_SM_4_0, + GST_D3D_SM_5_0, + GST_D3D_SM_5_1, + + GST_D3D_SM_LAST +} GstD3DShaderModel; + +typedef struct _GstD3DShaderByteCode +{ + gconstpointer byte_code; + gsize byte_code_len; +} GstD3DShaderByteCode; + +typedef enum +{ + GST_D3D_CONVERTER_IDENTITY, + GST_D3D_CONVERTER_SIMPLE, + GST_D3D_CONVERTER_RANGE, + GST_D3D_CONVERTER_GAMMA, + GST_D3D_CONVERTER_PRIMARY, +} GstD3DConverterType; + +typedef struct 
_GstD3DConverterCSByteCode +{ + GstD3DShaderByteCode byte_code; + guint x_unit; + guint y_unit; + DXGI_FORMAT srv_format; + DXGI_FORMAT uav_format; +} GstD3DConverterCSByteCode; + +typedef struct _GstD3DConverterPSByteCode +{ + GstD3DShaderByteCode byte_code; + guint num_rtv; +} GstD3DConverterPSByteCode; + +GST_D3D_SHADER_API +gboolean gst_d3d_plugin_shader_get_vs_blob (GstD3DPluginVS type, + GstD3DShaderModel shader_model, + GstD3DShaderByteCode * byte_code); + +GST_D3D_SHADER_API +gboolean gst_d3d_plugin_shader_get_ps_blob (GstD3DPluginPS type, + GstD3DShaderModel shader_model, + GstD3DShaderByteCode * byte_code); + +GST_D3D_SHADER_API +gboolean gst_d3d_converter_shader_get_vs_blob (GstD3DShaderModel shader_model, + GstD3DShaderByteCode * byte_code); + +GST_D3D_SHADER_API +gboolean gst_d3d_converter_shader_get_cs_blob (GstVideoFormat in_format, + GstVideoFormat out_format, + GstD3DShaderModel shader_model, + GstD3DConverterCSByteCode * byte_code); + +GST_D3D_SHADER_API +guint gst_d3d_converter_shader_get_ps_blob (GstVideoFormat in_format, + GstVideoFormat out_format, + gboolean in_premul, + gboolean out_premul, + GstD3DConverterType conv_type, + GstD3DShaderModel shader_model, + GstD3DConverterPSByteCode byte_code[4]); + +G_END_DECLS diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/meson.build b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/meson.build new file mode 100644 index 0000000000..90565ae39f --- /dev/null +++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/meson.build @@ -0,0 +1,68 @@ +d3dshader_sources = [ + 'gstd3dcompile.cpp', + 'gstd3dshadercache.cpp', +] + +gstd3dshader_dep = dependency('', required : false) # not-found placeholder, replaced by declare_dependency() at the end of this file + +if host_system != 'windows' + subdir_done() +endif + +extra_args = [ + '-DGST_USE_UNSTABLE_API', + '-DBUILDING_GST_D3D_SHADER', + '-DG_LOG_DOMAIN="GStreamer-D3DShader"', +] + +sdk_headers = [ + 'wrl.h', + 'd3dcompiler.h', + 'dxgi.h', +] + +foreach h : sdk_headers + if not cc.has_header(h) + subdir_done() + endif 
+endforeach + +# MinGW 32bits compiler seems to be complaining about redundant-decls +# when ComPtr is in use. Let's just disable the warning +if cc.get_id() != 'msvc' + extra_args += cc.get_supported_arguments([ + '-Wno-redundant-decls', + ]) +endif + +hlsl_precompile_opt = get_option('d3d-hlsl-precompile') + +hlsl_precompiled = [] +fxc = find_program ('fxc', required : hlsl_precompile_opt) +if not hlsl_precompile_opt.disabled() and cc.get_id() == 'msvc' and fxc.found() + subdir('converter-hlsl') + subdir('plugin-hlsl') + extra_args += ['-DHLSL_PRECOMPILED'] +endif + +pkg_name = 'gstreamer-d3dshader-' + api_version +gstd3dshader = library('gstd3dshader-' + api_version, + d3dshader_sources + hlsl_precompiled, + c_args : gst_plugins_bad_args + extra_args, + cpp_args : gst_plugins_bad_args + extra_args, + include_directories : [configinc, libsinc], + version : libversion, + soversion : soversion, + install : true, + dependencies : [gstvideo_dep, gmodule_dep] +) + +library_def = {'lib': gstd3dshader} +gst_libraries += [[pkg_name, library_def]] + +gstd3dshader_dep = declare_dependency(link_with : gstd3dshader, + include_directories : [libsinc], + dependencies : [gstvideo_dep], + sources : gen_sources) + +meson.override_dependency(pkg_name, gstd3dshader_dep) diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/PSMain_checker.hlsl b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/PSMain_checker.hlsl new file mode 100644 index 0000000000..f76bd362a6 --- /dev/null +++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/PSMain_checker.hlsl @@ -0,0 +1,73 @@ +/* GStreamer + * Copyright (C) 2023 Seungha Yang <seungha@centricular.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#ifdef BUILDING_HLSL +cbuffer CheckerConstBuffer : register(b0) +{ + float width; + float height; + float checker_size; + float alpha; +}; + +struct PS_INPUT +{ + float4 Position: SV_POSITION; + float2 Texture: TEXCOORD; +}; + +float4 ENTRY_POINT (PS_INPUT input) : SV_Target +{ + float4 output; + float2 xy_mod = floor (0.5 * input.Texture * float2 (width, height) / checker_size); + float result = fmod (xy_mod.x + xy_mod.y, 2.0); + output.r = step (result, 0.5); + output.g = 1.0 - output.r; + output.b = 0; + output.a = alpha; + return output; +} +#else +static const char str_PSMain_checker[] = +"cbuffer CheckerConstBuffer : register(b0)\n" +"{\n" +" float width;\n" +" float height;\n" +" float checker_size;\n" +" float alpha;\n" +"};\n" +"\n" +"struct PS_INPUT\n" +"{\n" +" float4 Position: SV_POSITION;\n" +" float2 Texture: TEXCOORD;\n" +"};\n" +"\n" +"float4 ENTRY_POINT (PS_INPUT input) : SV_Target\n" +"{\n" +" float4 output;\n" +" float2 xy_mod = floor (0.5 * input.Texture * float2 (width, height) / checker_size);\n" +" float result = fmod (xy_mod.x + xy_mod.y, 2.0);\n" +" output.r = step (result, 0.5);\n" +" output.g = 1.0 - output.r;\n" +" output.b = 0;\n" +" output.a = alpha;\n" +" return output;\n" +"}\n"; +#endif diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/PSMain_checker_luma.hlsl b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/PSMain_checker_luma.hlsl new file mode 100644 index 0000000000..74c7b7acc0 --- /dev/null +++ 
b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/PSMain_checker_luma.hlsl @@ -0,0 +1,83 @@ +/* GStreamer + * Copyright (C) 2023 Seungha Yang <seungha@centricular.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#ifdef BUILDING_HLSL +static const float blocksize = 8.0; +static const float4 high = float4 (0.667, 0.0, 0.0, 1.0); +static const float4 low = float4 (0.333, 0.0, 0.0, 1.0); + +struct PS_INPUT +{ + float4 Position : SV_POSITION; +}; + +struct PS_OUTPUT +{ + float4 Plane : SV_TARGET; +}; + +PS_OUTPUT ENTRY_POINT (PS_INPUT input) +{ + PS_OUTPUT output; + if ((input.Position.x % (blocksize * 2.0)) >= blocksize) { + if ((input.Position.y % (blocksize * 2.0)) >= blocksize) + output.Plane = low; + else + output.Plane = high; + } else { + if ((input.Position.y % (blocksize * 2.0)) < blocksize) + output.Plane = low; + else + output.Plane = high; + } + return output; +} +#else +static const char str_PSMain_checker_luma[] = +"static const float blocksize = 8.0;\n" +"static const float4 high = float4 (0.667, 0.0, 0.0, 1.0);\n" +"static const float4 low = float4 (0.333, 0.0, 0.0, 1.0);\n" +"\n" +"struct PS_INPUT\n" +"{\n" +" float4 Position : SV_POSITION;\n" +"};\n" +"\n" +"struct PS_OUTPUT\n" +"{\n" +" float4 Plane : SV_TARGET;\n" +"};\n" +"\n" 
+"PS_OUTPUT ENTRY_POINT (PS_INPUT input)\n" +"{\n" +" PS_OUTPUT output;\n" +" if ((input.Position.x % (blocksize * 2.0)) >= blocksize) {\n" +" if ((input.Position.y % (blocksize * 2.0)) >= blocksize)\n" +" output.Plane = low;\n" +" else\n" +" output.Plane = high;\n" +" } else {\n" +" if ((input.Position.y % (blocksize * 2.0)) < blocksize)\n" +" output.Plane = low;\n" +" else\n" +" output.Plane = high;\n" +" }\n" +" return output;\n" +"}\n"; +#endif diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/PSMain_checker_rgb.hlsl b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/PSMain_checker_rgb.hlsl new file mode 100644 index 0000000000..19d5505b05 --- /dev/null +++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/PSMain_checker_rgb.hlsl @@ -0,0 +1,83 @@ +/* GStreamer + * Copyright (C) 2023 Seungha Yang <seungha@centricular.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ */ + +#ifdef BUILDING_HLSL +static const float blocksize = 8.0; +static const float4 high = float4 (0.667, 0.667, 0.667, 1.0); +static const float4 low = float4 (0.333, 0.333, 0.333, 1.0); + +struct PS_INPUT +{ + float4 Position : SV_POSITION; +}; + +struct PS_OUTPUT +{ + float4 Plane : SV_TARGET; +}; + +PS_OUTPUT ENTRY_POINT (PS_INPUT input) +{ + PS_OUTPUT output; + if ((input.Position.x % (blocksize * 2.0)) >= blocksize) { + if ((input.Position.y % (blocksize * 2.0)) >= blocksize) + output.Plane = low; + else + output.Plane = high; + } else { + if ((input.Position.y % (blocksize * 2.0)) < blocksize) + output.Plane = low; + else + output.Plane = high; + } + return output; +} +#else +static const char str_PSMain_checker_rgb[] = +"static const float blocksize = 8.0;\n" +"static const float4 high = float4 (0.667, 0.667, 0.667, 1.0);\n" +"static const float4 low = float4 (0.333, 0.333, 0.333, 1.0);\n" +"\n" +"struct PS_INPUT\n" +"{\n" +" float4 Position : SV_POSITION;\n" +"};\n" +"\n" +"struct PS_OUTPUT\n" +"{\n" +" float4 Plane : SV_TARGET;\n" +"};\n" +"\n" +"PS_OUTPUT ENTRY_POINT (PS_INPUT input)\n" +"{\n" +" PS_OUTPUT output;\n" +" if ((input.Position.x % (blocksize * 2.0)) >= blocksize) {\n" +" if ((input.Position.y % (blocksize * 2.0)) >= blocksize)\n" +" output.Plane = low;\n" +" else\n" +" output.Plane = high;\n" +" } else {\n" +" if ((input.Position.y % (blocksize * 2.0)) < blocksize)\n" +" output.Plane = low;\n" +" else\n" +" output.Plane = high;\n" +" }\n" +" return output;\n" +"}\n"; +#endif diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/PSMain_checker_vuya.hlsl b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/PSMain_checker_vuya.hlsl new file mode 100644 index 0000000000..e842cb940b --- /dev/null +++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/PSMain_checker_vuya.hlsl @@ -0,0 +1,83 @@ +/* GStreamer + * Copyright (C) 2023 Seungha Yang <seungha@centricular.com> + * + * This library is free 
software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#ifdef BUILDING_HLSL +static const float blocksize = 8.0; +static const float4 high = float4 (0.5, 0.5, 0.667, 1.0); +static const float4 low = float4 (0.5, 0.5, 0.333, 1.0); + +struct PS_INPUT +{ + float4 Position : SV_POSITION; +}; + +struct PS_OUTPUT +{ + float4 Plane : SV_TARGET; +}; + +PS_OUTPUT ENTRY_POINT (PS_INPUT input) +{ + PS_OUTPUT output; + if ((input.Position.x % (blocksize * 2.0)) >= blocksize) { + if ((input.Position.y % (blocksize * 2.0)) >= blocksize) + output.Plane = low; + else + output.Plane = high; + } else { + if ((input.Position.y % (blocksize * 2.0)) < blocksize) + output.Plane = low; + else + output.Plane = high; + } + return output; +} +#else +static const char str_PSMain_checker_vuya[] = +"static const float blocksize = 8.0;\n" +"static const float4 high = float4 (0.5, 0.5, 0.667, 1.0);\n" +"static const float4 low = float4 (0.5, 0.5, 0.333, 1.0);\n" +"\n" +"struct PS_INPUT\n" +"{\n" +" float4 Position : SV_POSITION;\n" +"};\n" +"\n" +"struct PS_OUTPUT\n" +"{\n" +" float4 Plane : SV_TARGET;\n" +"};\n" +"\n" +"PS_OUTPUT ENTRY_POINT (PS_INPUT input)\n" +"{\n" +" PS_OUTPUT output;\n" +" if ((input.Position.x % (blocksize * 2.0)) >= blocksize) {\n" +" if ((input.Position.y % (blocksize * 2.0)) >= blocksize)\n" +" 
output.Plane = low;\n" +" else\n" +" output.Plane = high;\n" +" } else {\n" +" if ((input.Position.y % (blocksize * 2.0)) < blocksize)\n" +" output.Plane = low;\n" +" else\n" +" output.Plane = high;\n" +" }\n" +" return output;\n" +"}\n"; +#endif diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/PSMain_color.hlsl b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/PSMain_color.hlsl new file mode 100644 index 0000000000..0e16d7c1b4 --- /dev/null +++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/PSMain_color.hlsl @@ -0,0 +1,43 @@ +/* GStreamer + * Copyright (C) 2023 Seungha Yang <seungha@centricular.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ */ + +#ifdef BUILDING_HLSL +struct PS_INPUT +{ + float4 Position: SV_POSITION; + float4 Color: COLOR; +}; + +float4 ENTRY_POINT (PS_INPUT input) : SV_TARGET +{ + return input.Color; +} +#else +static const char str_PSMain_color[] = +"struct PS_INPUT\n" +"{\n" +" float4 Position: SV_POSITION;\n" +" float4 Color: COLOR;\n" +"};\n" +"\n" +"float4 ENTRY_POINT (PS_INPUT input) : SV_TARGET\n" +"{\n" +" return input.Color;\n" +"}\n"; +#endif diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/PSMain_sample.hlsl b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/PSMain_sample.hlsl new file mode 100644 index 0000000000..b3470b5742 --- /dev/null +++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/PSMain_sample.hlsl @@ -0,0 +1,49 @@ +/* GStreamer + * Copyright (C) 2023 Seungha Yang <seungha@centricular.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ */ + +#ifdef BUILDING_HLSL +Texture2D shaderTexture; +SamplerState samplerState; + +struct PS_INPUT +{ + float4 Position : SV_POSITION; + float2 Texture : TEXCOORD; +}; + +float4 ENTRY_POINT (PS_INPUT input): SV_TARGET +{ + return shaderTexture.Sample (samplerState, input.Texture); +} +#else +static const char str_PSMain_sample[] = +"Texture2D shaderTexture;\n" +"SamplerState samplerState;\n" +"\n" +"struct PS_INPUT\n" +"{\n" +" float4 Position: SV_POSITION;\n" +" float2 Texture: TEXCOORD;\n" +"};\n" +"\n" +"float4 ENTRY_POINT (PS_INPUT input): SV_TARGET\n" +"{\n" +" return shaderTexture.Sample (samplerState, input.Texture);\n" +"}\n"; +#endif diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/PSMain_sample_premul.hlsl b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/PSMain_sample_premul.hlsl new file mode 100644 index 0000000000..6ac5e6070f --- /dev/null +++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/PSMain_sample_premul.hlsl @@ -0,0 +1,61 @@ +/* GStreamer + * Copyright (C) 2023 Seungha Yang <seungha@centricular.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ */ +/* Pixel shader that samples shaderTexture and returns the sample with r, g and b each multiplied by the sampled alpha and saturated; alpha itself is passed through. The HLSL below is used when BUILDING_HLSL is defined; otherwise the same source is provided as the C string str_PSMain_sample_premul. */ +#ifdef BUILDING_HLSL +Texture2D shaderTexture; +SamplerState samplerState; +/* Pixel shader input: SV_POSITION plus one TEXCOORD. */ +struct PS_INPUT +{ + float4 Position : SV_POSITION; + float2 Texture : TEXCOORD; +}; +/* ENTRY_POINT is not defined in this file; it has to be supplied as a preprocessor define when the shader is compiled. */ +float4 ENTRY_POINT (PS_INPUT input): SV_TARGET +{ + float4 sample = shaderTexture.Sample (samplerState, input.Texture); + float4 premul_sample; + premul_sample.r = saturate (sample.r * sample.a); + premul_sample.g = saturate (sample.g * sample.a); + premul_sample.b = saturate (sample.b * sample.a); + premul_sample.a = sample.a; + return premul_sample; +} +#else +static const char str_PSMain_sample_premul[] = +"Texture2D shaderTexture;\n" +"SamplerState samplerState;\n" +"\n" +"struct PS_INPUT\n" +"{\n" +" float4 Position : SV_POSITION;\n" +" float2 Texture : TEXCOORD;\n" +"};\n" +"\n" +"float4 ENTRY_POINT (PS_INPUT input): SV_TARGET\n" +"{\n" +" float4 sample = shaderTexture.Sample (samplerState, input.Texture);\n" +" float4 premul_sample;\n" +" premul_sample.r = saturate (sample.r * sample.a);\n" +" premul_sample.g = saturate (sample.g * sample.a);\n" +" premul_sample.b = saturate (sample.b * sample.a);\n" +" premul_sample.a = sample.a;\n" +" return premul_sample;\n" +"}\n"; +#endif diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/PSMain_snow.hlsl b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/PSMain_snow.hlsl new file mode 100644 index 0000000000..34cbf5a0cb --- /dev/null +++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/PSMain_snow.hlsl @@ -0,0 +1,75 @@ +/* GStreamer + * Copyright (C) 2023 Seungha Yang <seungha@centricular.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ +/* Pixel shader producing grayscale noise: r, g and b of every pixel are set to get_rand (time * texture coordinate) and alpha to the constant-buffer value alpha. The HLSL below is used when BUILDING_HLSL is defined; otherwise the same source is provided as the C string str_PSMain_snow, which must declare the same constant buffer (SnowConstBuffer at b0). */ +#ifdef BUILDING_HLSL +cbuffer SnowConstBuffer : register(b0) +{ + float time; + float alpha; + float2 padding; +}; +/* Pixel shader input: SV_POSITION plus one TEXCOORD. */ +struct PS_INPUT +{ + float4 Position : SV_POSITION; + float2 Texture : TEXCOORD; +}; +/* Hashes uv to the fractional part of a scaled sine of its dot product with fixed constants. */ +float get_rand (float2 uv) +{ + return frac (sin (dot (uv, float2 (12.9898,78.233))) * 43758.5453); +} +/* ENTRY_POINT is not defined in this file; it has to be supplied as a preprocessor define when the shader is compiled. */ +float4 ENTRY_POINT (PS_INPUT input) : SV_Target +{ + float4 output; + float val = get_rand (time * input.Texture); + output.rgb = float3(val, val, val); + output.a = alpha; + return output; +} +#else +static const char str_PSMain_snow[] = +"cbuffer SnowConstBuffer : register(b0)\n" +"{\n" +" float time;\n" +" float alpha;\n" +" float2 padding;\n" +"};\n" +"\n" +"struct PS_INPUT\n" +"{\n" +" float4 Position : SV_POSITION;\n" +" float2 Texture : TEXCOORD;\n" +"};\n" +"\n" +"float get_rand(float2 uv)\n" +"{\n" +" return frac (sin (dot (uv, float2 (12.9898,78.233))) * 43758.5453);\n" +"}\n" +"\n" +"float4 ENTRY_POINT (PS_INPUT input) : SV_Target\n" +"{\n" +" float4 output;\n" +" float val = get_rand (time * input.Texture);\n" +" output.rgb = float3(val, val, val);\n" +" output.a = alpha;\n" +" return output;\n" +"}\n"; +#endif diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/VSMain_color.hlsl b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/VSMain_color.hlsl new file mode 100644 index 0000000000..d1940740b4 --- /dev/null +++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/VSMain_color.hlsl @@ -0,0 +1,55 @@ +/* 
GStreamer + * Copyright (C) 2023 Seungha Yang <seungha@centricular.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ +/* Vertex shader that forwards a position + color vertex without modifying it. The HLSL below is used when BUILDING_HLSL is defined; otherwise the same source is provided as the C string str_VSMain_color. */ +#ifdef BUILDING_HLSL +struct VS_INPUT +{ + float4 Position : POSITION; + float4 Color : COLOR; +}; +/* Same member types as VS_INPUT; only the semantic on Position differs (SV_POSITION instead of POSITION). */ +struct VS_OUTPUT +{ + float4 Position : SV_POSITION; + float4 Color : COLOR; +}; +/* Returns the input as the output. ENTRY_POINT is not defined in this file; it has to be supplied as a preprocessor define when the shader is compiled. */ +VS_OUTPUT ENTRY_POINT (VS_INPUT input) +{ + return input; +} +#else +static const char str_VSMain_color[] = +"struct VS_INPUT\n" +"{\n" +" float4 Position : POSITION;\n" +" float4 Color : COLOR;\n" +"};\n" +"\n" +"struct VS_OUTPUT\n" +"{\n" +" float4 Position : SV_POSITION;\n" +" float4 Color : COLOR;\n" +"};\n" +"\n" +"VS_OUTPUT ENTRY_POINT (VS_INPUT input)\n" +"{\n" +" return input;\n" +"}\n"; +#endif diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/VSMain_coord.hlsl b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/VSMain_coord.hlsl new file mode 100644 index 0000000000..3699426364 --- /dev/null +++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/VSMain_coord.hlsl @@ -0,0 +1,55 @@ +/* GStreamer + * Copyright (C) 2023 Seungha Yang <seungha@centricular.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License 
as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ +/* Vertex shader that forwards a position + texture-coordinate vertex without modifying it. The HLSL below is used when BUILDING_HLSL is defined; otherwise the same source is provided as the C string str_VSMain_coord. */ +#ifdef BUILDING_HLSL +struct VS_INPUT +{ + float4 Position : POSITION; + float2 Texture : TEXCOORD; +}; +/* Same member types as VS_INPUT; only the semantic on Position differs (SV_POSITION instead of POSITION). */ +struct VS_OUTPUT +{ + float4 Position : SV_POSITION; + float2 Texture : TEXCOORD; +}; +/* Returns the input as the output. ENTRY_POINT is not defined in this file; it has to be supplied as a preprocessor define when the shader is compiled. */ +VS_OUTPUT ENTRY_POINT (VS_INPUT input) +{ + return input; +} +#else +static const char str_VSMain_coord[] = +"struct VS_INPUT\n" +"{\n" +" float4 Position : POSITION;\n" +" float2 Texture : TEXCOORD;\n" +"};\n" +"\n" +"struct VS_OUTPUT\n" +"{\n" +" float4 Position : SV_POSITION;\n" +" float2 Texture : TEXCOORD;\n" +"};\n" +"\n" +"VS_OUTPUT ENTRY_POINT (VS_INPUT input)\n" +"{\n" +" return input;\n" +"}\n"; +#endif diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/VSMain_pos.hlsl b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/VSMain_pos.hlsl new file mode 100644 index 0000000000..2a01b38b48 --- /dev/null +++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/VSMain_pos.hlsl @@ -0,0 +1,51 @@ +/* GStreamer + * Copyright (C) 2023 Seungha Yang <seungha@centricular.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ +/* Vertex shader that forwards a position-only vertex without modifying it. The HLSL below is used when BUILDING_HLSL is defined; otherwise the same source is provided as the C string str_VSMain_pos. */ +#ifdef BUILDING_HLSL +struct VS_INPUT +{ + float4 Position : POSITION; +}; +/* Same member type as VS_INPUT; only the semantic differs (SV_POSITION instead of POSITION). */ +struct VS_OUTPUT +{ + float4 Position : SV_POSITION; +}; +/* Returns the input as the output. ENTRY_POINT is not defined in this file; it has to be supplied as a preprocessor define when the shader is compiled. */ +VS_OUTPUT ENTRY_POINT (VS_INPUT input) +{ + return input; +} +#else +static const char str_VSMain_pos[] = +"struct VS_INPUT\n" +"{\n" +" float4 Position : POSITION;\n" +"};\n" +"\n" +"struct VS_OUTPUT\n" +"{\n" +" float4 Position : SV_POSITION;\n" +"};\n" +"\n" +"VS_OUTPUT ENTRY_POINT (VS_INPUT input)\n" +"{\n" +" return input;\n" +"}\n"; +#endif diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/collect_hlsl_headers.py b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/collect_hlsl_headers.py new file mode 100644 index 0000000000..a6f13950b8 --- /dev/null +++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/collect_hlsl_headers.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python3 +# GStreamer +# Copyright (C) 2023 Seungha Yang <seungha@centricular.com> +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Library General Public +# License as published by the Free Software Foundation; either +# version 2 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Library General Public License for more details. 
+# +# You should have received a copy of the GNU Library General Public +# License along with this library; if not, write to the +# Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, +# Boston, MA 02110-1301, USA. + +import sys +import os +import argparse + +start_header = """/* + * This file is autogenerated by collect_hlsl_header.py + */ +#pragma once + +""" + +start_map = """ +#define MAKE_BYTECODE(name) { G_STRINGIFY (name), { g_##name, sizeof (g_##name)} } +static std::unordered_map<std::string, std::pair<const BYTE *, SIZE_T>> +""" + +end_map = """}; +#undef MAKE_BYTECODE +""" + +def main(args): + parser = argparse.ArgumentParser(description='Read precompiled HLSL headers from directory and make single header') + parser.add_argument("--input", help="the precompiled HLSL header directory") + parser.add_argument("--output", help="output header file location") + parser.add_argument("--prefix", help="HLSL header filename prefix") + parser.add_argument("--name", help="Hash map variable name") + args = parser.parse_args(args) + + # Scan precompiled PSMain_*.h headers in build directory + # and generate single header + hlsl_headers = [os.path.basename(file) for file in os.listdir(args.input) if file.startswith(args.prefix) and file.endswith(".h") ] + + with open(args.output, 'w', newline='\n', encoding='utf8') as f: + f.write(start_header) + for file in hlsl_headers: + f.write("#include \"") + f.write(file) + f.write("\"\n") + f.write(start_map) + f.write(args.name) + f.write(" = {\n") + for file in hlsl_headers: + f.write(" MAKE_BYTECODE ({}),\n".format(os.path.splitext(file)[0])) + f.write(end_map) + + +if __name__ == "__main__": + sys.exit(main(sys.argv[1:])) diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/hlsl.h b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/hlsl.h new file mode 100644 index 0000000000..67a81deeaa --- /dev/null +++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/hlsl.h @@ 
-0,0 +1,32 @@ +/* GStreamer + * Copyright (C) 2023 Seungha Yang <seungha@centricular.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ + +#pragma once +/* Pulls in the plugin shader sources. The PSMain_color, PSMain_sample*, PSMain_snow and VSMain_* files define a C string named str_<file> holding the shader source when BUILDING_HLSL is not defined. NOTE(review): the PSMain_checker* files are presumably structured the same way; confirm against those files. */ +#include "PSMain_checker_luma.hlsl" +#include "PSMain_checker_rgb.hlsl" +#include "PSMain_checker_vuya.hlsl" +#include "PSMain_checker.hlsl" +#include "PSMain_color.hlsl" +#include "PSMain_sample_premul.hlsl" +#include "PSMain_sample.hlsl" +#include "PSMain_snow.hlsl" +#include "VSMain_color.hlsl" +#include "VSMain_coord.hlsl" +#include "VSMain_pos.hlsl" diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/meson.build b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/meson.build new file mode 100644 index 0000000000..fd23812fd9 --- /dev/null +++ b/subprojects/gst-plugins-bad/gst-libs/gst/d3dshader/plugin-hlsl/meson.build @@ -0,0 +1,65 @@ +hlsl_sources = [ + ['PSMain_checker_luma', 'ps'], + ['PSMain_checker_rgb', 'ps'], + ['PSMain_checker_vuya', 'ps'], + ['PSMain_checker', 'ps'], + ['PSMain_color', 'ps'], + ['PSMain_sample_premul', 'ps'], + ['PSMain_sample', 'ps'], + ['PSMain_snow', 'ps'], + ['VSMain_color', 'vs'], + ['VSMain_coord', 'vs'], + ['VSMain_pos', 'vs'], +] + +shader_model = '5_0' + +plugin_hlsl_precompiled = [] + +foreach shader : hlsl_sources + 
entry_point_prefix = shader.get(0) + target_prefix = shader.get(1) + source = files('@0@.hlsl'.format(entry_point_prefix)) + entry_point = '@0@_@1@'.format(entry_point_prefix, shader_model) + header = '@0@.h'.format(entry_point) + sm_target = '@0@_@1@'.format(target_prefix, shader_model) + compiled_shader = custom_target(header, + input : source, + output : header, + command : [fxc, '/Fh', '@OUTPUT@', + '/E', entry_point, + '/T', sm_target, + '/D', 'BUILDING_HLSL=1', + '/D', 'ENTRY_POINT=@0@'.format(entry_point), + '/nologo', + '@INPUT@']) + plugin_hlsl_precompiled += [compiled_shader] +endforeach +# The loop above compiles every entry of hlsl_sources with fxc into <name>_<shader_model>.h, defining BUILDING_HLSL and ENTRY_POINT for the shader source. The script below merges those headers into one header per shader stage. +header_collector = find_program('collect_hlsl_headers.py') +# Collects the PSMain_* headers found in the current build directory into plugin_hlsl_ps.h as the table g_plugin_ps_table. +plugin_ps_collection = custom_target('plugin_hlsl_ps', + input : plugin_hlsl_precompiled, + output : 'plugin_hlsl_ps.h', + command : [header_collector, + '--input', meson.current_build_dir(), + '--prefix', 'PSMain_', + '--name', 'g_plugin_ps_table', + '--output', '@OUTPUT@' + ]) +# Collects the VSMain_* headers found in the current build directory into plugin_hlsl_vs.h as the table g_plugin_vs_table. +plugin_vs_collection = custom_target('plugin_hlsl_vs', + input : plugin_hlsl_precompiled, + output : 'plugin_hlsl_vs.h', + command : [header_collector, + '--input', meson.current_build_dir(), + '--prefix', 'VSMain_', + '--name', 'g_plugin_vs_table', + '--output', '@OUTPUT@' + ]) +# hlsl_precompiled is not declared in this file; presumably the including meson.build defines it - confirm. +hlsl_precompiled += [ + plugin_hlsl_precompiled, + plugin_ps_collection, + plugin_vs_collection, +] diff --git a/subprojects/gst-plugins-bad/gst-libs/gst/meson.build b/subprojects/gst-plugins-bad/gst-libs/gst/meson.build index 6207f3381a..b7fbe98b56 100644 --- a/subprojects/gst-plugins-bad/gst-libs/gst/meson.build +++ b/subprojects/gst-plugins-bad/gst-libs/gst/meson.build @@ -5,6 +5,7 @@ subdir('audio') subdir('basecamerabinsrc') subdir('codecparsers') subdir('codecs') +subdir('d3dshader') subdir('d3d11') # cuda can depend on d3d11 subdir('cuda') diff --git a/subprojects/gst-plugins-bad/meson_options.txt b/subprojects/gst-plugins-bad/meson_options.txt index 8d16a29aca..15c354c82b 100644 --- a/subprojects/gst-plugins-bad/meson_options.txt +++ 
b/subprojects/gst-plugins-bad/meson_options.txt @@ -210,6 +210,9 @@ option('isac', type : 'feature', value : 'auto', description : 'iSAC plugin') option('aja-sdk-dir', type : 'string', value : '', description : 'Directory with AJA SDK, e.g. ntv2sdklinux_16.0.0.4') +# D3D11/D3D12 HLSL library options +option('d3d-hlsl-precompile', type : 'feature', value : 'auto', description : 'Enable buildtime HLSL compile for d3d11/d3d12 library/plugin') + # D3D11 plugin options option('d3d11-math', type : 'feature', value : 'auto', description : 'Enable DirectX SIMD Math support') option('d3d11-hlsl-precompile', type : 'feature', value : 'auto', description : 'Enable buildtime HLSL compile for d3d11 library/plugin')