From d7e3fd7fe8c02c162fffaef6ad03e0c631d16609 Mon Sep 17 00:00:00 2001 From: lizzie Date: Thu, 29 Jan 2026 22:04:39 +0000 Subject: [PATCH] linearize GetBitLength --- src/video_core/host_shaders/astc_decoder.comp | 117 ++++++++---------- 1 file changed, 49 insertions(+), 68 deletions(-) diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp index aa2468846e..6a36500b56 100644 --- a/src/video_core/host_shaders/astc_decoder.comp +++ b/src/video_core/host_shaders/astc_decoder.comp @@ -48,6 +48,7 @@ const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT; const uint BYTES_PER_BLOCK_LOG2 = 4; +// DO NOT CHANGE - code depends on the value of these! const uint JUST_BITS = 0u; const uint QUINT = 1u; const uint TRIT = 2u; @@ -55,12 +56,29 @@ const uint TRIT = 2u; // ASTC Encodings data, sorted in ascending order based on their BitLength value // (see GetBitLength() function) const uint encoding_values[22] = uint[]( - (JUST_BITS), (JUST_BITS | (1u << 8u)), (TRIT), (JUST_BITS | (2u << 8u)), - (QUINT), (TRIT | (1u << 8u)), (JUST_BITS | (3u << 8u)), (QUINT | (1u << 8u)), - (TRIT | (2u << 8u)), (JUST_BITS | (4u << 8u)), (QUINT | (2u << 8u)), (TRIT | (3u << 8u)), - (JUST_BITS | (5u << 8u)), (QUINT | (3u << 8u)), (TRIT | (4u << 8u)), (JUST_BITS | (6u << 8u)), - (QUINT | (4u << 8u)), (TRIT | (5u << 8u)), (JUST_BITS | (7u << 8u)), (QUINT | (5u << 8u)), - (TRIT | (6u << 8u)), (JUST_BITS | (8u << 8u))); + (JUST_BITS), + (JUST_BITS | (1u << 8u)), + (TRIT), + (JUST_BITS | (2u << 8u)), + (QUINT), + (TRIT | (1u << 8u)), + (JUST_BITS | (3u << 8u)), + (QUINT | (1u << 8u)), + (TRIT | (2u << 8u)), + (JUST_BITS | (4u << 8u)), + (QUINT | (2u << 8u)), + (TRIT | (3u << 8u)), + (JUST_BITS | (5u << 8u)), + (QUINT | (3u << 8u)), + (TRIT | (4u << 8u)), + (JUST_BITS | (6u << 8u)), + (QUINT | (4u << 8u)), + (TRIT | (5u << 8u)), + (JUST_BITS | (7u << 8u)), + (QUINT | (5u << 8u)), + (TRIT | (6u << 8u)), + (JUST_BITS | (8u << 8u)) +); // Input ASTC texture globals int total_bitsread = 0; @@ -157,22 +175,6 @@ uint FastReplicateTo6(uint value, uint num_bits) { return ReplicateBits(value, num_bits, 6); } -uint Div3Floor(uint v) { - return (v * 0x5556) >> 16; -} - -uint Div3Ceil(uint v) { - return Div3Floor(v + 2); -} - -uint Div5Floor(uint v) { - return (v * 0x3334) >> 16; -} - -uint Div5Ceil(uint v) { - return Div5Floor(v + 4); -} - uint Hash52(uint p) { p ^= p >> 15; p -= p << 17; @@ -260,15 +262,16 @@ EncodingData GetEncodingFromVector(uint index) { // Returns the number of bits required to encode n_vals values. uint GetBitLength(uint n_vals, uint encoding_index) { + // uint Div3Floor(uint v) { return (v * 0x5556) >> 16; } + // uint Div3Ceil(uint v) { return Div3Floor(v + 2); } + // uint Div5Floor(uint v) { return (v * 0x3334) >> 16; } + // uint Div5Ceil(uint v) { return Div5Floor(v + 4); } const EncodingData encoding_value = EncodingData(encoding_values[encoding_index]); const uint encoding = Encoding(encoding_value); - uint total_bits = NumBits(encoding_value) * n_vals; - if (encoding == TRIT) { - total_bits += Div5Ceil(n_vals * 8); - } else if (encoding == QUINT) { - total_bits += Div3Ceil(n_vals * 7); - } - return total_bits; + const uvec3 div_constant = uvec3(0, 0x5556, 0x3334); + return NumBits(encoding_value) * n_vals + + ((((n_vals * ((0x870 >> (encoding * 4)) & 0xf)) + ((0x420 >> (encoding * 4)) & 0xf)) + * div_constant[encoding]) >> 16); } uint GetNumWeightValues(uvec2 size, bool dual_plane) { @@ -423,11 +426,10 @@ void DecodeColorValues(uvec4 modes, uint num_partitions, uint color_data_bits, o const uint encoding = Encoding(val); const uint bitlen = NumBits(val); const uint bitval = BitValue(val); - uint A = 0, B = 0, C = 0, D = 0; - A = ReplicateBitTo9((bitval & 1)); + uint B = 0, C = 0, D = 0; + uint A = ReplicateBitTo9((bitval & 1)); switch (encoding) { case JUST_BITS: - color_values[++out_index] = FastReplicateTo8(bitval, bitlen); break; case TRIT: { D = QuintTritValue(val); @@ -502,12 +504,12 @@ void DecodeColorValues(uvec4 modes, uint num_partitions, uint color_data_bits, o break; } } - if (encoding != JUST_BITS) { - uint T = (D * C) + B; - T ^= A; - T = (A & 0x80) | (T >> 2); - color_values[++out_index] = T; - } + uint unq = D * C + B; + unq = unq ^ A; + unq = (A & 0x80) | (unq >> 2); + color_values[++out_index] = encoding == JUST_BITS + ? FastReplicateTo8(bitval, bitlen) + : unq; } } @@ -566,12 +568,8 @@ void ComputeEndpoints(out uvec4 ep1, out uvec4 ep2, uint color_endpoint_mode, ui } case 5: { READ_INT_VALUES(4) - ivec2 transferred = BitTransferSigned(V[0].y, V[0].x); - V[0].y = transferred.x; - V[0].x = transferred.y; - transferred = BitTransferSigned(V[0].w, V[0].z); - V[0].w = transferred.x; - V[0].z = transferred.y; + V[0].yx = BitTransferSigned(V[0].y, V[0].x); + V[0].wz = BitTransferSigned(V[0].w, V[0].z); ep1 = ClampByte(ivec4(V[0].z, V[0].x, V[0].x, V[0].x)); ep2 = ClampByte(ivec4(V[0].z + V[0].w, V[0].x + V[0].y, V[0].x + V[0].y, V[0].x + V[0].y)); break; @@ -595,15 +593,9 @@ void ComputeEndpoints(out uvec4 ep1, out uvec4 ep2, uint color_endpoint_mode, ui } case 9: { READ_INT_VALUES(6) - ivec2 transferred = BitTransferSigned(V[0].y, V[0].x); - V[0].y = transferred.x; - V[0].x = transferred.y; - transferred = BitTransferSigned(V[0].w, V[0].z); - V[0].w = transferred.x; - V[0].z = transferred.y; - transferred = BitTransferSigned(V[1].y, V[1].x); - V[1].y = transferred.x; - V[1].x = transferred.y; + V[0].yx = BitTransferSigned(V[0].y, V[0].x); + V[0].wz = BitTransferSigned(V[0].w, V[0].z); + V[1].yx = BitTransferSigned(V[1].y, V[1].x); if ((V[0].y + V[0].w + V[1].y) >= 0) { ep1 = ClampByte(ivec4(0xFF, V[0].x, V[0].z, V[1].x)); ep2 = ClampByte(ivec4(0xFF, V[0].x + V[0].y, V[0].z + V[0].w, V[1].x + V[1].y)); @@ -632,21 +624,10 @@ void ComputeEndpoints(out uvec4 ep1, out uvec4 ep2, uint color_endpoint_mode, ui } case 13: { READ_INT_VALUES(8) - ivec2 transferred = BitTransferSigned(V[0].y, V[0].x); - V[0].y = transferred.x; - V[0].x = transferred.y; - transferred = BitTransferSigned(V[0].w, V[0].z); - V[0].w = transferred.x; - V[0].z = transferred.y; - - transferred = BitTransferSigned(V[1].y, V[1].x); - V[1].y = transferred.x; - V[1].x = transferred.y; - - transferred = BitTransferSigned(V[1].w, V[1].z); - V[1].w = transferred.x; - V[1].z = transferred.y; - + V[0].yx = BitTransferSigned(V[0].y, V[0].x); + V[0].wz = BitTransferSigned(V[0].w, V[0].z); + V[1].yx = BitTransferSigned(V[1].y, V[1].x); + V[1].wz = BitTransferSigned(V[1].w, V[1].z); if ((V[0].y + V[0].w + V[1].y) >= 0) { ep1 = ClampByte(ivec4(V[1].z, V[0].x, V[0].z, V[1].x)); ep2 = ClampByte(ivec4(V[1].w + V[1].z, V[0].x + V[0].y, V[0].z + V[0].w, V[1].x + V[1].y));