better table set

2025-12-29 19:40:47 +00:00 · 2025-12-29 19:40:47 +00:00 · 819dc6a93e
parent 697d6229af
commit 819dc6a93e
1 changed files with 27 additions and 64 deletions
--- a/externals/bc_decoder/bc_decoder.cpp
+++ b/externals/bc_decoder/bc_decoder.cpp
@ -107,36 +107,32 @@ namespace {
            int32_t c[8] = {0};

            if (isSigned) {
-                c[0] = static_cast<signed char>(data & 0xFF);
-                c[1] = static_cast<signed char>((data & 0xFF00) >> 8);
+                c[0] = int8_t(data & 0xFF);
+                c[1] = int8_t((data & 0xFF00) >> 8);
            } else {
-                c[0] = static_cast<uint8_t>(data & 0xFF);
-                c[1] = static_cast<uint8_t>((data & 0xFF00) >> 8);
+                c[0] = uint8_t(data & 0xFF);
+                c[1] = uint8_t((data & 0xFF00) >> 8);
            }

            if (c[0] > c[1]) {
-                for (int32_t i = 2; i < 8; ++i) {
+                for (int32_t i = 2; i < 8; ++i)
                    c[i] = ((8 - i) * c[0] + (i - 1) * c[1]) / 7;
-                }
            } else {
-                for (int32_t i = 2; i < 6; ++i) {
+                for (int32_t i = 2; i < 6; ++i)
                    c[i] = ((6 - i) * c[0] + (i - 1) * c[1]) / 5;
-                }
                c[6] = isSigned ? -128 : 0;
                c[7] = isSigned ? 127 : 255;
            }

-            for (size_t j = 0; j < BlockHeight && (y + j) < dstH; j++) {
-                for (size_t i = 0; i < BlockWidth && (x + i) < dstW; i++) {
-                    dst[channel + (i * dstBpp) + (j * dstPitch)] = static_cast<uint8_t>(c[getIdx((j * BlockHeight) + i)]);
-                }
-            }
+            for (size_t j = 0; j < BlockHeight && (y + j) < dstH; j++)
+                for (size_t i = 0; i < BlockWidth && (x + i) < dstW; i++)
+                    dst[channel + (i * dstBpp) + (j * dstPitch)] = uint8_t(c[getIdx((j * BlockHeight) + i)]);
        }

      private:
        uint8_t getIdx(int32_t i) const {
            int32_t offset = i * 3 + 16;
-            return static_cast<uint8_t>((data & (0x7ull << offset)) >> offset);
+            return uint8_t((data & (0x7ull << offset)) >> offset);
        }

        uint64_t data;
@ -148,9 +144,8 @@ namespace {
            dst += 3;  // Write only to alpha (channel 3)
            for (size_t j = 0; j < BlockHeight && (y + j) < dstH; j++, dst += dstPitch) {
                uint8_t *dstRow = dst;
-                for (size_t i = 0; i < BlockWidth && (x + i) < dstW; i++, dstRow += dstBpp) {
+                for (size_t i = 0; i < BlockWidth && (x + i) < dstW; i++, dstRow += dstBpp)
                    *dstRow = getAlpha(j * BlockHeight + i);
-                }
            }
        }

@ -158,7 +153,7 @@ namespace {
        uint8_t getAlpha(int32_t i) const {
            int32_t offset = i << 2;
            int32_t alpha = (data & (0xFull << offset)) >> offset;
-            return static_cast<uint8_t>(alpha | (alpha << 4));
+            return uint8_t(alpha | (alpha << 4));
        }

        uint64_t data;
@ -746,15 +741,9 @@ namespace {
                    0b01010000010100000101010100000000, 0b00000000010101010101000001010000,
                    0b00010101000101010001000000010000, 0b01010100010101000000010000000100,
                };
-                static const uint8_t AnchorTable2[MaxPartitions] = {
-                    0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf,
-                    0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf,
-                    0xf, 0x2, 0x8, 0x2, 0x2, 0x8, 0x8, 0xf,
-                    0x2, 0x8, 0x2, 0x2, 0x8, 0x8, 0x2, 0x2,
-                    0xf, 0xf, 0x6, 0x8, 0x2, 0x8, 0xf, 0xf,
-                    0x2, 0x8, 0x2, 0x2, 0x2, 0xf, 0xf, 0x6,
-                    0x6, 0x2, 0x6, 0x8, 0xf, 0xf, 0x2, 0x2,
-                    0xf, 0xf, 0xf, 0xf, 0xf, 0x2, 0x2, 0xf,
+                static const uint32_t a_table[MaxPartitions / 8] = {
+                    0xffffffff, 0xffffffff, 0xf882282f, 0x22882282,
+                    0xff8286ff, 0x6ff22282, 0x22ff8626, 0xf22fffff,
                };
                // @fmt:on

@ -836,12 +825,11 @@ namespace {
                }

                // Get the indices, calculate final colors, and output
-                for (int32_t y = 0; y < 4; y++) {
-                    for (int32_t x = 0; x < 4; x++) {
-                        int32_t pixelNum = x + y * 4;
+                for (uint32_t y = 0; y < 4; y++) {
+                    for (uint32_t x = 0; x < 4; x++) {
+                        uint32_t pixelNum = x + y * 4, firstEndpoint = 0;
                        IndexInfo idx;
                        bool isAnchor = false;
-                        int32_t firstEndpoint = 0;
                        // Bc6H can have either 1 or 2 petitions depending on the mode.
                        // The number of petitions affects the number of indices with implicit
                        // leading 0 bits and the number of bits per index.
@ -852,7 +840,8 @@ namespace {
                        } else {
                            idx.num_bits = 3;
                            // There are 2 indices with implicit leading 0-bits.
-                            isAnchor = ((pixelNum == 0) || (pixelNum == AnchorTable2[partition]));
+                            uint32_t anchor_value = (a_table[partition / 8] >> ((partition % 8) * 4)) & 0x0f; // 8 packed 4-bit entries per word; shift by the in-word slot only (a shift >= 32 is UB)
+                            isAnchor = ((pixelNum == 0) || (pixelNum == anchor_value));
                            firstEndpoint = ((p_table[partition] >> pixelNum) & 0x03) * 2;
                        }

@ -1216,40 +1205,14 @@ namespace {
                // Table.A2 and Table.A3.""
                // Note: This is really confusing - I believe they meant subset instead of partition here.
                // s_index >= 0 && s_index <= 2
-                alignas(64) static const uint8_t a_table[3][64] = {
-                    {
-                        0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf,
-                        0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf,
-                        0xf, 0x2, 0x8, 0x2, 0x2, 0x8, 0x8, 0xf,
-                        0x2, 0x8, 0x2, 0x2, 0x8, 0x8, 0x2, 0x2,
-                        0xf, 0xf, 0x6, 0x8, 0x2, 0x8, 0xf, 0xf,
-                        0x2, 0x8, 0x2, 0x2, 0x2, 0xf, 0xf, 0x6,
-                        0x6, 0x2, 0x6, 0x8, 0xf, 0xf, 0x2, 0x2,
-                        0xf, 0xf, 0xf, 0xf, 0xf, 0x2, 0x2, 0xf,
-                    }, {
-                        0x3, 0x3, 0xf, 0xf, 0x8, 0x3, 0xf, 0xf,
-                        0x8, 0x8, 0x6, 0x6, 0x6, 0x5, 0x3, 0x3,
-                        0x3, 0x3, 0x8, 0xf, 0x3, 0x3, 0x6, 0xa,
-                        0x5, 0x8, 0x8, 0x6, 0x8, 0x5, 0xf, 0xf,
-                        0x8, 0xf, 0x3, 0x5, 0x6, 0xa, 0x8, 0xf,
-                        0xf, 0x3, 0xf, 0x5, 0xf, 0xf, 0xf, 0xf,
-                        0x3, 0xf, 0x5, 0x5, 0x5, 0x8, 0x5, 0xa,
-                        0x5, 0xa, 0x8, 0xd, 0xf, 0xc, 0x3, 0x3,
-                    }, {
-                        0xf, 0x8, 0x8, 0x3, 0xf, 0xf, 0x3, 0x8,
-                        0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0x8,
-                        0xf, 0x8, 0xf, 0x3, 0xf, 0x8, 0xf, 0x8,
-                        0x3, 0xf, 0x6, 0xa, 0xf, 0xf, 0xa, 0x8,
-                        0xf, 0x3, 0xf, 0xa, 0xa, 0x8, 0x9, 0xa,
-                        0x6, 0xf, 0x8, 0xf, 0x3, 0x6, 0x6, 0x8,
-                        0xf, 0x3, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf,
-                        0xf, 0xf, 0xf, 0xf, 0x3, 0xf, 0xf, 0x8,
-                    }
+                alignas(64) static const uint32_t a_table[3][64 / 8] = {
+                    { 0xffffffff, 0xffffffff, 0xf882282f, 0x22882282, 0xff8286ff, 0x6ff22282, 0x22ff8626, 0xf22fffff },
+                    { 0xff38ff33, 0x33566688, 0xa633f833, 0xff586885, 0xf8a653f8, 0xffff5f3f, 0xa58555f3, 0x33cfd8a5 },
+                    { 0x83ff388f, 0x8fffffff, 0x8f8f3f8f, 0x8affa6f3, 0xa98aaf3f, 0x8663f8f6, 0xffffff3f, 0x8ff3ffff },
                };
-                // reading all faster because ternary logic is good
-                uint64_t const g0 = a_table[0][p_index];
-                uint64_t const g1 = a_table[1][p_index];
-                uint64_t const g2 = a_table[2][p_index];
+                uint64_t const g0 = (a_table[0][p_index / 8] >> ((p_index % 8) * 4)) & 0x0f; // reading all faster because ternary logic is good
+                uint64_t const g1 = (a_table[1][p_index / 8] >> ((p_index % 8) * 4)) & 0x0f; // 8 packed nibbles per 32-bit word: shift by the slot within the word
+                uint64_t const g2 = (a_table[2][p_index / 8] >> ((p_index % 8) * 4)) & 0x0f; // (shifting a uint32_t by >= 32 would be undefined behaviour)
                uint64_t const lookup_table = 0x0000
                    | ((g1 << 16) | (g1 << 20) | (g0 << 24) | (g1 << 28))
                    | ((g2 << 32) | (g2 << 36) | (g2 << 40) | (g2 << 44));