diff --git a/externals/bc_decoder/bc_decoder.cpp b/externals/bc_decoder/bc_decoder.cpp index 3b48afc322..64ac204ca7 100644 --- a/externals/bc_decoder/bc_decoder.cpp +++ b/externals/bc_decoder/bc_decoder.cpp @@ -107,36 +107,32 @@ namespace { int32_t c[8] = {0}; if (isSigned) { - c[0] = static_cast(data & 0xFF); - c[1] = static_cast((data & 0xFF00) >> 8); + c[0] = int8_t(data & 0xFF); + c[1] = int8_t((data & 0xFF00) >> 8); } else { - c[0] = static_cast(data & 0xFF); - c[1] = static_cast((data & 0xFF00) >> 8); + c[0] = uint8_t(data & 0xFF); + c[1] = uint8_t((data & 0xFF00) >> 8); } if (c[0] > c[1]) { - for (int32_t i = 2; i < 8; ++i) { + for (int32_t i = 2; i < 8; ++i) c[i] = ((8 - i) * c[0] + (i - 1) * c[1]) / 7; - } } else { - for (int32_t i = 2; i < 6; ++i) { + for (int32_t i = 2; i < 6; ++i) c[i] = ((6 - i) * c[0] + (i - 1) * c[1]) / 5; - } c[6] = isSigned ? -128 : 0; c[7] = isSigned ? 127 : 255; } - for (size_t j = 0; j < BlockHeight && (y + j) < dstH; j++) { - for (size_t i = 0; i < BlockWidth && (x + i) < dstW; i++) { - dst[channel + (i * dstBpp) + (j * dstPitch)] = static_cast(c[getIdx((j * BlockHeight) + i)]); - } - } + for (size_t j = 0; j < BlockHeight && (y + j) < dstH; j++) + for (size_t i = 0; i < BlockWidth && (x + i) < dstW; i++) + dst[channel + (i * dstBpp) + (j * dstPitch)] = uint8_t(c[getIdx((j * BlockHeight) + i)]); } private: uint8_t getIdx(int32_t i) const { int32_t offset = i * 3 + 16; - return static_cast((data & (0x7ull << offset)) >> offset); + return uint8_t((data & (0x7ull << offset)) >> offset); } uint64_t data; @@ -148,9 +144,8 @@ namespace { dst += 3; // Write only to alpha (channel 3) for (size_t j = 0; j < BlockHeight && (y + j) < dstH; j++, dst += dstPitch) { uint8_t *dstRow = dst; - for (size_t i = 0; i < BlockWidth && (x + i) < dstW; i++, dstRow += dstBpp) { + for (size_t i = 0; i < BlockWidth && (x + i) < dstW; i++, dstRow += dstBpp) *dstRow = getAlpha(j * BlockHeight + i); - } } } @@ -158,7 +153,7 @@ namespace { uint8_t 
getAlpha(int32_t i) const { int32_t offset = i << 2; int32_t alpha = (data & (0xFull << offset)) >> offset; - return static_cast(alpha | (alpha << 4)); + return uint8_t(alpha | (alpha << 4)); } uint64_t data; @@ -746,15 +741,9 @@ namespace { 0b01010000010100000101010100000000, 0b00000000010101010101000001010000, 0b00010101000101010001000000010000, 0b01010100010101000000010000000100, }; - static const uint8_t AnchorTable2[MaxPartitions] = { - 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, - 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, - 0xf, 0x2, 0x8, 0x2, 0x2, 0x8, 0x8, 0xf, - 0x2, 0x8, 0x2, 0x2, 0x8, 0x8, 0x2, 0x2, - 0xf, 0xf, 0x6, 0x8, 0x2, 0x8, 0xf, 0xf, - 0x2, 0x8, 0x2, 0x2, 0x2, 0xf, 0xf, 0x6, - 0x6, 0x2, 0x6, 0x8, 0xf, 0xf, 0x2, 0x2, - 0xf, 0xf, 0xf, 0xf, 0xf, 0x2, 0x2, 0xf, + static const uint32_t a_table[MaxPartitions / 8] = { + 0xffffffff, 0xffffffff, 0xf882282f, 0x22882282, + 0xff8286ff, 0x6ff22282, 0x22ff8626, 0xf22fffff, }; // @fmt:on @@ -836,12 +825,11 @@ namespace { } // Get the indices, calculate final colors, and output - for (int32_t y = 0; y < 4; y++) { - for (int32_t x = 0; x < 4; x++) { - int32_t pixelNum = x + y * 4; + for (uint32_t y = 0; y < 4; y++) { + for (uint32_t x = 0; x < 4; x++) { + uint32_t pixelNum = x + y * 4, firstEndpoint = 0; IndexInfo idx; bool isAnchor = false; - int32_t firstEndpoint = 0; // Bc6H can have either 1 or 2 petitions depending on the mode. // The number of petitions affects the number of indices with implicit // leading 0 bits and the number of bits per index. @@ -852,7 +840,8 @@ namespace { } else { idx.num_bits = 3; // There are 2 indices with implicit leading 0-bits. 
- isAnchor = ((pixelNum == 0) || (pixelNum == AnchorTable2[partition])); + uint32_t anchor_value = (a_table[partition / 8] >> ((partition % 8) * 4)) & 0x0f; // 8 nibbles per 32-bit word: shift only by the position inside the selected word + isAnchor = ((pixelNum == 0) || (pixelNum == anchor_value)); firstEndpoint = ((p_table[partition] >> pixelNum) & 0x03) * 2; } @@ -1216,40 +1205,14 @@ namespace { // Table.A2 and Table.A3."" // Note: This is really confusing - I believe they meant subset instead of partition here. // s_index >= 0 && s_index <= 2 - alignas(64) static const uint8_t a_table[3][64] = { - { - 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, - 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, - 0xf, 0x2, 0x8, 0x2, 0x2, 0x8, 0x8, 0xf, - 0x2, 0x8, 0x2, 0x2, 0x8, 0x8, 0x2, 0x2, - 0xf, 0xf, 0x6, 0x8, 0x2, 0x8, 0xf, 0xf, - 0x2, 0x8, 0x2, 0x2, 0x2, 0xf, 0xf, 0x6, - 0x6, 0x2, 0x6, 0x8, 0xf, 0xf, 0x2, 0x2, - 0xf, 0xf, 0xf, 0xf, 0xf, 0x2, 0x2, 0xf, - }, { - 0x3, 0x3, 0xf, 0xf, 0x8, 0x3, 0xf, 0xf, - 0x8, 0x8, 0x6, 0x6, 0x6, 0x5, 0x3, 0x3, - 0x3, 0x3, 0x8, 0xf, 0x3, 0x3, 0x6, 0xa, - 0x5, 0x8, 0x8, 0x6, 0x8, 0x5, 0xf, 0xf, - 0x8, 0xf, 0x3, 0x5, 0x6, 0xa, 0x8, 0xf, - 0xf, 0x3, 0xf, 0x5, 0xf, 0xf, 0xf, 0xf, - 0x3, 0xf, 0x5, 0x5, 0x5, 0x8, 0x5, 0xa, - 0x5, 0xa, 0x8, 0xd, 0xf, 0xc, 0x3, 0x3, - }, { - 0xf, 0x8, 0x8, 0x3, 0xf, 0xf, 0x3, 0x8, - 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0x8, - 0xf, 0x8, 0xf, 0x3, 0xf, 0x8, 0xf, 0x8, - 0x3, 0xf, 0x6, 0xa, 0xf, 0xf, 0xa, 0x8, - 0xf, 0x3, 0xf, 0xa, 0xa, 0x8, 0x9, 0xa, - 0x6, 0xf, 0x8, 0xf, 0x3, 0x6, 0x6, 0x8, - 0xf, 0x3, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, - 0xf, 0xf, 0xf, 0xf, 0x3, 0xf, 0xf, 0x8, - } + alignas(64) static const uint32_t a_table[3][64 / 8] = { + { 0xffffffff, 0xffffffff, 0xf882282f, 0x22882282, 0xff8286ff, 0x6ff22282, 0x22ff8626, 0xf22fffff }, + { 0xff38ff33, 0x33566688, 0xa633f833, 0xff586885, 0xf8a653f8, 0xffff5f3f, 0xa58555f3, 0x33cfd8a5 }, + { 0x83ff388f, 0x8fffffff, 0x8f8f3f8f, 0x8affa6f3, 0xa98aaf3f, 0x8663f8f6, 0xffffff3f, 0x8ff3ffff }, }; - // reading all faster because ternary logic is good - uint64_t const g0 = 
a_table[0][p_index]; - uint64_t const g1 = a_table[1][p_index]; - uint64_t const g2 = a_table[2][p_index]; + uint64_t const g0 = (a_table[0][p_index / 8] >> ((p_index % 8) * 4)) & 0x0f; // reading all faster because ternary logic is good; each uint32_t packs 8 nibbles, so shift by the in-word position only + uint64_t const g1 = (a_table[1][p_index / 8] >> ((p_index % 8) * 4)) & 0x0f; + uint64_t const g2 = (a_table[2][p_index / 8] >> ((p_index % 8) * 4)) & 0x0f; uint64_t const lookup_table = 0x0000 | ((g1 << 16) | (g1 << 20) | (g0 << 24) | (g1 << 28)) | ((g2 << 32) | (g2 << 36) | (g2 << 40) | (g2 << 44));