From 47cb31a5efe7af8a944ffe6603bfa6762b392140 Mon Sep 17 00:00:00 2001 From: lizzie Date: Mon, 29 Dec 2025 06:05:53 +0000 Subject: [PATCH] [bcn] ternary compose and simplify subsetIndex & anchorIndex --- externals/bc_decoder/bc_decoder.cpp | 739 ++++++++------------- externals/bc_decoder/bc_decoder.h | 44 +- src/video_core/texture_cache/decode_bc.cpp | 71 +- src/video_core/texture_cache/util.cpp | 25 +- 4 files changed, 337 insertions(+), 542 deletions(-) diff --git a/externals/bc_decoder/bc_decoder.cpp b/externals/bc_decoder/bc_decoder.cpp index 536c44f34b..57d6557439 100644 --- a/externals/bc_decoder/bc_decoder.cpp +++ b/externals/bc_decoder/bc_decoder.cpp @@ -1,5 +1,5 @@ // SPDX-License-Identifier: MPL-2.0 -// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/) +// Copyright 2022 Skyline Team and Contributors (https://github.com/skyline-emu/) // Copyright 2019 The SwiftShader Authors. All Rights Reserved. // This BCn Decoder is directly derivative of Swiftshader's BCn Decoder found at: https://github.com/google/swiftshader/blob/d070309f7d154d6764cbd514b1a5c8bfcef61d06/src/Device/BC_Decoder.cpp @@ -8,12 +8,14 @@ #include #include +#include #include #include +#include namespace { - constexpr int BlockWidth = 4; - constexpr int BlockHeight = 4; + constexpr int32_t BlockWidth = 4; + constexpr int32_t BlockHeight = 4; struct BC_color { void decode(uint8_t *dst, size_t x, size_t y, size_t dstW, size_t dstH, size_t dstPitch, size_t dstBpp, bool hasAlphaChannel, bool hasSeparateAlpha) const { @@ -25,16 +27,14 @@ namespace { c[3] = ((c[1] * 2) + c[0]) / 3; } else { c[2] = (c[0] + c[1]) >> 1; - if (hasAlphaChannel) { + if (hasAlphaChannel) c[3].clearAlpha(); - } } - for (int j = 0; j < BlockHeight && (y + j) < dstH; j++) { - size_t dstOffset = j * dstPitch; - size_t idxOffset = j * BlockHeight; + for (int32_t j = 0; j < BlockHeight && (y + j) < dstH; j++) { + size_t dstOffset = j * dstPitch, idxOffset = j * BlockHeight; for (size_t i = 0; i <
BlockWidth && (x + i) < dstW; i++, idxOffset++, dstOffset += dstBpp) { - *reinterpret_cast(dst + dstOffset) = c[getIdx(idxOffset)].pack8888(); + *reinterpret_cast(dst + dstOffset) = c[getIdx(idxOffset)].pack8888(); } } } @@ -46,13 +46,13 @@ namespace { c[3] = 0xFF000000; } - void extract565(const unsigned int c565) { + void extract565(const uint32_t c565) { c[0] = ((c565 & 0x0000001F) << 3) | ((c565 & 0x0000001C) >> 2); c[1] = ((c565 & 0x000007E0) >> 3) | ((c565 & 0x00000600) >> 9); c[2] = ((c565 & 0x0000F800) >> 8) | ((c565 & 0x0000E000) >> 13); } - unsigned int pack8888() const { + uint32_t pack8888() const { return ((c[0] & 0xFF) << 16) | ((c[1] & 0xFF) << 8) | (c[2] & 0xFF) | c[3]; } @@ -60,56 +60,52 @@ namespace { c[3] = 0; } - Color operator*(int factor) const { + Color operator*(int32_t factor) const { Color res; - for (int i = 0; i < 4; ++i) { + for (int32_t i = 0; i < 4; ++i) { res.c[i] = c[i] * factor; } return res; } - Color operator/(int factor) const { + Color operator/(int32_t factor) const { Color res; - for (int i = 0; i < 4; ++i) { + for (int32_t i = 0; i < 4; ++i) res.c[i] = c[i] / factor; - } return res; } - Color operator>>(int shift) const { + Color operator>>(int32_t shift) const { Color res; - for (int i = 0; i < 4; ++i) { + for (int32_t i = 0; i < 4; ++i) res.c[i] = c[i] >> shift; - } return res; } Color operator+(Color const &obj) const { Color res; - for (int i = 0; i < 4; ++i) { + for (int32_t i = 0; i < 4; ++i) res.c[i] = c[i] + obj.c[i]; - } return res; } - - private: - int c[4]; + private: + int32_t c[4]; }; - size_t getIdx(int i) const { + size_t getIdx(int32_t i) const { size_t offset = i << 1; // 2 bytes per index return (idx & (0x3 << offset)) >> offset; } unsigned short c0; unsigned short c1; - unsigned int idx; + uint32_t idx; }; static_assert(sizeof(BC_color) == 8, "BC_color must be 8 bytes"); struct BC_channel { void decode(uint8_t *dst, size_t x, size_t y, size_t dstW, size_t dstH, size_t dstPitch, size_t dstBpp, size_t 
channel, bool isSigned) const { - int c[8] = {0}; + int32_t c[8] = {0}; if (isSigned) { c[0] = static_cast(data & 0xFF); @@ -120,11 +116,11 @@ namespace { } if (c[0] > c[1]) { - for (int i = 2; i < 8; ++i) { + for (int32_t i = 2; i < 8; ++i) { c[i] = ((8 - i) * c[0] + (i - 1) * c[1]) / 7; } } else { - for (int i = 2; i < 6; ++i) { + for (int32_t i = 2; i < 6; ++i) { c[i] = ((6 - i) * c[0] + (i - 1) * c[1]) / 5; } c[6] = isSigned ? -128 : 0; @@ -139,8 +135,8 @@ namespace { } private: - uint8_t getIdx(int i) const { - int offset = i * 3 + 16; + uint8_t getIdx(int32_t i) const { + int32_t offset = i * 3 + 16; return static_cast((data & (0x7ull << offset)) >> offset); } @@ -160,9 +156,9 @@ namespace { } private: - uint8_t getAlpha(int i) const { - int offset = i << 2; - int alpha = (data & (0xFull << offset)) >> offset; + uint8_t getAlpha(int32_t i) const { + int32_t offset = i << 2; + int32_t alpha = (data & (0xFull << offset)) >> offset; return static_cast(alpha | (alpha << 4)); } @@ -171,7 +167,7 @@ namespace { static_assert(sizeof(BC_alpha) == 8, "BC_alpha must be 8 bytes"); namespace BC6H { - static constexpr int MaxPartitions = 64; + static constexpr int32_t MaxPartitions = 64; // @fmt:off @@ -282,7 +278,7 @@ namespace { : rgba(r, g, b) { } - Color(int r, int g, int b) + Color(int32_t r, int32_t g, int32_t b) : rgba((uint16_t) r, (uint16_t) g, (uint16_t) b) { } @@ -325,7 +321,7 @@ namespace { return (val ^ mask) - mask; } - static int constexpr RGBfChannels = 3; + static int32_t constexpr RGBfChannels = 3; struct RGBf { uint16_t channel[RGBfChannels]; size_t size[RGBfChannels]; @@ -336,7 +332,7 @@ namespace { static_assert(sizeof(channel) / sizeof(channel[0]) == RGBfChannels, "RGBf must have exactly 3 channels"); static_assert(sizeof(channel) / sizeof(channel[0]) == sizeof(size) / sizeof(size[0]), "RGBf requires equally sized arrays for channels and channel sizes"); - for (int i = 0; i < RGBfChannels; i++) { + for (int32_t i = 0; i < RGBfChannels; i++) { 
channel[i] = 0; size[i] = 0; } @@ -345,7 +341,7 @@ namespace { } void extendSign() { - for (int i = 0; i < RGBfChannels; i++) { + for (int32_t i = 0; i < RGBfChannels; i++) { channel[i] = BC6H::extendSign(channel[i], size[i]); } } @@ -355,7 +351,7 @@ namespace { // // The final computed endpoint is truncated to the base-endpoint's size; void resolveDelta(RGBf base) { - for (int i = 0; i < RGBfChannels; i++) { + for (int32_t i = 0; i < RGBfChannels; i++) { size[i] = base.size[i]; channel[i] = (base.channel[i] + channel[i]) & ((1 << base.size[i]) - 1); } @@ -377,7 +373,7 @@ namespace { } void unquantizeUnsigned() { - for (int i = 0; i < RGBfChannels; i++) { + for (int32_t i = 0; i < RGBfChannels; i++) { if (size[i] >= 15 || channel[i] == 0) { continue; } else if (channel[i] == ((1u << size[i]) - 1)) { @@ -392,7 +388,7 @@ namespace { } void unquantizeSigned() { - for (int i = 0; i < RGBfChannels; i++) { + for (int32_t i = 0; i < RGBfChannels; i++) { if (size[i] >= 16 || channel[i] == 0) { continue; } @@ -447,19 +443,19 @@ namespace { } assert(MSB - LSB + 1 < sizeof(uint32_t) * 8); - uint32_t numBits = MSB - LSB + 1; - uint32_t mask = (1 << numBits) - 1; + uint32_t num_bits = MSB - LSB + 1; + uint32_t mask = (1 << num_bits) - 1; // Read the low N bits uint32_t bits = (low64 & mask); - low64 >>= numBits; + low64 >>= num_bits; // Put the low N bits of high64 into the high 64-N bits of low64 - low64 |= (high64 & mask) << (sizeof(high64) * 8 - numBits); - high64 >>= numBits; + low64 |= (high64 & mask) << (sizeof(high64) * 8 - num_bits); + high64 >>= num_bits; if (reversed) { uint32_t tmp = 0; - for (uint32_t numSwaps = 0; numSwaps < numBits; numSwaps++) { + for (uint32_t numSwaps = 0; numSwaps < num_bits; numSwaps++) { tmp <<= 1; tmp |= (bits & 1); bits >>= 1; @@ -474,7 +470,7 @@ namespace { struct IndexInfo { uint64_t value; - int numBits; + int64_t num_bits; }; // Interpolates between two endpoints, then does a final unquantization step @@ -485,13 +481,13 @@ namespace { 
static constexpr uint32_t const *weightsN[] = { nullptr, nullptr, nullptr, weights3, weights4 }; - auto weights = weightsN[index.numBits]; + auto weights = weightsN[index.num_bits]; assert(weights != nullptr); Color color; uint32_t e0Weight = 64 - weights[index.value]; uint32_t e1Weight = weights[index.value]; - for (int i = 0; i < RGBfChannels; i++) { + for (int32_t i = 0; i < RGBfChannels; i++) { int32_t e0Channel = e0.channel[i]; int32_t e1Channel = e1.channel[i]; @@ -554,17 +550,17 @@ namespace { }; struct ModeDesc { - int number; + int32_t number; bool hasDelta; - int partitionCount; - int endpointBits; + int32_t partitionCount; + int32_t endpointBits; DeltaBits deltaBits; constexpr ModeDesc() : number(-1), hasDelta(false), partitionCount(0), endpointBits(0) { } - constexpr ModeDesc(int number, bool hasDelta, int partitionCount, int endpointBits, DeltaBits deltaBits) + constexpr ModeDesc(int32_t number, bool hasDelta, int32_t partitionCount, int32_t endpointBits, DeltaBits deltaBits) : number(number), hasDelta(hasDelta), partitionCount(partitionCount), endpointBits(endpointBits), deltaBits(deltaBits) { } }; @@ -572,26 +568,26 @@ namespace { struct BlockDesc { DataType type; Channel channel; - int MSB; - int LSB; + int32_t MSB; + int32_t LSB; ModeDesc modeDesc; constexpr BlockDesc() : type(End), channel(None), MSB(0), LSB(0), modeDesc() { } - constexpr BlockDesc(const DataType type, Channel channel, int MSB, int LSB, ModeDesc modeDesc) + constexpr BlockDesc(const DataType type, Channel channel, int32_t MSB, int32_t LSB, ModeDesc modeDesc) : type(type), channel(channel), MSB(MSB), LSB(LSB), modeDesc(modeDesc) { } - constexpr BlockDesc(DataType type, Channel channel, int MSB, int LSB) + constexpr BlockDesc(DataType type, Channel channel, int32_t MSB, int32_t LSB) : type(type), channel(channel), MSB(MSB), LSB(LSB), modeDesc() { } }; // Turns a legal mode into an index into the BlockDesc table. // Illegal or reserved modes return -1. 
- static int modeToIndex(uint8_t mode) { + static int32_t modeToIndex(uint8_t mode) { if (mode <= 3) { return mode; } else if ((mode & 0x2) != 0) { @@ -624,9 +620,9 @@ namespace { // the variable is min(LSB, MSB). // // Invalid or reserved modes return an empty list. - static constexpr int NumBlocks = 14; + static constexpr int32_t NumBlocks = 14; // The largest number of descriptions within a block. - static constexpr int MaxBlockDescIndex = 26; + static constexpr int32_t MaxBlockDescIndex = 26; static constexpr BlockDesc blockDescs[NumBlocks][MaxBlockDescIndex] = { // @fmt:off // Mode 0, Index 0 @@ -808,11 +804,11 @@ namespace { mode = data.consumeBits(4, 0); } - int blockIndex = modeToIndex(mode); + int32_t blockIndex = modeToIndex(mode); // Handle illegal or reserved mode if (blockIndex == -1) { - for (int y = 0; y < 4 && y + dstY < dstHeight; y++) { - for (int x = 0; x < 4 && x + dstX < dstWidth; x++) { + for (int32_t y = 0; y < 4 && y + dstY < dstHeight; y++) { + for (int32_t x = 0; x < 4 && x + dstX < dstWidth; x++) { auto out = reinterpret_cast(dst + sizeof(Color) * x + dstPitch * y); out->rgba = {0, 0, 0}; } @@ -824,9 +820,9 @@ namespace { RGBf e[4]; e[0].isSigned = e[1].isSigned = e[2].isSigned = e[3].isSigned = isSigned; - int partition = 0; + int32_t partition = 0; ModeDesc modeDesc; - for (int index = 0; blockDesc[index].type != End; index++) { + for (int32_t index = 0; blockDesc[index].type != End; index++) { const BlockDesc desc = blockDesc[index]; switch (desc.type) { @@ -835,7 +831,7 @@ namespace { assert(modeDesc.number == mode); e[0].size[0] = e[0].size[1] = e[0].size[2] = modeDesc.endpointBits; - for (int i = 0; i < RGBfChannels; i++) { + for (int32_t i = 0; i < RGBfChannels; i++) { if (modeDesc.hasDelta) { e[1].size[i] = e[2].size[i] = e[3].size[i] = modeDesc.deltaBits.channel[i]; } else { @@ -860,49 +856,49 @@ namespace { // Sign extension if (isSigned) { - for (int ep = 0; ep < modeDesc.partitionCount * 2; ep++) { + for (int32_t ep = 0; ep < 
modeDesc.partitionCount * 2; ep++) { e[ep].extendSign(); } } else if (modeDesc.hasDelta) { // Don't sign-extend the base endpoint in an unsigned format. - for (int ep = 1; ep < modeDesc.partitionCount * 2; ep++) { + for (int32_t ep = 1; ep < modeDesc.partitionCount * 2; ep++) { e[ep].extendSign(); } } // Turn the deltas into endpoints if (modeDesc.hasDelta) { - for (int ep = 1; ep < modeDesc.partitionCount * 2; ep++) { + for (int32_t ep = 1; ep < modeDesc.partitionCount * 2; ep++) { e[ep].resolveDelta(e[0]); } } - for (int ep = 0; ep < modeDesc.partitionCount * 2; ep++) { + for (int32_t ep = 0; ep < modeDesc.partitionCount * 2; ep++) { e[ep].unquantize(); } // Get the indices, calculate final colors, and output - for (int y = 0; y < 4; y++) { - for (int x = 0; x < 4; x++) { - int pixelNum = x + y * 4; + for (int32_t y = 0; y < 4; y++) { + for (int32_t x = 0; x < 4; x++) { + int32_t pixelNum = x + y * 4; IndexInfo idx; bool isAnchor = false; - int firstEndpoint = 0; + int32_t firstEndpoint = 0; // Bc6H can have either 1 or 2 petitions depending on the mode. // The number of petitions affects the number of indices with implicit // leading 0 bits and the number of bits per index. if (modeDesc.partitionCount == 1) { - idx.numBits = 4; + idx.num_bits = 4; // There's an implicit leading 0 bit for the first idx isAnchor = (pixelNum == 0); } else { - idx.numBits = 3; + idx.num_bits = 3; // There are 2 indices with implicit leading 0-bits. isAnchor = ((pixelNum == 0) || (pixelNum == AnchorTable2[partition])); firstEndpoint = PartitionTable2[partition][pixelNum] * 2; } - idx.value = data.consumeBits(idx.numBits - isAnchor - 1, 0); + idx.value = data.consumeBits(idx.num_bits - isAnchor - 1, 0); // Don't exit the loop early, we need to consume these index bits regardless if // we actually output them or not. 
@@ -925,10 +921,10 @@ namespace { // https://docs.microsoft.com/en-us/windows/win32/direct3d11/bc7-format struct Bitfield { - int offset; - int count; + uint32_t offset; + uint32_t count; - constexpr Bitfield Then(const int bits) { return {offset + count, bits}; } + constexpr Bitfield Then(const uint32_t bits) { return {offset + count, bits}; } constexpr bool operator==(const Bitfield &rhs) { return offset == rhs.offset && count == rhs.count; @@ -936,20 +932,20 @@ namespace { }; struct Mode { - const int IDX; // Mode index - const int NS; // Number of subsets in each partition - const int PB; // Partition bits - const int RB; // Rotation bits - const int ISB; // Index selection bits - const int CB; // Color bits - const int AB; // Alpha bits - const int EPB; // Endpoint P-bits - const int SPB; // Shared P-bits - const int IB; // Primary index bits per element - const int IBC; // Primary index bits total - const int IB2; // Secondary index bits per element + const uint32_t IDX; // Mode index + const uint32_t NS; // Number of subsets in each partition + const uint32_t PB; // Partition bits + const uint32_t RB; // Rotation bits + const uint32_t ISB; // Index selection bits + const uint32_t CB; // Color bits + const uint32_t AB; // Alpha bits + const uint32_t EPB; // Endpoint P-bits + const uint32_t SPB; // Shared P-bits + const uint32_t IB; // Primary index bits per element + const uint32_t IBC; // Primary index bits total + const uint32_t IB2; // Secondary index bits per element - constexpr int NumColors() const { return NS * 2; } + constexpr uint32_t NumColors() const { return NS * 2; } constexpr Bitfield Partition() const { return {IDX + 1, PB}; } @@ -957,23 +953,23 @@ namespace { constexpr Bitfield IndexSelection() const { return Rotation().Then(ISB); } - constexpr Bitfield Red(int idx) const { + constexpr Bitfield Red(uint32_t idx) const { return IndexSelection().Then(CB * idx).Then(CB); } - constexpr Bitfield Green(int idx) const { + constexpr Bitfield 
Green(uint32_t idx) const { return Red(NumColors() - 1).Then(CB * idx).Then(CB); } - constexpr Bitfield Blue(int idx) const { + constexpr Bitfield Blue(uint32_t idx) const { return Green(NumColors() - 1).Then(CB * idx).Then(CB); } - constexpr Bitfield Alpha(int idx) const { + constexpr Bitfield Alpha(uint32_t idx) const { return Blue(NumColors() - 1).Then(AB * idx).Then(AB); } - constexpr Bitfield EndpointPBit(int idx) const { + constexpr Bitfield EndpointPBit(uint32_t idx) const { return Alpha(NumColors() - 1).Then(EPB * idx).Then(EPB); } @@ -985,313 +981,92 @@ namespace { return SharedPBit0().Then(SPB); } - constexpr Bitfield PrimaryIndex(int offset, int count) const { + constexpr Bitfield PrimaryIndex(uint32_t offset, uint32_t count) const { return SharedPBit1().Then(offset).Then(count); } - constexpr Bitfield SecondaryIndex(int offset, int count) const { + constexpr Bitfield SecondaryIndex(uint32_t offset, uint32_t count) const { return SharedPBit1().Then(IBC + offset).Then(count); } }; - static constexpr Mode Modes[] = { - // IDX NS PB RB ISB CB AB EPB SPB IB IBC, IB2 - /**/ {0x0, 0x3, 0x4, 0x0, 0x0, 0x4, 0x0, 0x1, 0x0, 0x3, 0x2d, 0x0}, -/**/ {0x1, 0x2, 0x6, 0x0, 0x0, 0x6, 0x0, 0x0, 0x1, 0x3, 0x2e, 0x0}, -/**/ {0x2, 0x3, 0x6, 0x0, 0x0, 0x5, 0x0, 0x0, 0x0, 0x2, 0x1d, 0x0}, -/**/ {0x3, 0x2, 0x6, 0x0, 0x0, 0x7, 0x0, 0x1, 0x0, 0x2, 0x1e, 0x0}, -/**/ {0x4, 0x1, 0x0, 0x2, 0x1, 0x5, 0x6, 0x0, 0x0, 0x2, 0x1f, 0x3}, -/**/ {0x5, 0x1, 0x0, 0x2, 0x0, 0x7, 0x8, 0x0, 0x0, 0x2, 0x1f, 0x2}, -/**/ {0x6, 0x1, 0x0, 0x0, 0x0, 0x7, 0x7, 0x1, 0x0, 0x4, 0x3f, 0x0}, -/**/ {0x7, 0x2, 0x6, 0x0, 0x0, 0x5, 0x5, 0x1, 0x0, 0x2, 0x1e, 0x0}, -/**/ {-1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x00, 0x0}, - }; - - static constexpr int MaxPartitions = 64; - static constexpr int MaxSubsets = 3; - - static constexpr uint8_t PartitionTable2[MaxPartitions][16] = { - {0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1}, - {0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1}, - {0, 1, 1, 1, 0, 1, 1, 1, 
0, 1, 1, 1, 0, 1, 1, 1}, - {0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1}, - {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1}, - {0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1}, - {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1}, - {0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1}, - {0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, - {0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1}, - {0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, - {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1}, - {0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1}, - {0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1}, - {0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0}, - {0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0}, - {0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0}, - {0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1}, - {0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0}, - {0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0}, - {0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0}, - {0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0}, - {0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0}, - {0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0}, - {0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0}, - {0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0}, - {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}, - {0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1}, - {0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0}, - {0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0}, - {0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0}, - {0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0}, - {0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1}, - {0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1}, - {0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 
0}, - {0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0}, - {0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0}, - {0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0}, - {0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0}, - {0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1}, - {0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1}, - {0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0}, - {0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, - {0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0}, - {0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0}, - {0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1}, - {0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1}, - {0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0}, - {0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0}, - {0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1}, - {0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1}, - {0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1}, - {0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1}, - {0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1}, - {0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0}, - {0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0}, - {0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1}, - }; - - static constexpr uint8_t PartitionTable3[MaxPartitions][16] = { - {0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 1, 2, 2, 2, 2}, - {0, 0, 0, 1, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1}, - {0, 0, 0, 0, 2, 0, 0, 1, 2, 2, 1, 1, 2, 2, 1, 1}, - {0, 2, 2, 2, 0, 0, 2, 2, 0, 0, 1, 1, 0, 1, 1, 1}, - {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2}, - {0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 2, 2}, - {0, 0, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1}, - {0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1}, - {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2}, - {0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2}, - {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2}, - {0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2}, - {0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2, 0, 1, 1, 2}, - {0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 2, 0, 1, 2, 
2}, - {0, 0, 1, 1, 0, 1, 1, 2, 1, 1, 2, 2, 1, 2, 2, 2}, - {0, 0, 1, 1, 2, 0, 0, 1, 2, 2, 0, 0, 2, 2, 2, 0}, - {0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 2, 1, 1, 2, 2}, - {0, 1, 1, 1, 0, 0, 1, 1, 2, 0, 0, 1, 2, 2, 0, 0}, - {0, 0, 0, 0, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2}, - {0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 1, 1, 1, 1}, - {0, 1, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2, 0, 2, 2, 2}, - {0, 0, 0, 1, 0, 0, 0, 1, 2, 2, 2, 1, 2, 2, 2, 1}, - {0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2}, - {0, 0, 0, 0, 1, 1, 0, 0, 2, 2, 1, 0, 2, 2, 1, 0}, - {0, 1, 2, 2, 0, 1, 2, 2, 0, 0, 1, 1, 0, 0, 0, 0}, - {0, 0, 1, 2, 0, 0, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2}, - {0, 1, 1, 0, 1, 2, 2, 1, 1, 2, 2, 1, 0, 1, 1, 0}, - {0, 0, 0, 0, 0, 1, 1, 0, 1, 2, 2, 1, 1, 2, 2, 1}, - {0, 0, 2, 2, 1, 1, 0, 2, 1, 1, 0, 2, 0, 0, 2, 2}, - {0, 1, 1, 0, 0, 1, 1, 0, 2, 0, 0, 2, 2, 2, 2, 2}, - {0, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0, 0, 1, 1}, - {0, 0, 0, 0, 2, 0, 0, 0, 2, 2, 1, 1, 2, 2, 2, 1}, - {0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 2, 2, 2}, - {0, 2, 2, 2, 0, 0, 2, 2, 0, 0, 1, 2, 0, 0, 1, 1}, - {0, 0, 1, 1, 0, 0, 1, 2, 0, 0, 2, 2, 0, 2, 2, 2}, - {0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0, 0, 1, 2, 0}, - {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0}, - {0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0}, - {0, 1, 2, 0, 2, 0, 1, 2, 1, 2, 0, 1, 0, 1, 2, 0}, - {0, 0, 1, 1, 2, 2, 0, 0, 1, 1, 2, 2, 0, 0, 1, 1}, - {0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 0, 0, 0, 0, 1, 1}, - {0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2}, - {0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 2, 1, 2, 1, 2, 1}, - {0, 0, 2, 2, 1, 1, 2, 2, 0, 0, 2, 2, 1, 1, 2, 2}, - {0, 0, 2, 2, 0, 0, 1, 1, 0, 0, 2, 2, 0, 0, 1, 1}, - {0, 2, 2, 0, 1, 2, 2, 1, 0, 2, 2, 0, 1, 2, 2, 1}, - {0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 0, 1, 0, 1}, - {0, 0, 0, 0, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1}, - {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 2, 2, 2}, - {0, 2, 2, 2, 0, 1, 1, 1, 0, 2, 2, 2, 0, 1, 1, 1}, - {0, 0, 0, 2, 1, 1, 1, 2, 0, 0, 0, 2, 1, 1, 1, 2}, - {0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2}, - {0, 2, 2, 2, 0, 
1, 1, 1, 0, 1, 1, 1, 0, 2, 2, 2}, - {0, 0, 0, 2, 1, 1, 1, 2, 1, 1, 1, 2, 0, 0, 0, 2}, - {0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 2, 2}, - {0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2, 2, 1, 1, 2}, - {0, 1, 1, 0, 0, 1, 1, 0, 2, 2, 2, 2, 2, 2, 2, 2}, - {0, 0, 2, 2, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 2, 2}, - {0, 0, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 0, 0, 2, 2}, - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2}, - {0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 1}, - {0, 2, 2, 2, 1, 2, 2, 2, 0, 2, 2, 2, 1, 2, 2, 2}, - {0, 1, 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}, - {0, 1, 1, 1, 2, 0, 1, 1, 2, 2, 0, 1, 2, 2, 2, 0}, - }; - - static constexpr uint8_t AnchorTable2[MaxPartitions] = { -// @fmt:off -0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, -0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, -0xf, 0x2, 0x8, 0x2, 0x2, 0x8, 0x8, 0xf, -0x2, 0x8, 0x2, 0x2, 0x8, 0x8, 0x2, 0x2, -0xf, 0xf, 0x6, 0x8, 0x2, 0x8, 0xf, 0xf, -0x2, 0x8, 0x2, 0x2, 0x2, 0xf, 0xf, 0x6, -0x6, 0x2, 0x6, 0x8, 0xf, 0xf, 0x2, 0x2, -0xf, 0xf, 0xf, 0xf, 0xf, 0x2, 0x2, 0xf, -// @fmt:on - }; - - static constexpr uint8_t AnchorTable3a[MaxPartitions] = { -// @fmt:off -0x3, 0x3, 0xf, 0xf, 0x8, 0x3, 0xf, 0xf, -0x8, 0x8, 0x6, 0x6, 0x6, 0x5, 0x3, 0x3, -0x3, 0x3, 0x8, 0xf, 0x3, 0x3, 0x6, 0xa, -0x5, 0x8, 0x8, 0x6, 0x8, 0x5, 0xf, 0xf, -0x8, 0xf, 0x3, 0x5, 0x6, 0xa, 0x8, 0xf, -0xf, 0x3, 0xf, 0x5, 0xf, 0xf, 0xf, 0xf, -0x3, 0xf, 0x5, 0x5, 0x5, 0x8, 0x5, 0xa, -0x5, 0xa, 0x8, 0xd, 0xf, 0xc, 0x3, 0x3, -// @fmt:on - }; - - static constexpr uint8_t AnchorTable3b[MaxPartitions] = { -// @fmt:off -0xf, 0x8, 0x8, 0x3, 0xf, 0xf, 0x3, 0x8, -0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0x8, -0xf, 0x8, 0xf, 0x3, 0xf, 0x8, 0xf, 0x8, -0x3, 0xf, 0x6, 0xa, 0xf, 0xf, 0xa, 0x8, -0xf, 0x3, 0xf, 0xa, 0xa, 0x8, 0x9, 0xa, -0x6, 0xf, 0x8, 0xf, 0x3, 0x6, 0x6, 0x8, -0xf, 0x3, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, -0xf, 0xf, 0xf, 0xf, 0x3, 0xf, 0xf, 0x8, -// @fmt:on - }; - struct Color { struct RGB { - RGB() = default; - - RGB(uint8_t r, uint8_t g, uint8_t b) - : b(b), g(g), r(r) {} - - RGB(int r, 
int g, int b) - : b(static_cast(b)), g(static_cast(g)), r(static_cast(r)) {} - - RGB operator<<(int shift) const { return {r << shift, g << shift, b << shift}; } - - RGB operator>>(int shift) const { return {r >> shift, g >> shift, b >> shift}; } - - RGB operator|(int bits) const { return {r | bits, g | bits, b | bits}; } - - RGB operator|(const RGB &rhs) const { return {r | rhs.r, g | rhs.g, b | rhs.b}; } - - RGB operator+(const RGB &rhs) const { return {r + rhs.r, g + rhs.g, b + rhs.b}; } - + inline RGB() = default; + inline RGB(uint8_t r, uint8_t g, uint8_t b) : b(b), g(g), r(r) {} + inline RGB operator<<(uint32_t shift) const noexcept { + return {uint8_t(r << shift), uint8_t(g << shift), uint8_t(b << shift)}; + } + inline RGB operator>>(uint32_t shift) const noexcept { + return {uint8_t(r >> shift), uint8_t(g >> shift), uint8_t(b >> shift)}; + } + inline RGB operator|(uint32_t bits) const noexcept { + return {uint8_t(r | bits), uint8_t(g | bits), uint8_t(b | bits)}; + } + inline RGB operator|(RGB const& rhs) const noexcept { + return {uint8_t(r | rhs.r), uint8_t(g | rhs.g), uint8_t(b | rhs.b)}; + } + inline RGB operator+(RGB const& rhs) const noexcept { + return {uint8_t(r + rhs.r), uint8_t(g + rhs.g), uint8_t(b + rhs.b)}; + } uint8_t b; uint8_t g; uint8_t r; }; - RGB rgb; uint8_t a; }; - static_assert(sizeof(Color) == 4, "Color size must be 4 bytes"); struct Block { constexpr uint64_t Get(const Bitfield &bf) const { uint64_t mask = (1ULL << bf.count) - 1; - if (bf.offset + bf.count <= 64) { + if (bf.offset + bf.count <= 64) return (low >> bf.offset) & mask; - } - if (bf.offset >= 64) { + if (bf.offset >= 64) return (high >> (bf.offset - 64)) & mask; - } return ((low >> bf.offset) | (high << (64 - bf.offset))) & mask; } - const Mode &mode() const { - if ((low & 0b00000001) != 0) { - return Modes[0]; - } - if ((low & 0b00000010) != 0) { - return Modes[1]; - } - if ((low & 0b00000100) != 0) { - return Modes[2]; - } - if ((low & 0b00001000) != 0) { - return 
Modes[3]; - } - if ((low & 0b00010000) != 0) { - return Modes[4]; - } - if ((low & 0b00100000) != 0) { - return Modes[5]; - } - if ((low & 0b01000000) != 0) { - return Modes[6]; - } - if ((low & 0b10000000) != 0) { - return Modes[7]; - } - return Modes[8]; // Invalid mode + Mode const& mode() const { + static const Mode m_table[8] = { + // IDX NS PB RB ISB CB AB EPB SPB IB IBC, IB2 + /**/ {0x0, 0x3, 0x4, 0x0, 0x0, 0x4, 0x0, 0x1, 0x0, 0x3, 0x2d, 0x0}, + /**/ {0x1, 0x2, 0x6, 0x0, 0x0, 0x6, 0x0, 0x0, 0x1, 0x3, 0x2e, 0x0}, + /**/ {0x2, 0x3, 0x6, 0x0, 0x0, 0x5, 0x0, 0x0, 0x0, 0x2, 0x1d, 0x0}, + /**/ {0x3, 0x2, 0x6, 0x0, 0x0, 0x7, 0x0, 0x1, 0x0, 0x2, 0x1e, 0x0}, + /**/ {0x4, 0x1, 0x0, 0x2, 0x1, 0x5, 0x6, 0x0, 0x0, 0x2, 0x1f, 0x3}, + /**/ {0x5, 0x1, 0x0, 0x2, 0x0, 0x7, 0x8, 0x0, 0x0, 0x2, 0x1f, 0x2}, + /**/ {0x6, 0x1, 0x0, 0x0, 0x0, 0x7, 0x7, 0x1, 0x0, 0x4, 0x3f, 0x0}, + /**/ {0x7, 0x2, 0x6, 0x0, 0x0, 0x5, 0x5, 0x1, 0x0, 0x2, 0x1e, 0x0}, + }; + return m_table[::ffs(low & 0b11111111)]; } struct IndexInfo { uint64_t value; - int numBits; + int64_t num_bits; }; uint8_t interpolate(uint8_t e0, uint8_t e1, const IndexInfo &index) const { - static constexpr uint16_t weights2[] = {0, 21, 43, 64}; - static constexpr uint16_t weights3[] = {0, 9, 18, 27, 37, 46, 55, 64}; - static constexpr uint16_t weights4[] = {0, 4, 9, 13, 17, 21, 26, 30, - 34, 38, 43, 47, 51, 55, 60, 64}; - static constexpr uint16_t const *weightsN[] = { - nullptr, nullptr, weights2, weights3, weights4 + static const uint16_t weightsN[5][16] = { + { 0, 0, 0, 0, 0, 0 }, + { 0, 0, 0, 0, 0, 0 }, + {0, 21, 43, 64}, // {0, 21, 43, 64} = (21 * n) + 1 + {0, 9, 18, 27, 37, 46, 55, 64}, + {0, 4, 9, 13, 17, 21, 26, 30, 34, 38, 43, 47, 51, 55, 60, 64} }; - auto weights = weightsN[index.numBits]; + auto weights = weightsN[index.num_bits]; assert(weights != nullptr); return (uint8_t) (((64 - weights[index.value]) * uint16_t(e0) + weights[index.value] * uint16_t(e1) + 32) >> 6); } void decode(uint8_t *dst, size_t dstX, size_t 
dstY, size_t dstWidth, size_t dstHeight, size_t dstPitch) const { - auto const &mode = this->mode(); - - if (mode.IDX < 0) // Invalid mode: - { - for (size_t y = 0; y < 4 && y + dstY < dstHeight; y++) { - for (size_t x = 0; x < 4 && x + dstX < dstWidth; x++) { - auto out = reinterpret_cast(dst + sizeof(Color) * x + dstPitch * y); - out->rgb = {0, 0, 0}; - out->a = 0; - } - } - return; - } - - using Endpoint = std::array; - std::array subsets; - + assert(low <= 0b11111111); + auto const& mode = this->mode(); + std::array, 3> subsets; for (size_t i = 0; i < mode.NS; i++) { - auto &subset = subsets[i]; + auto& subset = subsets[i]; subset[0].rgb.r = Get(mode.Red(i * 2 + 0)); subset[0].rgb.g = Get(mode.Green(i * 2 + 0)); subset[0].rgb.b = Get(mode.Blue(i * 2 + 0)); @@ -1344,86 +1119,161 @@ namespace { } } - int colorIndexBitOffset = 0; - int alphaIndexBitOffset = 0; - for (int y = 0; y < 4; y++) { - for (int x = 0; x < 4; x++) { - auto texelIdx = y * 4 + x; - auto partitionIdx = Get(mode.Partition()); + int32_t colorIndexBitOffset = 0, alphaIndexBitOffset = 0; + for (int32_t y = 0; y < 4; y++) { + for (int32_t x = 0; x < 4; x++) { + auto const texelIdx = y * 4 + x; + auto const partitionIdx = Get(mode.Partition()); assert(partitionIdx < MaxPartitions); - auto subsetIdx = subsetIndex(mode, partitionIdx, texelIdx); - assert(subsetIdx < MaxSubsets); - auto const &subset = subsets[subsetIdx]; - - auto anchorIdx = anchorIndex(mode, partitionIdx, subsetIdx); - auto isAnchor = anchorIdx == texelIdx; - auto colorIdx = colorIndex(mode, isAnchor, colorIndexBitOffset); - auto alphaIdx = alphaIndex(mode, isAnchor, alphaIndexBitOffset); - + auto const subsetIdx = subsetIndex(mode.NS, partitionIdx, texelIdx); + assert(subsetIdx < 3); + auto const& subset = subsets[subsetIdx]; + auto const anchorIdx = anchorIndex(mode.NS, partitionIdx, subsetIdx); + auto const isAnchor = anchorIdx == texelIdx; + auto const colorIdx = colorIndex(mode, isAnchor, colorIndexBitOffset); + auto const 
alphaIdx = alphaIndex(mode, isAnchor, alphaIndexBitOffset); if (y + dstY >= dstHeight || x + dstX >= dstWidth) { // Don't be tempted to skip early at the loops: // The calls to colorIndex() and alphaIndex() adjust bit // offsets that need to be carefully tracked. continue; } - - Color output; // Note: We flip r and b channels past this point as the texture storage is BGR while the output is RGB - output.rgb.r = interpolate(subset[0].rgb.b, subset[1].rgb.b, colorIdx); - output.rgb.g = interpolate(subset[0].rgb.g, subset[1].rgb.g, colorIdx); - output.rgb.b = interpolate(subset[0].rgb.r, subset[1].rgb.r, colorIdx); - output.a = interpolate(subset[0].a, subset[1].a, alphaIdx); - - switch (Get(mode.Rotation())) { - default: - break; - case 1: - std::swap(output.a, output.rgb.b); - break; - case 2: - std::swap(output.a, output.rgb.g); - break; - case 3: - std::swap(output.a, output.rgb.r); - break; - } - - auto out = reinterpret_cast(dst + sizeof(Color) * x + dstPitch * y); - *out = output; + uint8_t output[4] = { + interpolate(subset[0].rgb.b, subset[1].rgb.b, colorIdx), + interpolate(subset[0].rgb.g, subset[1].rgb.g, colorIdx), + interpolate(subset[0].rgb.r, subset[1].rgb.r, colorIdx), + interpolate(subset[0].a, subset[1].a, alphaIdx) + }; + std::swap(output[3], output[3 - Get(mode.Rotation())]); + reinterpret_cast(dst + dstPitch * y)[x] = *reinterpret_cast(&output); } } } - int subsetIndex(const Mode &mode, int partitionIdx, int texelIndex) const { - switch (mode.NS) { - default: - return 0; - case 2: - return PartitionTable2[partitionIdx][texelIndex]; - case 3: - return PartitionTable3[partitionIdx][texelIndex]; - } + uint32_t subsetIndex(uint32_t ns, uint32_t p_index, uint32_t t_index) const { + // ns := either 0,1,2, p_index %= 64, t_index %= 16 + // pad the bits out so we have an homogenous operation + alignas(64) static const uint32_t p_table[2][64] = { + { // before: 64*16, after: 64*2 + 0b01010000010100000101000001010000, 0b01000000010000000100000001000000, + 
0b01010100010101000101010001010100, 0b01010100010100000101000001000000, + 0b01010000010000000100000000000000, 0b01010101010101000101010001010000, + 0b01010101010101000101000001000000, 0b01010100010100000100000000000000, + 0b01010000010000000000000000000000, 0b01010101010101010101010001010000, + 0b01010101010101000100000000000000, 0b01010100010000000000000000000000, + 0b01010101010101010101010001000000, 0b01010101010101010000000000000000, + 0b01010101010101010101010100000000, 0b01010101000000000000000000000000, + 0b01010101000101010000000100000000, 0b00000000000000000100000001010100, + 0b00010101000000010000000000000000, 0b00000000010000000101000001010100, + 0b00000000000000000100000001010000, 0b00010101000001010000000100000000, + 0b00000101000000010000000000000000, 0b01000000010100000101000001010100, + 0b00000000010000000100000001010000, 0b00000101000000010000000100000000, + 0b00010100000101000001010000010100, 0b00000101000101000001010001010000, + 0b00000001000101010101010001000000, 0b00000000010101010101010100000000, + 0b00010101000000010100000001010100, 0b00000101010000010100000101010000, + 0b01000100010001000100010001000100, 0b01010101000000000101010100000000, + 0b00010001010001000001000101000100, 0b00000101000001010101000001010000, + 0b00000101010100000000010101010000, 0b00010001000100010100010001000100, + 0b01000001000101000100000100010100, 0b01000100000100010001000101000100, + 0b00010101000001010101000001010100, 0b00000001000001010101000001000000, + 0b00000101000001000001000001010000, 0b00000101010001010101000101010000, + 0b00010100010000010100000100010100, 0b01010000000001010000010101010000, + 0b01000001010000010001010000010100, 0b00000000000101000001010000000000, + 0b00000000000001000001010100000100, 0b00000000000100000101010000010000, + 0b00010000010101000001000000000000, 0b00000100000101010000010000000000, + 0b01010000010000010000010100010100, 0b01000001000001010001010001010000, + 0b00000101010000010101000000010100, 0b00010100000001010100000101010000, + 
0b01000001000001010000010100010100, 0b01000001010100000101000000010100, + 0b01000000000000010001010101010100, 0b01010100000101010000000101000000, + 0b01010000010100000101010100000000, 0b00000000010101010101000001010000, + 0b00010101000101010001000000010000, 0b01010100010101000000010000000100, + }, { // before: 64*16, after: 64*4 + 0b10101010011010000101000001010000, 0b01101010010110100101000001000000, + 0b01011010010110100100001000000000, 0b01010100010100001010000010101000, + 0b10100101101001010000000000000000, 0b10100000101000000101000001010000, + 0b01010101010101011010000010100000, 0b01011010010110100101000001010000, + 0b10101010010101010000000000000000, 0b10101010010101010101010100000000, + 0b10101010101010100101010100000000, 0b10010000100100001001000010010000, + 0b10010100100101001001010010010100, 0b10100100101001001010010010100100, + 0b10101001101001011001010001010000, 0b00101010000010100100001001010000, + 0b10100101100101000101000001000000, 0b00001010010000100101000001010100, + 0b10100101101001011010010100000000, 0b01010101101000001010000010100000, + 0b10101000101010000101010001010100, 0b01101010011010100100000001000000, + 0b10100100101001000101000000000000, 0b00011010000110100000010100000000, + 0b00000000010100001010010010100100, 0b10101010101001011001000010010000, + 0b00010100011010010110100100010100, 0b01101001011010010001010000000000, + 0b10100000100001011000010110100000, 0b10101010100000100001010000010100, + 0b01010000101001001010010001010000, 0b01101010010110100000001000000000, + 0b10101001101001011000000000000000, 0b01010000100100001010000010101000, + 0b10101000101000001001000001010000, 0b00100100001001000010010000100100, + 0b00000000101010100101010100000000, 0b00100100100100100100100100100100, + 0b00100100010010011001001000100100, 0b01010000101001010000101001010000, + 0b01010000000010101010010101010000, 0b10101010101010100100010001000100, + 0b01100110011001100000000000000000, 0b10100101101000001010010110100000, + 0b01010000101000000101000010100000, 
0b01101001001010000110100100101000, + 0b01000100101010101010101001000100, 0b01100110011001100110011000000000, + 0b10101010010001000100010001000100, 0b01010100101010000101010010101000, + 0b10010101100000001001010110000000, 0b10010110100101101001011000000000, + 0b10101000010101000101010010101000, 0b10000000100101011001010110000000, + 0b10101010000101000001010000010100, 0b10010110100101100000000000000000, + 0b10101010101010100001010000010100, 0b10100000010100000101000010100000, + 0b10100000101001011010010110100000, 0b10010110000000000000000000000000, + 0b01000000100000000100000010000000, 0b10101001101010001010100110101000, + 0b10101010101010101010101001000100, 0b00101010010010100101001001010100, + } + }; + uint32_t const mask = (0x03010000 >> (ns * 8)) & 0xff; + return (p_table[ns & 0x01][p_index] >> (t_index << 1)) & mask; } - int anchorIndex(const Mode &mode, int partitionIdx, int subsetIdx) const { - // ARB_texture_compression_bptc states: - // "In partition zero, the anchor index is always index zero. + uint32_t anchorIndex(uint32_t ns, uint32_t p_index, uint32_t s_index)const { + // ARB_texture_compression_bptc states: "In partition zero, the anchor index is always index zero. // In other partitions, the anchor index is specified by tables // Table.A2 and Table.A3."" - // Note: This is really confusing - I believe they meant subset instead - // of partition here. - switch (subsetIdx) { - default: - return 0; - case 1: - return mode.NS == 2 ? AnchorTable2[partitionIdx] : AnchorTable3a[partitionIdx]; - case 2: - return AnchorTable3b[partitionIdx]; - } + // Note: This is really confusing - I believe they meant subset instead of partition here. 
+ // s_index >= 0 && s_index <= 2 + alignas(64) static const uint8_t a_table[3][64] = { + { + 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, + 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, + 0xf, 0x2, 0x8, 0x2, 0x2, 0x8, 0x8, 0xf, + 0x2, 0x8, 0x2, 0x2, 0x8, 0x8, 0x2, 0x2, + 0xf, 0xf, 0x6, 0x8, 0x2, 0x8, 0xf, 0xf, + 0x2, 0x8, 0x2, 0x2, 0x2, 0xf, 0xf, 0x6, + 0x6, 0x2, 0x6, 0x8, 0xf, 0xf, 0x2, 0x2, + 0xf, 0xf, 0xf, 0xf, 0xf, 0x2, 0x2, 0xf, + }, { + 0x3, 0x3, 0xf, 0xf, 0x8, 0x3, 0xf, 0xf, + 0x8, 0x8, 0x6, 0x6, 0x6, 0x5, 0x3, 0x3, + 0x3, 0x3, 0x8, 0xf, 0x3, 0x3, 0x6, 0xa, + 0x5, 0x8, 0x8, 0x6, 0x8, 0x5, 0xf, 0xf, + 0x8, 0xf, 0x3, 0x5, 0x6, 0xa, 0x8, 0xf, + 0xf, 0x3, 0xf, 0x5, 0xf, 0xf, 0xf, 0xf, + 0x3, 0xf, 0x5, 0x5, 0x5, 0x8, 0x5, 0xa, + 0x5, 0xa, 0x8, 0xd, 0xf, 0xc, 0x3, 0x3, + }, { + 0xf, 0x8, 0x8, 0x3, 0xf, 0xf, 0x3, 0x8, + 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0x8, + 0xf, 0x8, 0xf, 0x3, 0xf, 0x8, 0xf, 0x8, + 0x3, 0xf, 0x6, 0xa, 0xf, 0xf, 0xa, 0x8, + 0xf, 0x3, 0xf, 0xa, 0xa, 0x8, 0x9, 0xa, + 0x6, 0xf, 0x8, 0xf, 0x3, 0x6, 0x6, 0x8, + 0xf, 0x3, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, + 0xf, 0xf, 0xf, 0xf, 0x3, 0xf, 0xf, 0x8, + } + }; + // branch-free select: load all three candidates, pack them as nibbles indexed by (s_index * 4 + ns), then shift the wanted one out + uint64_t const g0 = a_table[0][p_index]; + uint64_t const g1 = a_table[1][p_index]; + uint64_t const g2 = a_table[2][p_index]; + uint64_t const lookup_table = 0x0000 + | ((g1 << 16) | (g1 << 20) | (g0 << 24) | (g1 << 28)) + | ((g2 << 32) | (g2 << 36) | (g2 << 40) | (g2 << 44)); + return (lookup_table >> (((s_index * 4) + ns) * 4)) & 0x0f; } - IndexInfo colorIndex(const Mode &mode, bool isAnchor, - int &indexBitOffset) const { + IndexInfo colorIndex(const Mode &mode, bool isAnchor, int32_t &indexBitOffset) const { // ARB_texture_compression_bptc states: // "The index value for interpolating color comes from the secondary // index for the texel if the format has an index selection bit and its @@ -1431,17 +1281,14 @@ namespace { auto idx = Get(mode.IndexSelection()); assert(idx <= 1); bool secondary = idx == 1;
- auto numBits = secondary ? mode.IB2 : mode.IB; - auto numReadBits = numBits - (isAnchor ? 1 : 0); - auto index = - Get(secondary ? mode.SecondaryIndex(indexBitOffset, numReadBits) - : mode.PrimaryIndex(indexBitOffset, numReadBits)); + auto num_bits = secondary ? mode.IB2 : mode.IB; + auto numReadBits = num_bits - (isAnchor ? 1 : 0); + auto index = Get(secondary ? mode.SecondaryIndex(indexBitOffset, numReadBits) : mode.PrimaryIndex(indexBitOffset, numReadBits)); indexBitOffset += numReadBits; - return {index, numBits}; + return {index, num_bits}; } - IndexInfo alphaIndex(const Mode &mode, bool isAnchor, - int &indexBitOffset) const { + IndexInfo alphaIndex(const Mode &mode, bool isAnchor, int32_t &indexBitOffset) const { // ARB_texture_compression_bptc states: // "The alpha index comes from the secondary index if the block has a // secondary index and the block either doesn't have an index selection @@ -1449,13 +1296,11 @@ namespace { auto idx = Get(mode.IndexSelection()); assert(idx <= 1); bool secondary = (mode.IB2 != 0) && (idx == 0); - auto numBits = secondary ? mode.IB2 : mode.IB; - auto numReadBits = numBits - (isAnchor ? 1 : 0); - auto index = - Get(secondary ? mode.SecondaryIndex(indexBitOffset, numReadBits) - : mode.PrimaryIndex(indexBitOffset, numReadBits)); + auto num_bits = secondary ? mode.IB2 : mode.IB; + auto numReadBits = num_bits - (isAnchor ? 1 : 0); + auto index = Get(secondary ? 
mode.SecondaryIndex(indexBitOffset, numReadBits) : mode.PrimaryIndex(indexBitOffset, numReadBits)); indexBitOffset += numReadBits; - return {index, numBits}; + return {index, num_bits}; } // Assumes little-endian @@ -1472,13 +1317,13 @@ namespace bcn { constexpr size_t R8g8b8a8Bpp{4}; //!< The amount of bytes per pixel in R8G8B8A8 constexpr size_t R16g16b16a16Bpp{8}; //!< The amount of bytes per pixel in R16G16B16 - void DecodeBc1(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height) { + void DecodeBc1(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned) { const auto *color{reinterpret_cast(src)}; size_t pitch{R8g8b8a8Bpp * width}; color->decode(dst, x, y, width, height, pitch, R8g8b8a8Bpp, true, false); } - void DecodeBc2(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height) { + void DecodeBc2(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned) { const auto *alpha{reinterpret_cast(src)}; const auto *color{reinterpret_cast(src + 8)}; size_t pitch{R8g8b8a8Bpp * width}; @@ -1486,7 +1331,7 @@ namespace bcn { alpha->decode(dst, x, y, width, height, pitch, R8g8b8a8Bpp); } - void DecodeBc3(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height) { + void DecodeBc3(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned) { const auto *alpha{reinterpret_cast(src)}; const auto *color{reinterpret_cast(src + 8)}; size_t pitch{R8g8b8a8Bpp * width}; @@ -1514,7 +1359,7 @@ namespace bcn { block->decode(dst, x, y, width, height, pitch, R16g16b16a16Bpp, isSigned); } - void DecodeBc7(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height) { + void DecodeBc7(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned) { const auto *block{reinterpret_cast(src)}; size_t pitch{R8g8b8a8Bpp * width}; 
block->decode(dst, x, y, width, height, pitch); diff --git a/externals/bc_decoder/bc_decoder.h b/externals/bc_decoder/bc_decoder.h index 4f0ead7d38..2a49b1f93f 100644 --- a/externals/bc_decoder/bc_decoder.h +++ b/externals/bc_decoder/bc_decoder.h @@ -1,43 +1,23 @@ // SPDX-License-Identifier: MPL-2.0 -// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/) +// Copyright 2022 Skyline Team and Contributors (https://github.com/skyline-emu/) #pragma once #include namespace bcn { - /** - * @brief Decodes a BC1 encoded image to R8G8B8A8 - */ - void DecodeBc1(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height); - - /** - * @brief Decodes a BC2 encoded image to R8G8B8A8 - */ - void DecodeBc2(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height); - - /** - * @brief Decodes a BC3 encoded image to R8G8B8A8 - */ - void DecodeBc3(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height); - - /** - * @brief Decodes a BC4 encoded image to R8 - */ + /// @brief Decodes a BC1 encoded image to R8G8B8A8 + void DecodeBc1(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned); + /// @brief Decodes a BC2 encoded image to R8G8B8A8 + void DecodeBc2(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned); + /// @brief Decodes a BC3 encoded image to R8G8B8A8 + void DecodeBc3(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned); + /// @brief Decodes a BC4 encoded image to R8 void DecodeBc4(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned); - - /** - * @brief Decodes a BC5 encoded image to R8G8 - */ + /// @brief Decodes a BC5 encoded image to R8G8 void DecodeBc5(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned); - - /** - * @brief Decodes a BC6 encoded image + /// @brief Decodes a BC6 encoded image
to R16G16B16A16 - */ + //// @brief Decodes a BC6 encoded image to R16G16B16A16 void DecodeBc6(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned); - - /** - * @brief Decodes a BC7 encoded image to R8G8B8A8 - */ - void DecodeBc7(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height); + /// @brief Decodes a BC7 encoded image to R8G8B8A8 + void DecodeBc7(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned); } diff --git a/src/video_core/texture_cache/decode_bc.cpp b/src/video_core/texture_cache/decode_bc.cpp index 4242cb0fd3..37f67d0cd7 100644 --- a/src/video_core/texture_cache/decode_bc.cpp +++ b/src/video_core/texture_cache/decode_bc.cpp @@ -9,6 +9,7 @@ #include #include +#include "common/assert.h" #include "common/common_types.h" #include "video_core/texture_cache/decode_bc.h" @@ -62,9 +63,28 @@ u32 ConvertedBytesPerBlock(VideoCore::Surface::PixelFormat pixel_format) { } } -template -void DecompressBlocks(std::span input, std::span output, BufferImageCopy& copy, - bool is_signed = false) { +void DecompressBCn(std::span input, std::span output, BufferImageCopy& copy, VideoCore::Surface::PixelFormat pixel_format) { + auto const f = [pixel_format]{ + switch (pixel_format) { + case PixelFormat::BC1_RGBA_UNORM: + case PixelFormat::BC1_RGBA_SRGB: return &bcn::DecodeBc1; + case PixelFormat::BC2_UNORM: + case PixelFormat::BC2_SRGB: return &bcn::DecodeBc2; + case PixelFormat::BC3_UNORM: + case PixelFormat::BC3_SRGB: return &bcn::DecodeBc3; + case PixelFormat::BC4_SNORM: + case PixelFormat::BC4_UNORM: return &bcn::DecodeBc4; + case PixelFormat::BC5_SNORM: + case PixelFormat::BC5_UNORM: return &bcn::DecodeBc5; + case PixelFormat::BC6H_SFLOAT: + case PixelFormat::BC6H_UFLOAT: return &bcn::DecodeBc6; + case PixelFormat::BC7_SRGB: + case PixelFormat::BC7_UNORM: return &bcn::DecodeBc7; + default: + UNREACHABLE_MSG("Unimplemented BCn decompression {}", 
pixel_format); + return &bcn::DecodeBc1; + } + }(); const u32 out_bpp = ConvertedBytesPerBlock(pixel_format); const u32 block_size = BlockSize(pixel_format); const u32 width = copy.image_extent.width; @@ -82,11 +102,7 @@ void DecompressBlocks(std::span input, std::span output, BufferIma for (u32 x = 0; x < width; x += block_width) { const u8* src = input.data() + src_offset; u8* const dst = output.data() + dst_offset; - if constexpr (IsSigned(pixel_format)) { - decompress(src, dst, x, y, width, height, is_signed); - } else { - decompress(src, dst, x, y, width, height); - } + f(src, dst, x, y, width, height, IsSigned(pixel_format)); src_offset += block_size; dst_offset += block_width * out_bpp; } @@ -96,43 +112,4 @@ void DecompressBlocks(std::span input, std::span output, BufferIma } } -void DecompressBCn(std::span input, std::span output, BufferImageCopy& copy, - VideoCore::Surface::PixelFormat pixel_format) { - switch (pixel_format) { - case PixelFormat::BC1_RGBA_UNORM: - case PixelFormat::BC1_RGBA_SRGB: - DecompressBlocks(input, output, copy); - break; - case PixelFormat::BC2_UNORM: - case PixelFormat::BC2_SRGB: - DecompressBlocks(input, output, copy); - break; - case PixelFormat::BC3_UNORM: - case PixelFormat::BC3_SRGB: - DecompressBlocks(input, output, copy); - break; - case PixelFormat::BC4_SNORM: - case PixelFormat::BC4_UNORM: - DecompressBlocks( - input, output, copy, pixel_format == PixelFormat::BC4_SNORM); - break; - case PixelFormat::BC5_SNORM: - case PixelFormat::BC5_UNORM: - DecompressBlocks( - input, output, copy, pixel_format == PixelFormat::BC5_SNORM); - break; - case PixelFormat::BC6H_SFLOAT: - case PixelFormat::BC6H_UFLOAT: - DecompressBlocks( - input, output, copy, pixel_format == PixelFormat::BC6H_SFLOAT); - break; - case PixelFormat::BC7_SRGB: - case PixelFormat::BC7_UNORM: - DecompressBlocks(input, output, copy); - break; - default: - LOG_WARNING(HW_GPU, "Unimplemented BCn decompression {}", pixel_format); - } -} - } // namespace VideoCommon 
diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index e55d0752ec..786b5f23aa 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -922,8 +922,7 @@ boost::container::small_vector UnswizzleImage(Tegra::Memory return copies; } -void ConvertImage(std::span input, const ImageInfo& info, std::span output, - std::span copies) { +void ConvertImage(std::span input, const ImageInfo& info, std::span output, std::span copies) { u32 output_offset = 0; Common::ScratchBuffer decode_scratch; @@ -955,10 +954,10 @@ void ConvertImage(std::span input, const ImageInfo& info, std::span input, const ImageInfo& info, std::span(copy.buffer_size); + const u32 aligned_plane_dim = Common::AlignUp(copy.image_extent.width, 4) * Common::AlignUp(copy.image_extent.height, 4); + copy.buffer_size = (aligned_plane_dim * copy.image_extent.depth * copy.image_subresource.num_layers) / bpp_div; + output_offset += u32(copy.buffer_size); } else { DecompressBCn(input_offset, output.subspan(output_offset), copy, info.format); - output_offset += copy.image_extent.width * copy.image_extent.height * - copy.image_subresource.num_layers * - ConvertedBytesPerBlock(info.format); + output_offset += copy.image_extent.width * copy.image_extent.height * copy.image_subresource.num_layers * ConvertedBytesPerBlock(info.format); } copy.buffer_row_length = mip_size.width;