better table set

This commit is contained in:
lizzie 2025-12-29 19:40:47 +00:00 committed by crueter
parent 697d6229af
commit 819dc6a93e
1 changed files with 27 additions and 64 deletions

View File

@ -107,36 +107,32 @@ namespace {
int32_t c[8] = {0};
if (isSigned) {
c[0] = static_cast<signed char>(data & 0xFF);
c[1] = static_cast<signed char>((data & 0xFF00) >> 8);
c[0] = int8_t(data & 0xFF);
c[1] = int8_t((data & 0xFF00) >> 8);
} else {
c[0] = static_cast<uint8_t>(data & 0xFF);
c[1] = static_cast<uint8_t>((data & 0xFF00) >> 8);
c[0] = uint8_t(data & 0xFF);
c[1] = uint8_t((data & 0xFF00) >> 8);
}
if (c[0] > c[1]) {
for (int32_t i = 2; i < 8; ++i) {
for (int32_t i = 2; i < 8; ++i)
c[i] = ((8 - i) * c[0] + (i - 1) * c[1]) / 7;
}
} else {
for (int32_t i = 2; i < 6; ++i) {
for (int32_t i = 2; i < 6; ++i)
c[i] = ((6 - i) * c[0] + (i - 1) * c[1]) / 5;
}
c[6] = isSigned ? -128 : 0;
c[7] = isSigned ? 127 : 255;
}
for (size_t j = 0; j < BlockHeight && (y + j) < dstH; j++) {
for (size_t i = 0; i < BlockWidth && (x + i) < dstW; i++) {
dst[channel + (i * dstBpp) + (j * dstPitch)] = static_cast<uint8_t>(c[getIdx((j * BlockHeight) + i)]);
}
}
for (size_t j = 0; j < BlockHeight && (y + j) < dstH; j++)
for (size_t i = 0; i < BlockWidth && (x + i) < dstW; i++)
dst[channel + (i * dstBpp) + (j * dstPitch)] = uint8_t(c[getIdx((j * BlockHeight) + i)]);
}
private:
uint8_t getIdx(int32_t i) const {
int32_t offset = i * 3 + 16;
return static_cast<uint8_t>((data & (0x7ull << offset)) >> offset);
return uint8_t((data & (0x7ull << offset)) >> offset);
}
uint64_t data;
@ -148,9 +144,8 @@ namespace {
dst += 3; // Write only to alpha (channel 3)
for (size_t j = 0; j < BlockHeight && (y + j) < dstH; j++, dst += dstPitch) {
uint8_t *dstRow = dst;
for (size_t i = 0; i < BlockWidth && (x + i) < dstW; i++, dstRow += dstBpp) {
for (size_t i = 0; i < BlockWidth && (x + i) < dstW; i++, dstRow += dstBpp)
*dstRow = getAlpha(j * BlockHeight + i);
}
}
}
@ -158,7 +153,7 @@ namespace {
uint8_t getAlpha(int32_t i) const {
int32_t offset = i << 2;
int32_t alpha = (data & (0xFull << offset)) >> offset;
return static_cast<uint8_t>(alpha | (alpha << 4));
return uint8_t(alpha | (alpha << 4));
}
uint64_t data;
@ -746,15 +741,9 @@ namespace {
0b01010000010100000101010100000000, 0b00000000010101010101000001010000,
0b00010101000101010001000000010000, 0b01010100010101000000010000000100,
};
static const uint8_t AnchorTable2[MaxPartitions] = {
0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf,
0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf,
0xf, 0x2, 0x8, 0x2, 0x2, 0x8, 0x8, 0xf,
0x2, 0x8, 0x2, 0x2, 0x8, 0x8, 0x2, 0x2,
0xf, 0xf, 0x6, 0x8, 0x2, 0x8, 0xf, 0xf,
0x2, 0x8, 0x2, 0x2, 0x2, 0xf, 0xf, 0x6,
0x6, 0x2, 0x6, 0x8, 0xf, 0xf, 0x2, 0x2,
0xf, 0xf, 0xf, 0xf, 0xf, 0x2, 0x2, 0xf,
static const uint32_t a_table[MaxPartitions / 8] = {
0xffffffff, 0xffffffff, 0xf882282f, 0x22882282,
0xff8286ff, 0x6ff22282, 0x22ff8626, 0xf22fffff,
};
// @fmt:on
@ -836,12 +825,11 @@ namespace {
}
// Get the indices, calculate final colors, and output
for (int32_t y = 0; y < 4; y++) {
for (int32_t x = 0; x < 4; x++) {
int32_t pixelNum = x + y * 4;
for (uint32_t y = 0; y < 4; y++) {
for (uint32_t x = 0; x < 4; x++) {
uint32_t pixelNum = x + y * 4, firstEndpoint = 0;
IndexInfo idx;
bool isAnchor = false;
int32_t firstEndpoint = 0;
// Bc6H can have either 1 or 2 partitions depending on the mode.
// The number of partitions affects the number of indices with implicit
// leading 0 bits and the number of bits per index.
@ -852,7 +840,8 @@ namespace {
} else {
idx.num_bits = 3;
// There are 2 indices with implicit leading 0-bits.
isAnchor = ((pixelNum == 0) || (pixelNum == AnchorTable2[partition]));
// a_table packs eight 4-bit anchor indices per 32-bit word, lowest nibble first.
// The word is chosen with partition / 8 and the nibble with partition % 8; shifting
// by partition * 4 would reach or exceed the 32-bit width for partition >= 8, which
// is undefined behaviour for a uint32_t operand.
uint32_t anchor_value = (a_table[partition / 8] >> ((partition % 8) * 4)) & 0x0f;
isAnchor = ((pixelNum == 0) || (pixelNum == anchor_value));
firstEndpoint = ((p_table[partition] >> pixelNum) & 0x03) * 2;
}
@ -1216,40 +1205,14 @@ namespace {
// Table.A2 and Table.A3.""
// Note: This is really confusing - I believe they meant subset instead of partition here.
// s_index >= 0 && s_index <= 2
alignas(64) static const uint8_t a_table[3][64] = {
{
0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf,
0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf,
0xf, 0x2, 0x8, 0x2, 0x2, 0x8, 0x8, 0xf,
0x2, 0x8, 0x2, 0x2, 0x8, 0x8, 0x2, 0x2,
0xf, 0xf, 0x6, 0x8, 0x2, 0x8, 0xf, 0xf,
0x2, 0x8, 0x2, 0x2, 0x2, 0xf, 0xf, 0x6,
0x6, 0x2, 0x6, 0x8, 0xf, 0xf, 0x2, 0x2,
0xf, 0xf, 0xf, 0xf, 0xf, 0x2, 0x2, 0xf,
}, {
0x3, 0x3, 0xf, 0xf, 0x8, 0x3, 0xf, 0xf,
0x8, 0x8, 0x6, 0x6, 0x6, 0x5, 0x3, 0x3,
0x3, 0x3, 0x8, 0xf, 0x3, 0x3, 0x6, 0xa,
0x5, 0x8, 0x8, 0x6, 0x8, 0x5, 0xf, 0xf,
0x8, 0xf, 0x3, 0x5, 0x6, 0xa, 0x8, 0xf,
0xf, 0x3, 0xf, 0x5, 0xf, 0xf, 0xf, 0xf,
0x3, 0xf, 0x5, 0x5, 0x5, 0x8, 0x5, 0xa,
0x5, 0xa, 0x8, 0xd, 0xf, 0xc, 0x3, 0x3,
}, {
0xf, 0x8, 0x8, 0x3, 0xf, 0xf, 0x3, 0x8,
0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0x8,
0xf, 0x8, 0xf, 0x3, 0xf, 0x8, 0xf, 0x8,
0x3, 0xf, 0x6, 0xa, 0xf, 0xf, 0xa, 0x8,
0xf, 0x3, 0xf, 0xa, 0xa, 0x8, 0x9, 0xa,
0x6, 0xf, 0x8, 0xf, 0x3, 0x6, 0x6, 0x8,
0xf, 0x3, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf,
0xf, 0xf, 0xf, 0xf, 0x3, 0xf, 0xf, 0x8,
}
alignas(64) static const uint32_t a_table[3][64 / 8] = {
{ 0xffffffff, 0xffffffff, 0xf882282f, 0x22882282, 0xff8286ff, 0x6ff22282, 0x22ff8626, 0xf22fffff },
{ 0xff38ff33, 0x33566688, 0xa633f833, 0xff586885, 0xf8a653f8, 0xffff5f3f, 0xa58555f3, 0x33cfd8a5 },
{ 0x83ff388f, 0x8fffffff, 0x8f8f3f8f, 0x8affa6f3, 0xa98aaf3f, 0x8663f8f6, 0xffffff3f, 0x8ff3ffff },
};
// reading all faster because ternary logic is good
uint64_t const g0 = a_table[0][p_index];
uint64_t const g1 = a_table[1][p_index];
uint64_t const g2 = a_table[2][p_index];
// Each a_table row packs eight 4-bit anchor indices per 32-bit word, lowest nibble
// first: p_index / 8 picks the word and p_index % 8 picks the nibble. The shift count
// must stay below 32 because the shifted operand is a uint32_t; using p_index * 4
// directly is undefined behaviour for p_index >= 8.
// All three rows are read unconditionally (the original author notes this is faster
// than branching on the subset index).
uint64_t const g0 = (a_table[0][p_index / 8] >> ((p_index % 8) * 4)) & 0x0f;
uint64_t const g1 = (a_table[1][p_index / 8] >> ((p_index % 8) * 4)) & 0x0f;
uint64_t const g2 = (a_table[2][p_index / 8] >> ((p_index % 8) * 4)) & 0x0f;
uint64_t const lookup_table = 0x0000
| ((g1 << 16) | (g1 << 20) | (g0 << 24) | (g1 << 28))
| ((g2 << 32) | (g2 << 36) | (g2 << 40) | (g2 << 44));