Small change: force emission of constant shifts (replace constant multiplications with explicit shifts)
This commit is contained in:
parent
88eb1aab39
commit
89fc2b94e6
|
|
@ -165,7 +165,7 @@ const uint mod8_table = 0
|
|||
// Assumes num_bits < to_bit, and that neither num_bits nor to_bit is 0
|
||||
uint ReplicateBits(uint value, uint num_bits, uint to_bit, uint table) {
|
||||
const uint repl = value & ((1 << num_bits) - 1);
|
||||
const uint shift = (table >> (num_bits * 2)) & 3;
|
||||
const uint shift = (table >> (num_bits << 1)) & 3;
|
||||
uint v = repl;
|
||||
v |= v << (num_bits << 0); // [ xxxx xxrr ]
|
||||
v |= v << (num_bits << 1); // [ xxxx rrrr ]
|
||||
|
|
@ -266,7 +266,7 @@ uint GetBitLength(uint n_vals, uint encoding_index) {
|
|||
const uint num_bits = NumBits(encoding_value);
|
||||
const uvec3 div_constant = uvec3(0, 0x5556, 0x3334);
|
||||
return num_bits * n_vals
|
||||
+ ((((n_vals * ((0x870 >> (encoding * 4)) & 0xf)) + ((0x420 >> (encoding * 4)) & 0xf))
|
||||
+ ((((n_vals * ((0x870 >> (encoding << 2)) & 0xf)) + ((0x420 >> (encoding << 2)) & 0xf))
|
||||
* div_constant[encoding]) >> 16);
|
||||
}
|
||||
|
||||
|
|
@ -647,19 +647,19 @@ uint UnquantizeTexelWeight(EncodingData val) {
|
|||
: FastReplicateTo6(bitval, bitlen);
|
||||
} else if (encoding == TRIT || encoding == QUINT) {
|
||||
uint B = 0, C = 0, D = 0;
|
||||
uint b_mask = (0x3100 >> (bitlen * 4)) & 0xf;
|
||||
uint b_mask = (0x3100 >> (bitlen << 2)) & 0xf;
|
||||
uint b = (bitval >> 1) & b_mask;
|
||||
D = QuintTritValue(val);
|
||||
if (encoding == TRIT) {
|
||||
switch (bitlen) {
|
||||
case 0: return D * 32; //0,32,64
|
||||
case 0: return D << 5; //0,32,64
|
||||
case 1: C = 50; break;
|
||||
case 2: C = 23; B = (b << 6) | (b << 2) | b; break;
|
||||
case 3: C = 11; B = (b << 5) | b; break;
|
||||
}
|
||||
} else if (encoding == QUINT) {
|
||||
switch (bitlen) {
|
||||
case 0: return D * 16; //0, 16, 32, 48, 64
|
||||
case 0: return D << 4; //0, 16, 32, 48, 64
|
||||
case 1: C = 28; break;
|
||||
case 2: C = 13; B = (b << 6) | (b << 1); break;
|
||||
}
|
||||
|
|
@ -681,7 +681,7 @@ void UnquantizeTexelWeights(uvec2 size, bool is_dual_plane) {
|
|||
}
|
||||
|
||||
uint GetUnquantizedTexelWeight(uint offset_base, uint plane, bool is_dual_plane) {
|
||||
const uint offset = is_dual_plane ? 2 * offset_base + plane : offset_base;
|
||||
const uint offset = is_dual_plane ? (offset_base << 1) + plane : offset_base;
|
||||
return result_vector[offset];
|
||||
}
|
||||
|
||||
|
|
@ -812,7 +812,7 @@ int FindLayout(uint mode) {
|
|||
| ((3) << (7 * 4)) //01a0 -> 7, 3 + 5 = 8
|
||||
;
|
||||
const uint if_mode3_t = sh3_mode + uint((mode & 0x10c) == 0x10c);
|
||||
const uint if_mode3_f = 5 + ((fl_const_table >> (sh0_mode * 4)) & 7);
|
||||
const uint if_mode3_f = 5 + ((fl_const_table >> (sh0_mode << 2)) & 7);
|
||||
return int((if_mode3_t & mask) | (if_mode3_f & ~mask));
|
||||
}
|
||||
|
||||
|
|
@ -902,7 +902,7 @@ void DecompressBlock(ivec3 coord) {
|
|||
const uint base_mode = base_cem & 3;
|
||||
const uint max_weight = DecodeMaxWeight(mode_layout, mode);
|
||||
const uint weight_bits = GetPackedBitSize(size_params, dual_plane, max_weight);
|
||||
const uint extra_cem_bits = base_mode > 0 ? ((0x85200 >> (num_partitions * 4)) & 0x0f) : 0;
|
||||
const uint extra_cem_bits = base_mode > 0 ? ((0x85200 >> (num_partitions << 2)) & 0x0f) : 0;
|
||||
const uint plane_selector_bits = dual_plane ? 2 : 0;
|
||||
uint remaining_bits = 128 - weight_bits - total_bitsread;
|
||||
remaining_bits -= extra_cem_bits;
|
||||
|
|
@ -928,7 +928,7 @@ void DecompressBlock(ivec3 coord) {
|
|||
const uint extra_cem = StreamBits(extra_cem_bits);
|
||||
const uint cem = ((extra_cem << 6) | base_cem) >> 2;
|
||||
const uint c0 = cem & ((1 << num_partitions) - 1);
|
||||
const uint c1 = (cem >> num_partitions) & ((1 << (num_partitions * 2)) - 1);
|
||||
const uint c1 = (cem >> num_partitions) & ((1 << (num_partitions << 1)) - 1);
|
||||
const uvec4 c = (uvec4(c0) >> uvec4(0, 1, 2, 3)) & 1;
|
||||
const uvec4 m = (uvec4(c1) >> uvec4(0, 2, 4, 6)) & 3;
|
||||
color_endpoint_mode = (((uvec4(base_mode) - (1 - c)) << 2) | m) & cem_mask;
|
||||
|
|
@ -951,36 +951,27 @@ void DecompressBlock(ivec3 coord) {
|
|||
color_endpoint_data = bitfieldReverse(color_endpoint_data).wzyx;
|
||||
const uint clear_byte_start = (weight_bits >> 3) + 1;
|
||||
|
||||
const uint byte_insert = ExtractBits(color_endpoint_data, (clear_byte_start - 1) * 8, 8) & uint(((1 << (weight_bits & 7)) - 1));
|
||||
const uint byte_insert = ExtractBits(color_endpoint_data, (clear_byte_start - 1) << 3, 8) & uint(((1 << (weight_bits & 7)) - 1));
|
||||
const uint vec_index = (clear_byte_start - 1) >> 2;
|
||||
color_endpoint_data[vec_index] = bitfieldInsert(color_endpoint_data[vec_index], byte_insert, int((clear_byte_start - 1) & 3) * 8, 8);
|
||||
for (uint i = clear_byte_start; i < 16; ++i) {
|
||||
const uint idx = i >> 2;
|
||||
color_endpoint_data[idx] = bitfieldInsert(color_endpoint_data[idx], 0, int(i & 3) * 8, 8);
|
||||
}
|
||||
color_endpoint_data[vec_index] = bitfieldInsert(color_endpoint_data[vec_index], byte_insert, int((clear_byte_start - 1) & 3) << 3, 8);
|
||||
for (uint i = clear_byte_start; i < 16; ++i)
|
||||
color_endpoint_data[i >> 2] = bitfieldInsert(color_endpoint_data[i >> 2], 0, int(i & 3) << 3, 8);
|
||||
|
||||
// Re-init vector variables for next decode phase
|
||||
result_index = 0;
|
||||
color_bitsread = 0;
|
||||
|
||||
// The limit for the Unquantize phase; this avoids decoding more data than needed.
|
||||
result_vector_max_index = size_params.x * size_params.y;
|
||||
if (dual_plane) {
|
||||
result_vector_max_index *= 2;
|
||||
}
|
||||
result_vector_max_index = (size_params.x * size_params.y) << uint(dual_plane);
|
||||
DecodeIntegerSequence(max_weight, GetNumWeightValues(size_params, dual_plane));
|
||||
|
||||
UnquantizeTexelWeights(size_params, dual_plane);
|
||||
for (uint j = 0; j < block_dims.y; j++) {
|
||||
for (uint i = 0; i < block_dims.x; i++) {
|
||||
uint local_partition = 0;
|
||||
if (num_partitions > 1) {
|
||||
local_partition = Select2DPartition(partition_index, uvec2(i, j), num_partitions);
|
||||
}
|
||||
const uint local_partition = Select2DPartition(partition_index, uvec2(i, j), num_partitions) & (0 - uint(num_partitions > 1));
|
||||
const uvec4 C0 = ReplicateByteTo16(endpoints0[local_partition]);
|
||||
const uvec4 C1 = ReplicateByteTo16(endpoints1[local_partition]);
|
||||
const uvec4 weight_vec = GetUnquantizedWeightVector(j, i, size_params, plane_index, dual_plane);
|
||||
const vec4 Cf = vec4((C0 * (uvec4(64) - weight_vec) + C1 * weight_vec + uvec4(32)) >> 6);
|
||||
const vec4 Cf = vec4((C0 * (uvec4(64) - weight_vec) + C1 * weight_vec + 32) >> 6);
|
||||
const vec4 p = (Cf / 65535.0f);
|
||||
imageStore(dest_image, coord + ivec3(i, j, 0), p.gbar);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue