[bcn] ternary compose and simplify subsetIndex & anchordIndex

This commit is contained in:
lizzie 2025-12-29 06:05:53 +00:00
parent 1d13f7a666
commit 47cb31a5ef
4 changed files with 337 additions and 542 deletions

File diff suppressed because it is too large Load Diff

View File

@ -1,43 +1,23 @@
// SPDX-License-Identifier: MPL-2.0
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
// Copyright 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
#pragma once
#include <cstdint>
namespace bcn {
/**
* @brief Decodes a BC1 encoded image to R8G8B8A8
*/
void DecodeBc1(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height);
/**
* @brief Decodes a BC2 encoded image to R8G8B8A8
*/
void DecodeBc2(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height);
/**
* @brief Decodes a BC3 encoded image to R8G8B8A8
*/
void DecodeBc3(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height);
/**
* @brief Decodes a BC4 encoded image to R8
*/
/// @brief Decodes a BC1 encoded image to R8G8B8A8
void DecodeBc1(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned);
/// @brief Decodes a BC2 encoded image to R8G8B8A8
void DecodeBc2(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned);
/// @brief Decodes a BC3 encoded image to R8G8B8A8
void DecodeBc3(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned);
/// @brief Decodes a BC4 encoded image to R8
void DecodeBc4(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned);
/**
* @brief Decodes a BC5 encoded image to R8G8
*/
/// @brief Decodes a BC5 encoded image to R8G8
void DecodeBc5(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned);
/**
* @brief Decodes a BC6 encoded image to R16G16B16A16
*/
/// @brief Decodes a BC6 encoded image to R16G16B16A16
void DecodeBc6(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned);
/**
* @brief Decodes a BC7 encoded image to R8G8B8A8
*/
void DecodeBc7(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height);
/// @brief Decodes a BC7 encoded image to R8G8B8A8
void DecodeBc7(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned);
}

View File

@ -9,6 +9,7 @@
#include <span>
#include <bc_decoder.h>
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/texture_cache/decode_bc.h"
@ -62,9 +63,28 @@ u32 ConvertedBytesPerBlock(VideoCore::Surface::PixelFormat pixel_format) {
}
}
template <auto decompress, PixelFormat pixel_format>
void DecompressBlocks(std::span<const u8> input, std::span<u8> output, BufferImageCopy& copy,
bool is_signed = false) {
void DecompressBCn(std::span<const u8> input, std::span<u8> output, BufferImageCopy& copy, VideoCore::Surface::PixelFormat pixel_format) {
auto const f = [pixel_format]{
switch (pixel_format) {
case PixelFormat::BC1_RGBA_UNORM:
case PixelFormat::BC1_RGBA_SRGB: return &bcn::DecodeBc1;
case PixelFormat::BC2_UNORM:
case PixelFormat::BC2_SRGB: return &bcn::DecodeBc2;
case PixelFormat::BC3_UNORM:
case PixelFormat::BC3_SRGB: return &bcn::DecodeBc3;
case PixelFormat::BC4_SNORM:
case PixelFormat::BC4_UNORM: return &bcn::DecodeBc4;
case PixelFormat::BC5_SNORM:
case PixelFormat::BC5_UNORM: return &bcn::DecodeBc5;
case PixelFormat::BC6H_SFLOAT:
case PixelFormat::BC6H_UFLOAT: return &bcn::DecodeBc6;
case PixelFormat::BC7_SRGB:
case PixelFormat::BC7_UNORM: return &bcn::DecodeBc7;
default:
UNREACHABLE_MSG("Unimplemented BCn decompression {}", pixel_format);
return &bcn::DecodeBc1;
}
}();
const u32 out_bpp = ConvertedBytesPerBlock(pixel_format);
const u32 block_size = BlockSize(pixel_format);
const u32 width = copy.image_extent.width;
@ -82,11 +102,7 @@ void DecompressBlocks(std::span<const u8> input, std::span<u8> output, BufferIma
for (u32 x = 0; x < width; x += block_width) {
const u8* src = input.data() + src_offset;
u8* const dst = output.data() + dst_offset;
if constexpr (IsSigned(pixel_format)) {
decompress(src, dst, x, y, width, height, is_signed);
} else {
decompress(src, dst, x, y, width, height);
}
f(src, dst, x, y, width, height, IsSigned(pixel_format));
src_offset += block_size;
dst_offset += block_width * out_bpp;
}
@ -96,43 +112,4 @@ void DecompressBlocks(std::span<const u8> input, std::span<u8> output, BufferIma
}
}
/// @brief Selects a DecompressBlocks instantiation for the given BCn pixel format and runs it.
///
/// Each case pair (UNORM/SRGB, SNORM/UNORM, SFLOAT/UFLOAT) shares a single instantiation,
/// keyed on the matching bcn decoder and one representative PixelFormat template argument.
/// @param input Span forwarded unchanged as the DecompressBlocks input
/// @param output Span forwarded unchanged as the DecompressBlocks output
/// @param copy Copy descriptor forwarded unchanged to DecompressBlocks
/// @param pixel_format Format that picks the decoder; a format not listed in the switch only
/// logs a warning and no DecompressBlocks call is made
void DecompressBCn(std::span<const u8> input, std::span<u8> output, BufferImageCopy& copy,
VideoCore::Surface::PixelFormat pixel_format) {
switch (pixel_format) {
// BC1, BC2 and BC3 are called without the optional is_signed argument.
case PixelFormat::BC1_RGBA_UNORM:
case PixelFormat::BC1_RGBA_SRGB:
DecompressBlocks<bcn::DecodeBc1, PixelFormat::BC1_RGBA_UNORM>(input, output, copy);
break;
case PixelFormat::BC2_UNORM:
case PixelFormat::BC2_SRGB:
DecompressBlocks<bcn::DecodeBc2, PixelFormat::BC2_UNORM>(input, output, copy);
break;
case PixelFormat::BC3_UNORM:
case PixelFormat::BC3_SRGB:
DecompressBlocks<bcn::DecodeBc3, PixelFormat::BC3_UNORM>(input, output, copy);
break;
// BC4, BC5 and BC6H always instantiate with the unsigned format as the template argument,
// so the signed flag is computed from the runtime pixel_format and passed explicitly.
case PixelFormat::BC4_SNORM:
case PixelFormat::BC4_UNORM:
DecompressBlocks<bcn::DecodeBc4, PixelFormat::BC4_UNORM>(
input, output, copy, pixel_format == PixelFormat::BC4_SNORM);
break;
case PixelFormat::BC5_SNORM:
case PixelFormat::BC5_UNORM:
DecompressBlocks<bcn::DecodeBc5, PixelFormat::BC5_UNORM>(
input, output, copy, pixel_format == PixelFormat::BC5_SNORM);
break;
case PixelFormat::BC6H_SFLOAT:
case PixelFormat::BC6H_UFLOAT:
DecompressBlocks<bcn::DecodeBc6, PixelFormat::BC6H_UFLOAT>(
input, output, copy, pixel_format == PixelFormat::BC6H_SFLOAT);
break;
case PixelFormat::BC7_SRGB:
case PixelFormat::BC7_UNORM:
DecompressBlocks<bcn::DecodeBc7, PixelFormat::BC7_UNORM>(input, output, copy);
break;
default:
// Unlisted formats are reported but otherwise ignored.
LOG_WARNING(HW_GPU, "Unimplemented BCn decompression {}", pixel_format);
}
}
} // namespace VideoCommon

View File

@ -922,8 +922,7 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory
return copies;
}
void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
std::span<BufferImageCopy> copies) {
void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, std::span<BufferImageCopy> copies) {
u32 output_offset = 0;
Common::ScratchBuffer<u8> decode_scratch;
@ -955,10 +954,10 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8
} else if (astc) {
// BC1 uses 0.5 bytes per texel
// BC3 uses 1 byte per texel
const auto compress = recompression_setting == Settings::AstcRecompression::Bc1
? Tegra::Texture::BCN::CompressBC1
: Tegra::Texture::BCN::CompressBC3;
const auto bpp_div = recompression_setting == Settings::AstcRecompression::Bc1 ? 2 : 1;
auto const compress = recompression_setting == Settings::AstcRecompression::Bc1
? Tegra::Texture::BCN::CompressBC1
: Tegra::Texture::BCN::CompressBC3;
const auto bpp_div = compress == Tegra::Texture::BCN::CompressBC1 ? 2 : 1;
const u32 plane_dim = copy.image_extent.width * copy.image_extent.height;
const u32 level_size = plane_dim * copy.image_extent.depth *
@ -975,18 +974,12 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8
copy.image_subresource.num_layers * copy.image_extent.depth,
output.subspan(output_offset));
const u32 aligned_plane_dim = Common::AlignUp(copy.image_extent.width, 4) *
Common::AlignUp(copy.image_extent.height, 4);
copy.buffer_size =
(aligned_plane_dim * copy.image_extent.depth * copy.image_subresource.num_layers) /
bpp_div;
output_offset += static_cast<u32>(copy.buffer_size);
const u32 aligned_plane_dim = Common::AlignUp(copy.image_extent.width, 4) * Common::AlignUp(copy.image_extent.height, 4);
copy.buffer_size = (aligned_plane_dim * copy.image_extent.depth * copy.image_subresource.num_layers) / bpp_div;
output_offset += u32(copy.buffer_size);
} else {
DecompressBCn(input_offset, output.subspan(output_offset), copy, info.format);
output_offset += copy.image_extent.width * copy.image_extent.height *
copy.image_subresource.num_layers *
ConvertedBytesPerBlock(info.format);
output_offset += copy.image_extent.width * copy.image_extent.height * copy.image_subresource.num_layers * ConvertedBytesPerBlock(info.format);
}
copy.buffer_row_length = mip_size.width;