[bcn] ternary compose and simplify subsetIndex & anchorIndex

2025-12-29 06:05:53 +00:00 · 2025-12-29 06:05:53 +00:00 · 47cb31a5ef
parent 1d13f7a666
commit 47cb31a5ef
4 changed files with 337 additions and 542 deletions
--- a/externals/bc_decoder/bc_decoder.cpp
+++ b/externals/bc_decoder/bc_decoder.cpp
--- a/externals/bc_decoder/bc_decoder.h
+++ b/externals/bc_decoder/bc_decoder.h
@ -1,43 +1,23 @@
 // SPDX-License-Identifier: MPL-2.0
-// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
+// Copyright 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)

 #pragma once

 #include <cstdint>

 namespace bcn {
-    /**
-     * @brief Decodes a BC1 encoded image to R8G8B8A8
-     */
-    void DecodeBc1(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height);
-
-    /**
-     * @brief Decodes a BC2 encoded image to R8G8B8A8
-     */
-    void DecodeBc2(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height);
-
-    /**
-     * @brief Decodes a BC3 encoded image to R8G8B8A8
-     */
-    void DecodeBc3(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height);
-
-    /**
-     * @brief Decodes a BC4 encoded image to R8
-     */
+    /// @brief Decodes a BC1 encoded image to R8G8B8A8
+    void DecodeBc1(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned);
+    /// @brief Decodes a BC2 encoded image to R8G8B8A8
+    void DecodeBc2(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned);
+    /// @brief Decodes a BC3 encoded image to R8G8B8A8
+    void DecodeBc3(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned);
+    /// @brief Decodes a BC4 encoded image to R8
    void DecodeBc4(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned);
-
-    /**
-     * @brief Decodes a BC5 encoded image to R8G8
-     */
+    /// @brief Decodes a BC5 encoded image to R8G8
    void DecodeBc5(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned);
-
-    /**
-     * @brief Decodes a BC6 encoded image to R16G16B16A16
-     */
+    /// @brief Decodes a BC6 encoded image to R16G16B16A16
    void DecodeBc6(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned);
-
-    /**
-     * @brief Decodes a BC7 encoded image to R8G8B8A8
-     */
-    void DecodeBc7(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height);
+    /// @brief Decodes a BC7 encoded image to R8G8B8A8
+    void DecodeBc7(const uint8_t *src, uint8_t *dst, size_t x, size_t y, size_t width, size_t height, bool isSigned);
 }
--- a/src/video_core/texture_cache/decode_bc.cpp
+++ b/src/video_core/texture_cache/decode_bc.cpp
@ -9,6 +9,7 @@
 #include <span>
 #include <bc_decoder.h>

+#include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/texture_cache/decode_bc.h"

@ -62,9 +63,28 @@ u32 ConvertedBytesPerBlock(VideoCore::Surface::PixelFormat pixel_format) {
    }
 }

-template <auto decompress, PixelFormat pixel_format>
-void DecompressBlocks(std::span<const u8> input, std::span<u8> output, BufferImageCopy& copy,
-                      bool is_signed = false) {
+void DecompressBCn(std::span<const u8> input, std::span<u8> output, BufferImageCopy& copy, VideoCore::Surface::PixelFormat pixel_format) {
+    auto const f = [pixel_format]{
+        switch (pixel_format) {
+        case PixelFormat::BC1_RGBA_UNORM:
+        case PixelFormat::BC1_RGBA_SRGB: return &bcn::DecodeBc1;
+        case PixelFormat::BC2_UNORM:
+        case PixelFormat::BC2_SRGB: return &bcn::DecodeBc2;
+        case PixelFormat::BC3_UNORM:
+        case PixelFormat::BC3_SRGB: return &bcn::DecodeBc3;
+        case PixelFormat::BC4_SNORM:
+        case PixelFormat::BC4_UNORM: return &bcn::DecodeBc4;
+        case PixelFormat::BC5_SNORM:
+        case PixelFormat::BC5_UNORM: return &bcn::DecodeBc5;
+        case PixelFormat::BC6H_SFLOAT:
+        case PixelFormat::BC6H_UFLOAT: return &bcn::DecodeBc6;
+        case PixelFormat::BC7_SRGB:
+        case PixelFormat::BC7_UNORM: return &bcn::DecodeBc7;
+        default:
+            UNREACHABLE_MSG("Unimplemented BCn decompression {}", pixel_format);
+            return &bcn::DecodeBc1;
+        }
+    }();
    const u32 out_bpp = ConvertedBytesPerBlock(pixel_format);
    const u32 block_size = BlockSize(pixel_format);
    const u32 width = copy.image_extent.width;
@ -82,11 +102,7 @@ void DecompressBlocks(std::span<const u8> input, std::span<u8> output, BufferIma
            for (u32 x = 0; x < width; x += block_width) {
                const u8* src = input.data() + src_offset;
                u8* const dst = output.data() + dst_offset;
-                if constexpr (IsSigned(pixel_format)) {
-                    decompress(src, dst, x, y, width, height, is_signed);
-                } else {
-                    decompress(src, dst, x, y, width, height);
-                }
+                f(src, dst, x, y, width, height, IsSigned(pixel_format));
                src_offset += block_size;
                dst_offset += block_width * out_bpp;
            }
@ -96,43 +112,4 @@ void DecompressBlocks(std::span<const u8> input, std::span<u8> output, BufferIma
    }
 }

-void DecompressBCn(std::span<const u8> input, std::span<u8> output, BufferImageCopy& copy,
-                   VideoCore::Surface::PixelFormat pixel_format) {
-    switch (pixel_format) {
-    case PixelFormat::BC1_RGBA_UNORM:
-    case PixelFormat::BC1_RGBA_SRGB:
-        DecompressBlocks<bcn::DecodeBc1, PixelFormat::BC1_RGBA_UNORM>(input, output, copy);
-        break;
-    case PixelFormat::BC2_UNORM:
-    case PixelFormat::BC2_SRGB:
-        DecompressBlocks<bcn::DecodeBc2, PixelFormat::BC2_UNORM>(input, output, copy);
-        break;
-    case PixelFormat::BC3_UNORM:
-    case PixelFormat::BC3_SRGB:
-        DecompressBlocks<bcn::DecodeBc3, PixelFormat::BC3_UNORM>(input, output, copy);
-        break;
-    case PixelFormat::BC4_SNORM:
-    case PixelFormat::BC4_UNORM:
-        DecompressBlocks<bcn::DecodeBc4, PixelFormat::BC4_UNORM>(
-            input, output, copy, pixel_format == PixelFormat::BC4_SNORM);
-        break;
-    case PixelFormat::BC5_SNORM:
-    case PixelFormat::BC5_UNORM:
-        DecompressBlocks<bcn::DecodeBc5, PixelFormat::BC5_UNORM>(
-            input, output, copy, pixel_format == PixelFormat::BC5_SNORM);
-        break;
-    case PixelFormat::BC6H_SFLOAT:
-    case PixelFormat::BC6H_UFLOAT:
-        DecompressBlocks<bcn::DecodeBc6, PixelFormat::BC6H_UFLOAT>(
-            input, output, copy, pixel_format == PixelFormat::BC6H_SFLOAT);
-        break;
-    case PixelFormat::BC7_SRGB:
-    case PixelFormat::BC7_UNORM:
-        DecompressBlocks<bcn::DecodeBc7, PixelFormat::BC7_UNORM>(input, output, copy);
-        break;
-    default:
-        LOG_WARNING(HW_GPU, "Unimplemented BCn decompression {}", pixel_format);
-    }
-}
-
 } // namespace VideoCommon
--- a/src/video_core/texture_cache/util.cpp
+++ b/src/video_core/texture_cache/util.cpp
@ -922,8 +922,7 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory
    return copies;
 }

-void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
-                  std::span<BufferImageCopy> copies) {
+void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, std::span<BufferImageCopy> copies) {
    u32 output_offset = 0;
    Common::ScratchBuffer<u8> decode_scratch;

@ -955,10 +954,10 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8
        } else if (astc) {
            // BC1 uses 0.5 bytes per texel
            // BC3 uses 1 byte per texel
-            const auto compress = recompression_setting == Settings::AstcRecompression::Bc1
-                                      ? Tegra::Texture::BCN::CompressBC1
-                                      : Tegra::Texture::BCN::CompressBC3;
-            const auto bpp_div = recompression_setting == Settings::AstcRecompression::Bc1 ? 2 : 1;
+            auto const compress = recompression_setting == Settings::AstcRecompression::Bc1
+                ? Tegra::Texture::BCN::CompressBC1
+                : Tegra::Texture::BCN::CompressBC3;
+            const auto bpp_div = compress == Tegra::Texture::BCN::CompressBC1 ? 2 : 1;

            const u32 plane_dim = copy.image_extent.width * copy.image_extent.height;
            const u32 level_size = plane_dim * copy.image_extent.depth *
@ -975,18 +974,12 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8
                     copy.image_subresource.num_layers * copy.image_extent.depth,
                     output.subspan(output_offset));

-            const u32 aligned_plane_dim = Common::AlignUp(copy.image_extent.width, 4) *
-                                          Common::AlignUp(copy.image_extent.height, 4);
-
-            copy.buffer_size =
-                (aligned_plane_dim * copy.image_extent.depth * copy.image_subresource.num_layers) /
-                bpp_div;
-            output_offset += static_cast<u32>(copy.buffer_size);
+            const u32 aligned_plane_dim = Common::AlignUp(copy.image_extent.width, 4) * Common::AlignUp(copy.image_extent.height, 4);
+            copy.buffer_size = (aligned_plane_dim * copy.image_extent.depth * copy.image_subresource.num_layers) / bpp_div;
+            output_offset += u32(copy.buffer_size);
        } else {
            DecompressBCn(input_offset, output.subspan(output_offset), copy, info.format);
-            output_offset += copy.image_extent.width * copy.image_extent.height *
-                             copy.image_subresource.num_layers *
-                             ConvertedBytesPerBlock(info.format);
+            output_offset += copy.image_extent.width * copy.image_extent.height * copy.image_subresource.num_layers * ConvertedBytesPerBlock(info.format);
        }

        copy.buffer_row_length = mip_size.width;