From ea6344b031db0a1941c173bdc232bcc6f925d736 Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 27 Jan 2026 08:05:53 +0000 Subject: [PATCH] fuckit, prepare for accelerate gpu --- .../features/settings/model/IntSetting.kt | 1 - .../settings/model/view/SettingsItem.kt | 9 - .../settings/ui/SettingsFragmentPresenter.kt | 1 - .../app/src/main/res/values/arrays.xml | 18 +- .../app/src/main/res/values/strings.xml | 2 + src/common/settings.h | 9 - src/common/settings_enums.h | 2 +- src/qt_common/config/shared_translation.cpp | 8 - src/video_core/CMakeLists.txt | 2 - src/video_core/host_shaders/astc_decoder.comp | 19 +- .../renderer_opengl/gl_texture_cache.cpp | 27 +- .../renderer_opengl/util_shaders.cpp | 12 +- .../renderer_vulkan/maxwell_to_vk.cpp | 8 +- .../renderer_vulkan/vk_compute_pass.cpp | 21 +- .../renderer_vulkan/vk_texture_cache.cpp | 34 +- src/video_core/surface.cpp | 10 +- src/video_core/texture_cache/texture_cache.h | 11 +- src/video_core/texture_cache/util.cpp | 72 +- src/video_core/textures/astc.cpp | 1429 ----------------- src/video_core/textures/astc.h | 11 - 20 files changed, 65 insertions(+), 1641 deletions(-) delete mode 100644 src/video_core/textures/astc.cpp delete mode 100644 src/video_core/textures/astc.h diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/IntSetting.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/IntSetting.kt index be3b2f4a48..1c9ffa309f 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/IntSetting.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/IntSetting.kt @@ -16,7 +16,6 @@ enum class IntSetting(override val key: String) : AbstractIntSetting { RENDERER_BACKEND("backend"), RENDERER_VRAM_USAGE_MODE("vram_usage_mode"), RENDERER_NVDEC_EMULATION("nvdec_emulation"), - RENDERER_ASTC_DECODE_METHOD("accelerate_astc"), RENDERER_ASTC_RECOMPRESSION("astc_recompression"), RENDERER_ACCURACY("gpu_accuracy"), RENDERER_RESOLUTION("resolution_setup"), diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt index 870eec1a1b..eb6d4ce7eb 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt @@ -325,15 +325,6 @@ abstract class SettingsItem( valuesId = R.array.rendererNvdecValues ) ) - put( - SingleChoiceSetting( - IntSetting.RENDERER_ASTC_DECODE_METHOD, - titleId = R.string.accelerate_astc, - descriptionId = R.string.accelerate_astc_description, - choicesId = R.array.astcDecodingMethodNames, - valuesId = R.array.astcDecodingMethodValues - ) - ) put( SingleChoiceSetting( IntSetting.RENDERER_ASTC_RECOMPRESSION, diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt index 490f2a97a5..c38d6c0c3d 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt @@ -268,7 +268,6 @@ class SettingsFragmentPresenter( add(IntSetting.DMA_ACCURACY.key) add(IntSetting.MAX_ANISOTROPY.key) add(IntSetting.RENDERER_VRAM_USAGE_MODE.key) - add(IntSetting.RENDERER_ASTC_DECODE_METHOD.key) add(IntSetting.RENDERER_ASTC_RECOMPRESSION.key) add(BooleanSetting.SYNC_MEMORY_OPERATIONS.key) diff --git a/src/android/app/src/main/res/values/arrays.xml b/src/android/app/src/main/res/values/arrays.xml index 69f1590844..a5001cbc12 100644 --- a/src/android/app/src/main/res/values/arrays.xml +++ b/src/android/app/src/main/res/values/arrays.xml @@ -123,25 +123,13 @@ 1 - - - @string/accelerate_astc_cpu - @string/accelerate_astc_gpu - @string/accelerate_astc_async - - - - - 0 - 1 - 2 - - @string/astc_recompression_uncompressed @string/astc_recompression_bc1 @string/astc_recompression_bc3 + @string/astc_recompression_bc7 + @string/astc_recompression_etc2 @@ -149,6 +137,8 @@ 0 1 2 + 3 + 4 diff --git a/src/android/app/src/main/res/values/strings.xml b/src/android/app/src/main/res/values/strings.xml index 1e49450b95..20fbc56805 100644 --- a/src/android/app/src/main/res/values/strings.xml +++ b/src/android/app/src/main/res/values/strings.xml @@ -1033,6 +1033,8 @@ Uncompressed BC1 BC3 + BC7 + ETC2 Conservative diff --git a/src/common/settings.h b/src/common/settings.h index 09c05a812a..f8fe58b992 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -430,15 +430,6 @@ struct Values { #endif "max_anisotropy", Category::RendererAdvanced}; - SwitchableSetting accelerate_astc{linkage, -#ifdef ANDROID - AstcDecodeMode::Cpu, -#else - AstcDecodeMode::Gpu, -#endif - "accelerate_astc", - Category::RendererAdvanced}; - SwitchableSetting astc_recompression{linkage, AstcRecompression::Uncompressed, "astc_recompression", diff --git a/src/common/settings_enums.h b/src/common/settings_enums.h index 30d075565b..02f77d5efd 100644 --- a/src/common/settings_enums.h +++ b/src/common/settings_enums.h @@ -128,7 +128,7 @@ ENUM(TimeZone, Auto, Default, Cet, Cst6Cdt, Cuba, Eet, Egypt, Eire, Est, Est5Edt Roc, Rok, Singapore, Turkey, Uct, Universal, Utc, WSu, Wet, Zulu); ENUM(AnisotropyMode, Automatic, Default, X2, X4, X8, X16, X32, X64, None); ENUM(AstcDecodeMode, Cpu, Gpu, CpuAsynchronous); -ENUM(AstcRecompression, Uncompressed, Bc1, Bc3); +ENUM(AstcRecompression, Uncompressed, Bc1, Bc3, Bc7, Etc2); ENUM(VSyncMode, Immediate, Mailbox, Fifo, FifoRelaxed); ENUM(VramUsageMode, Conservative, Aggressive); ENUM(RendererBackend, OpenGL_GLSL, Vulkan, Null, OpenGL_GLASM, OpenGL_SPIRV); diff --git a/src/qt_common/config/shared_translation.cpp b/src/qt_common/config/shared_translation.cpp index 163eb57138..0db5ec940e 100644 --- a/src/qt_common/config/shared_translation.cpp +++ b/src/qt_common/config/shared_translation.cpp @@ -209,14 +209,6 @@ std::unique_ptr InitializeTranslations(QObject* parent) tr("Specifies how videos should be decoded.\nIt can either use the CPU or the GPU for " "decoding, or perform no decoding at all (black screen on videos).\n" "In most cases, GPU decoding provides the best performance.")); - INSERT(Settings, - accelerate_astc, - tr("ASTC Decoding Method:"), - tr("This option controls how ASTC textures should be decoded.\n" - "CPU: Use the CPU for decoding.\n" - "GPU: Use the GPU's compute shaders to decode ASTC textures (recommended).\n" - "CPU Asynchronously: Use the CPU to decode ASTC textures on demand. Eliminates" - "ASTC decoding\nstuttering but may present artifacts.")); INSERT( Settings, astc_recompression, diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index ed77ae8934..8338933edd 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -279,8 +279,6 @@ add_library(video_core STATIC texture_cache/types.h texture_cache/util.cpp texture_cache/util.h - textures/astc.h - textures/astc.cpp textures/bcn.cpp textures/bcn.h textures/decoders.cpp diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp index da21b4bde8..2f316fe293 100644 --- a/src/video_core/host_shaders/astc_decoder.comp +++ b/src/video_core/host_shaders/astc_decoder.comp @@ -1075,16 +1075,14 @@ void DecompressBlock(ivec3 coord) { uint colvals_index = 0; DecodeColorValues(color_endpoint_mode, num_partitions, color_data_bits, color_values); for (uint i = 0; i < num_partitions; i++) { - ComputeEndpoints(endpoints0[i], endpoints1[i], color_endpoint_mode[i], color_values, - colvals_index); + ComputeEndpoints(endpoints0[i], endpoints1[i], color_endpoint_mode[i], color_values, colvals_index); } } color_endpoint_data = local_buff; color_endpoint_data = bitfieldReverse(color_endpoint_data).wzyx; const uint clear_byte_start = (weight_bits >> 3) + 1; - const uint byte_insert = ExtractBits(color_endpoint_data, int(clear_byte_start - 1) * 8, 8) & - uint(((1 << (weight_bits % 8)) - 1)); + const uint byte_insert = ExtractBits(color_endpoint_data, int(clear_byte_start - 1) * 8, 8) & uint(((1 << (weight_bits % 8)) - 1)); const uint vec_index = (clear_byte_start - 1) >> 2; color_endpoint_data[vec_index] = bitfieldInsert(color_endpoint_data[vec_index], byte_insert, int((clear_byte_start - 1) % 4) * 8, 8); @@ -1115,8 +1113,7 @@ void DecompressBlock(ivec3 coord) { const uvec4 C0 = ReplicateByteTo16(endpoints0[local_partition]); const uvec4 C1 = ReplicateByteTo16(endpoints1[local_partition]); const uvec4 weight_vec = GetUnquantizedWeightVector(j, i, size_params, plane_index, dual_plane); - const vec4 Cf = - vec4((C0 * (uvec4(64) - weight_vec) + C1 * weight_vec + uvec4(32)) / 64); + const vec4 Cf = vec4((C0 * (uvec4(64) - weight_vec) + C1 * weight_vec + uvec4(32)) / 64); const vec4 p = (Cf / 65535.0f); imageStore(dest_image, coord + ivec3(i, j, 0), p.gbar); } @@ -1124,11 +1121,11 @@ void DecompressBlock(ivec3 coord) { } uint SwizzleOffset(uvec2 pos) { - return ((pos.x & 32u) << 3u) | - ((pos.y & 6u) << 5u) | - ((pos.x & 16u) << 1u) | - ((pos.y & 1u) << 4u) | - (pos.x & 15u); + return ((pos.x & 32u) << 3u) + | ((pos.y & 6u) << 5u) + | ((pos.x & 16u) << 1u) + | ((pos.y & 1u) << 4u) + | (pos.x & 15u); } void main() { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 958988f27e..6e3d5d51f3 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project @@ -235,9 +235,7 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::arraydevice, info.format, info.type)) { diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index c437013e6a..4cb7ce8c55 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -25,14 +28,11 @@ #include "video_core/texture_cache/accelerated_swizzle.h" #include "video_core/texture_cache/types.h" #include "video_core/texture_cache/util.h" -#include "video_core/textures/astc.h" #include "video_core/textures/decoders.h" namespace OpenGL { using namespace HostShaders; -using namespace Tegra::Texture::ASTC; - using VideoCommon::Extent2D; using VideoCommon::Extent3D; using VideoCommon::ImageCopy; @@ -141,10 +141,8 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const StagingBufferMap& map, glUniform1ui(5, params.x_shift); glUniform1ui(6, params.block_height); glUniform1ui(7, params.block_height_mask); - glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset, - image.guest_size_bytes - swizzle.buffer_offset); - glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0, - GL_WRITE_ONLY, store_format); + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.buffer, input_offset, image.guest_size_bytes - swizzle.buffer_offset); + glBindImageTexture(BINDING_OUTPUT_IMAGE, image.StorageHandle(), swizzle.level, GL_TRUE, 0, GL_WRITE_ONLY, store_format); glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers); } program_manager.RestoreGuestCompute(); diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 024c72b38e..9b0115cf38 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project @@ -251,6 +251,12 @@ FormatInfo SurfaceFormat(const Device& device, FormatType format_type, bool with case Settings::AstcRecompression::Bc3: tuple.format = is_srgb ? VK_FORMAT_BC3_SRGB_BLOCK : VK_FORMAT_BC3_UNORM_BLOCK; break; + case Settings::AstcRecompression::Bc7: + tuple.format = is_srgb ? VK_FORMAT_BC7_SRGB_BLOCK : VK_FORMAT_BC7_UNORM_BLOCK; + break; + case Settings::AstcRecompression::Etc2: + tuple.format = is_srgb ? VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK : VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK; + break; } } // Transcode on hardware that doesn't support BCn natively diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 0ae81af0fb..b03e78b3a7 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project @@ -558,13 +558,11 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, const VkImageAspectFlags aspect_mask = image.AspectMask(); const VkImage vk_image = image.Handle(); const bool is_initialized = image.ExchangeInitialization(); - scheduler.Record([vk_pipeline, vk_image, aspect_mask, - is_initialized](vk::CommandBuffer cmdbuf) { + scheduler.Record([vk_pipeline, vk_image, aspect_mask, is_initialized](vk::CommandBuffer cmdbuf) { const VkImageMemoryBarrier image_barrier{ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, .pNext = nullptr, - .srcAccessMask = static_cast(is_initialized ? VK_ACCESS_SHADER_WRITE_BIT - : VK_ACCESS_NONE), + .srcAccessMask = VkAccessFlags(is_initialized ? VK_ACCESS_SHADER_WRITE_BIT : VK_ACCESS_NONE), .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, .oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED, .newLayout = VK_IMAGE_LAYOUT_GENERAL, @@ -579,9 +577,7 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, .layerCount = VK_REMAINING_ARRAY_LAYERS, }, }; - cmdbuf.PipelineBarrier(is_initialized ? VK_PIPELINE_STAGE_ALL_COMMANDS_BIT - : VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, image_barrier); + cmdbuf.PipelineBarrier(is_initialized ? VK_PIPELINE_STAGE_ALL_COMMANDS_BIT : VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, image_barrier); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, vk_pipeline); }); for (const VideoCommon::SwizzleParameters& swizzle : swizzles) { @@ -591,8 +587,7 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, const u32 num_dispatches_z = image.info.resources.layers; compute_pass_descriptor_queue.Acquire(); - compute_pass_descriptor_queue.AddBuffer(map.buffer, input_offset, - image.guest_size_bytes - swizzle.buffer_offset); + compute_pass_descriptor_queue.AddBuffer(map.buffer, input_offset, image.guest_size_bytes - swizzle.buffer_offset); compute_pass_descriptor_queue.AddImage(image.StorageImageView(swizzle.level)); const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()}; @@ -601,8 +596,7 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, ASSERT(params.origin == (std::array{0, 0, 0})); ASSERT(params.destination == (std::array{0, 0, 0})); ASSERT(params.bytes_per_block_log2 == 4); - scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, block_dims, - params, descriptor_data](vk::CommandBuffer cmdbuf) { + scheduler.Record([this, num_dispatches_x, num_dispatches_y, num_dispatches_z, block_dims, params, descriptor_data](vk::CommandBuffer cmdbuf) { const AstcPushConstants uniforms{ .blocks_dims = block_dims, .layer_stride = params.layer_stride, @@ -637,8 +631,7 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, .layerCount = VK_REMAINING_ARRAY_LAYERS, }, }; - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, image_barrier); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, image_barrier); }); } diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 39a43d5950..95c753d61a 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -848,13 +848,11 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, Scheduler& sched RenderPassCache& render_pass_cache_, DescriptorPool& descriptor_pool, ComputePassDescriptorQueue& compute_pass_descriptor_queue) - : device{device_}, scheduler{scheduler_}, memory_allocator{memory_allocator_}, - staging_buffer_pool{staging_buffer_pool_}, blit_image_helper{blit_image_helper_}, - render_pass_cache{render_pass_cache_}, resolution{Settings::values.resolution_info} { - if (Settings::values.accelerate_astc.GetValue() == Settings::AstcDecodeMode::Gpu) { - astc_decoder_pass.emplace(device, scheduler, descriptor_pool, staging_buffer_pool, - compute_pass_descriptor_queue, memory_allocator); - } + : device{device_}, scheduler{scheduler_}, memory_allocator{memory_allocator_} + , staging_buffer_pool{staging_buffer_pool_}, blit_image_helper{blit_image_helper_} + , render_pass_cache{render_pass_cache_}, resolution{Settings::values.resolution_info} +{ + astc_decoder_pass.emplace(device, scheduler, descriptor_pool, staging_buffer_pool, compute_pass_descriptor_queue, memory_allocator); if (device.IsStorageImageMultisampleSupported()) { msaa_copy_pass.emplace(device, scheduler, descriptor_pool, staging_buffer_pool, compute_pass_descriptor_queue); } @@ -1562,20 +1560,7 @@ Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu runtime->ViewFormats(info.format))), aspect_mask(ImageAspectMask(info.format)) { if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) { - switch (Settings::values.accelerate_astc.GetValue()) { - case Settings::AstcDecodeMode::Gpu: - if (Settings::values.astc_recompression.GetValue() == - Settings::AstcRecompression::Uncompressed && - info.size.depth == 1) { - flags |= VideoCommon::ImageFlagBits::AcceleratedUpload; - } - break; - case Settings::AstcDecodeMode::CpuAsynchronous: - flags |= VideoCommon::ImageFlagBits::AsynchronousDecode; - break; - default: - break; - } + flags |= VideoCommon::ImageFlagBits::AcceleratedUpload; flags |= VideoCommon::ImageFlagBits::Converted; flags |= VideoCommon::ImageFlagBits::CostlyLoad; } @@ -1588,13 +1573,10 @@ Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu } current_image = &Image::original_image; storage_image_views.resize(info.resources.levels); - if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported() && - Settings::values.astc_recompression.GetValue() == - Settings::AstcRecompression::Uncompressed) { + if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) { const auto& device = runtime->device.GetLogical(); for (s32 level = 0; level < info.resources.levels; ++level) { - storage_image_views[level] = - MakeStorageView(device, level, *original_image, VK_FORMAT_A8B8G8R8_UNORM_PACK32); + storage_image_views[level] = MakeStorageView(device, level, *original_image, VK_FORMAT_A8B8G8R8_UNORM_PACK32); } } } diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 1998849e84..3f6a4ebf30 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: 2014 Citra Emulator Project @@ -444,15 +444,17 @@ std::pair GetASTCBlockSize(PixelFormat format) { u64 TranscodedAstcSize(u64 base_size, PixelFormat format) { constexpr u64 RGBA8_PIXEL_SIZE = 4; - const u64 base_block_size = static_cast(DefaultBlockWidth(format)) * - static_cast(DefaultBlockHeight(format)) * RGBA8_PIXEL_SIZE; + const u64 base_block_size = u64(DefaultBlockWidth(format)) * u64(DefaultBlockHeight(format)) * RGBA8_PIXEL_SIZE; const u64 uncompressed_size = (base_size * base_block_size) / BytesPerBlock(format); - switch (Settings::values.astc_recompression.GetValue()) { case Settings::AstcRecompression::Bc1: return uncompressed_size / 8; case Settings::AstcRecompression::Bc3: return uncompressed_size / 4; + case Settings::AstcRecompression::Bc7: + return uncompressed_size / 4; + case Settings::AstcRecompression::Etc2: + return uncompressed_size / 4; //6=RGB, 4=RGBA default: return uncompressed_size; } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 2564a67780..d1ab06dabd 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -2317,13 +2317,10 @@ ImageViewId TextureCache

::FindOrEmplaceImageView(ImageId image_id, const Imag template void TextureCache

::RegisterImage(ImageId image_id) { ImageBase& image = slot_images[image_id]; - ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), - "Trying to register an already registered image"); + ASSERT(False(image.flags & ImageFlagBits::Registered) && "Trying to register an already registered image"); image.flags |= ImageFlagBits::Registered; u64 tentative_size = (std::max)(image.guest_size_bytes, image.unswizzled_size_bytes); - if ((IsPixelFormatASTC(image.info.format) && - True(image.flags & ImageFlagBits::AcceleratedUpload)) || - True(image.flags & ImageFlagBits::Converted)) { + if ((IsPixelFormatASTC(image.info.format) && True(image.flags & ImageFlagBits::AcceleratedUpload)) || True(image.flags & ImageFlagBits::Converted)) { tentative_size = TranscodedAstcSize(tentative_size, image.info.format); } total_used_memory += Common::AlignUp(tentative_size, 1024); @@ -2495,9 +2492,7 @@ void TextureCache

::DeleteImage(ImageId image_id, bool immediate_delete) { total_used_memory -= GetScaledImageSizeBytes(image); } u64 tentative_size = (std::max)(image.guest_size_bytes, image.unswizzled_size_bytes); - if ((IsPixelFormatASTC(image.info.format) && - True(image.flags & ImageFlagBits::AcceleratedUpload)) || - True(image.flags & ImageFlagBits::Converted)) { + if ((IsPixelFormatASTC(image.info.format) && True(image.flags & ImageFlagBits::AcceleratedUpload)) || True(image.flags & ImageFlagBits::Converted)) { tentative_size = TranscodedAstcSize(tentative_size, image.info.format); } total_used_memory -= Common::AlignUp(tentative_size, 1024); diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index e55d0752ec..c85e801ba9 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project @@ -33,7 +33,6 @@ #include "video_core/texture_cache/formatter.h" #include "video_core/texture_cache/samples_helper.h" #include "video_core/texture_cache/util.h" -#include "video_core/textures/astc.h" #include "video_core/textures/bcn.h" #include "video_core/textures/decoders.h" @@ -608,23 +607,18 @@ u32 CalculateConvertedSizeBytes(const ImageInfo& info) noexcept { return info.size.width * BytesPerBlock(info.format); } static constexpr Extent2D TILE_SIZE{1, 1}; - if (IsPixelFormatASTC(info.format) && Settings::values.astc_recompression.GetValue() != - Settings::AstcRecompression::Uncompressed) { - const u32 bpp_div = - Settings::values.astc_recompression.GetValue() == Settings::AstcRecompression::Bc1 ? 2 - : 1; + if (IsPixelFormatASTC(info.format) && Settings::values.astc_recompression.GetValue() != Settings::AstcRecompression::Uncompressed) { + const u32 bpp_div = Settings::values.astc_recompression.GetValue() == Settings::AstcRecompression::Bc1 ? 2 : 1; // NumBlocksPerLayer doesn't account for this correctly, so we have to do it manually. u32 output_size = 0; for (s32 i = 0; i < info.resources.levels; i++) { const auto mip_size = AdjustMipSize(info.size, i); - const u32 plane_dim = - Common::AlignUp(mip_size.width, 4U) * Common::AlignUp(mip_size.height, 4U); + const u32 plane_dim = Common::AlignUp(mip_size.width, 4U) * Common::AlignUp(mip_size.height, 4U); output_size += (plane_dim * info.size.depth * info.resources.layers) / bpp_div; } return output_size; } - return NumBlocksPerLayer(info, TILE_SIZE) * info.resources.layers * - ConvertedBytesPerBlock(info.format); + return NumBlocksPerLayer(info, TILE_SIZE) * info.resources.layers * ConvertedBytesPerBlock(info.format); } u32 CalculateLayerStride(const ImageInfo& info) noexcept { @@ -922,8 +916,7 @@ boost::container::small_vector UnswizzleImage(Tegra::Memory return copies; } -void ConvertImage(std::span input, const ImageInfo& info, std::span output, - std::span copies) { +void ConvertImage(std::span input, const ImageInfo& info, std::span output, std::span copies) { u32 output_offset = 0; Common::ScratchBuffer decode_scratch; @@ -939,56 +932,9 @@ void ConvertImage(std::span input, const ImageInfo& info, std::span(copy.buffer_size); - } else { - DecompressBCn(input_offset, output.subspan(output_offset), copy, info.format); - output_offset += copy.image_extent.width * copy.image_extent.height * - copy.image_subresource.num_layers * - ConvertedBytesPerBlock(info.format); - } - + ASSERT(!IsPixelFormatASTC(info.format) && "CPU ASTC decoder is phased out"); + DecompressBCn(input_offset, output.subspan(output_offset), copy, info.format); + output_offset += copy.image_extent.width * copy.image_extent.height * copy.image_subresource.num_layers * ConvertedBytesPerBlock(info.format); copy.buffer_row_length = mip_size.width; copy.buffer_image_height = mip_size.height; } diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp deleted file mode 100644 index 85233d9357..0000000000 --- a/src/video_core/textures/astc.cpp +++ /dev/null @@ -1,1429 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -// SPDX-FileCopyrightText: 2016 The University of North Carolina at Chapel Hill -// SPDX-License-Identifier: Apache-2.0 - -// Please send all BUG REPORTS to . -// - -#include -#include -#include -#include -#include -#include - -#include - -#include "common/alignment.h" -#include "common/common_types.h" -#include -#include "video_core/textures/astc.h" -#include "video_core/textures/workers.h" - -class InputBitStream { -public: - constexpr explicit InputBitStream(std::span data, size_t start_offset = 0) - : cur_byte{data.data()}, total_bits{data.size()}, next_bit{start_offset % 8} {} - - constexpr size_t GetBitsRead() const { - return bits_read; - } - - constexpr bool ReadBit() { - if (bits_read >= total_bits * 8) { - return 0; - } - const bool bit = ((*cur_byte >> next_bit) & 1) != 0; - ++next_bit; - while (next_bit >= 8) { - next_bit -= 8; - ++cur_byte; - } - ++bits_read; - return bit; - } - - constexpr u32 ReadBits(std::size_t nBits) { - u32 ret = 0; - for (std::size_t i = 0; i < nBits; ++i) { - ret |= (ReadBit() & 1) << i; - } - return ret; - } - - template - constexpr u32 ReadBits() { - u32 ret = 0; - for (std::size_t i = 0; i < nBits; ++i) { - ret |= (ReadBit() & 1) << i; - } - return ret; - } - -private: - const u8* cur_byte; - size_t total_bits = 0; - size_t next_bit = 0; - size_t bits_read = 0; -}; - -class OutputBitStream { -public: - constexpr explicit OutputBitStream(u8* ptr, std::size_t bits = 0, std::size_t start_offset = 0) - : cur_byte{ptr}, num_bits{bits}, next_bit{start_offset % 8} {} - - constexpr std::size_t GetBitsWritten() const { - return bits_written; - } - - constexpr void WriteBitsR(u32 val, u32 nBits) { - for (u32 i = 0; i < nBits; i++) { - WriteBit((val >> (nBits - i - 1)) & 1); - } - } - - constexpr void WriteBits(u32 val, u32 nBits) { - for (u32 i = 0; i < nBits; i++) { - WriteBit((val >> i) & 1); - } - } - -private: - constexpr void WriteBit(bool b) { - if (bits_written >= num_bits) { - return; - } - - const u32 mask = 1 << next_bit++; - - // clear the bit - *cur_byte &= u8(~mask); - - // Write the bit, if necessary - if (b) - *cur_byte |= u8(mask); - - // Next byte? - if (next_bit >= 8) { - cur_byte += 1; - next_bit = 0; - } - } - - u8* cur_byte; - std::size_t num_bits; - std::size_t bits_written = 0; - std::size_t next_bit = 0; -}; - -template -class Bits { -public: - explicit Bits(const IntType& v) : m_Bits(v) {} - - Bits(const Bits&) = delete; - Bits& operator=(const Bits&) = delete; - - u8 operator[](u32 bitPos) const { - return u8((m_Bits >> bitPos) & 1); - } - - IntType operator()(u32 start, u32 end) const { - if (start == end) { - return (*this)[start]; - } else if (start > end) { - u32 t = start; - start = end; - end = t; - } - - u64 mask = (1 << (end - start + 1)) - 1; - return (m_Bits >> start) & static_cast(mask); - } - -private: - const IntType& m_Bits; -}; - -enum class IntegerEncoding { JustBits, Quint, Trit }; - -struct IntegerEncodedValue { - constexpr IntegerEncodedValue() = default; - - constexpr IntegerEncodedValue(IntegerEncoding encoding_, u32 num_bits_) - : encoding{encoding_}, num_bits{num_bits_} {} - - constexpr bool MatchesEncoding(const IntegerEncodedValue& other) const { - return encoding == other.encoding && num_bits == other.num_bits; - } - - // Returns the number of bits required to encode num_vals values. - u32 GetBitLength(u32 num_vals) const { - u32 total_bits = num_bits * num_vals; - if (encoding == IntegerEncoding::Quint) { - total_bits += (num_vals * 8 + 4) / 5; - } else if (encoding == IntegerEncoding::Trit) { - total_bits += (num_vals * 7 + 2) / 3; - } - return total_bits; - } - - IntegerEncoding encoding{}; - u32 num_bits = 0; - u32 bit_value = 0; - union { - u32 quint_value = 0; - u32 trit_value; - }; -}; - -// Returns a new instance of this struct that corresponds to the -// can take no more than mav_value values -static constexpr IntegerEncodedValue CreateEncoding(u32 mav_value) { - while (mav_value > 0) { - u32 check = mav_value + 1; - // Is mav_value a power of two? - if (!(check & (check - 1))) - return IntegerEncodedValue(IntegerEncoding::JustBits, std::popcount(mav_value)); - // Is mav_value of the type 3*2^n - 1? - if ((check % 3 == 0) && !((check / 3) & ((check / 3) - 1))) - return IntegerEncodedValue(IntegerEncoding::Trit, std::popcount(check / 3 - 1)); - // Is mav_value of the type 5*2^n - 1? - if ((check % 5 == 0) && !((check / 5) & ((check / 5) - 1))) - return IntegerEncodedValue(IntegerEncoding::Quint, std::popcount(check / 5 - 1)); - // Apparently it can't be represented with a bounded integer sequence... - // just iterate. - mav_value--; - } - return IntegerEncodedValue(IntegerEncoding::JustBits, 0); -} -static constexpr std::array MakeEncodedValues() { - std::array encodings{}; - for (std::size_t i = 0; i < encodings.size(); ++i) - encodings[i] = CreateEncoding(u32(i)); - return encodings; -} -static constexpr std::array ASTC_ENCODINGS_VALUES = MakeEncodedValues(); - -namespace Tegra::Texture::ASTC { -using IntegerEncodedVector = boost::container::static_vector; - -static void DecodeTritBlock(InputBitStream& bits, IntegerEncodedVector& result, u32 nBitsPerValue) { - // Implement the algorithm in section C.2.12 - std::array m; - std::array t; - u32 T; - - // Read the trit encoded block according to - // table C.2.14 - m[0] = bits.ReadBits(nBitsPerValue); - T = bits.ReadBits<2>(); - m[1] = bits.ReadBits(nBitsPerValue); - T |= bits.ReadBits<2>() << 2; - m[2] = bits.ReadBits(nBitsPerValue); - T |= bits.ReadBit() << 4; - m[3] = bits.ReadBits(nBitsPerValue); - T |= bits.ReadBits<2>() << 5; - m[4] = bits.ReadBits(nBitsPerValue); - T |= bits.ReadBit() << 7; - - u32 C = 0; - - Bits Tb(T); - if (Tb(2, 4) == 7) { - C = (Tb(5, 7) << 2) | Tb(0, 1); - t[4] = t[3] = 2; - } else { - C = Tb(0, 4); - if (Tb(5, 6) == 3) { - t[4] = 2; - t[3] = Tb[7]; - } else { - t[4] = Tb[7]; - t[3] = Tb(5, 6); - } - } - - Bits Cb(C); - if (Cb(0, 1) == 3) { - t[2] = 2; - t[1] = Cb[4]; - t[0] = (Cb[3] << 1) | (Cb[2] & ~Cb[3]); - } else if (Cb(2, 3) == 3) { - t[2] = 2; - t[1] = 2; - t[0] = Cb(0, 1); - } else { - t[2] = Cb[4]; - t[1] = Cb(2, 3); - t[0] = (Cb[1] << 1) | (Cb[0] & ~Cb[1]); - } - - for (std::size_t i = 0; i < 5; ++i) { - IntegerEncodedValue& val = result.emplace_back(IntegerEncoding::Trit, nBitsPerValue); - val.bit_value = m[i]; - val.trit_value = t[i]; - } -} - -static void DecodeQuintBlock(InputBitStream& bits, IntegerEncodedVector& result, - u32 nBitsPerValue) { - // Implement the algorithm in section C.2.12 - u32 m[3]; - u32 q[3]; - u32 Q; - - // Read the trit encoded block according to - // table C.2.15 - m[0] = bits.ReadBits(nBitsPerValue); - Q = bits.ReadBits<3>(); - m[1] = bits.ReadBits(nBitsPerValue); - Q |= bits.ReadBits<2>() << 3; - m[2] = bits.ReadBits(nBitsPerValue); - Q |= bits.ReadBits<2>() << 5; - - Bits Qb(Q); - if (Qb(1, 2) == 3 && Qb(5, 6) == 0) { - q[0] = q[1] = 4; - q[2] = (Qb[0] << 2) | ((Qb[4] & ~Qb[0]) << 1) | (Qb[3] & ~Qb[0]); - } else { - u32 C = 0; - if (Qb(1, 2) == 3) { - q[2] = 4; - C = (Qb(3, 4) << 3) | ((~Qb(5, 6) & 3) << 1) | Qb[0]; - } else { - q[2] = Qb(5, 6); - C = Qb(0, 4); - } - - Bits Cb(C); - if (Cb(0, 2) == 5) { - q[1] = 4; - q[0] = Cb(3, 4); - } else { - q[1] = Cb(3, 4); - q[0] = Cb(0, 2); - } - } - - for (std::size_t i = 0; i < 3; ++i) { - IntegerEncodedValue& val = result.emplace_back(IntegerEncoding::Quint, nBitsPerValue); - val.bit_value = m[i]; - val.quint_value = q[i]; - } -} - -// Fills result with the values that are encoded in the given -// bitstream. We must know beforehand what the maximum possible -// value is, and how many values we're decoding. -static void DecodeIntegerSequence(IntegerEncodedVector& result, InputBitStream& bits, u32 maxRange, u32 nValues) { - // Determine encoding parameters - IntegerEncodedValue val = ASTC_ENCODINGS_VALUES[maxRange]; - // Start decoding - for (u32 i = 0; i < nValues; ) { - switch (val.encoding) { - case IntegerEncoding::Quint: - DecodeQuintBlock(bits, result, val.num_bits); - i += 3; - break; - case IntegerEncoding::Trit: - DecodeTritBlock(bits, result, val.num_bits); - i += 5; - break; - case IntegerEncoding::JustBits: - val.bit_value = bits.ReadBits(val.num_bits); - result.push_back(val); - i++; - break; - } - } -} - -struct Texelw_params { - u32 m_Width = 0; - u32 m_Height = 0; - bool m_bDualPlane = false; - u32 m_MaxWeight = 0; - bool m_bError = false; - bool m_bVoidExtentLDR = false; - bool m_bVoidExtentHDR = false; - - constexpr u32 GetPackedBitSize() const noexcept { - // How many indices do we have? - u32 indices = (m_Height * m_Width) * (m_bDualPlane ? 2 : 1); - return ASTC_ENCODINGS_VALUES[m_MaxWeight].GetBitLength(indices); - } - - constexpr u32 GetNumWeightValues() const noexcept { - return (m_Height * m_Width) << (m_bDualPlane ? 2 : 0); - } -}; - -static Texelw_params DecodeBlockInfo(InputBitStream& strm) { - Texelw_params params; - - // Read the entire block mode all at once - u16 modeBits = u16(strm.ReadBits<11>()); - - // Does this match the void extent block mode? - if ((modeBits & 0x01FF) == 0x1FC) { - if (modeBits & 0x200) { - params.m_bVoidExtentHDR = true; - } else { - params.m_bVoidExtentLDR = true; - } - - // Next two bits must be one. - if (!(modeBits & 0x400) || !strm.ReadBit()) { - params.m_bError = true; - } - - return params; - } - - // First check if the last four bits are zero - if ((modeBits & 0xF) == 0) { - params.m_bError = true; - return params; - } - - // If the last two bits are zero, then if bits - // [6-8] are all ones, this is also reserved. - if ((modeBits & 0x3) == 0 && (modeBits & 0x1C0) == 0x1C0) { - params.m_bError = true; - return params; - } - - // Otherwise, there is no error... Figure out the layout - // of the block mode. Layout is determined by a number - // between 0 and 9 corresponding to table C.2.8 of the - // ASTC spec. - u32 layout = 0; - - if ((modeBits & 0x1) || (modeBits & 0x2)) { - // layout is in [0-4] - if (modeBits & 0x8) { - // layout is in [2-4] - if (modeBits & 0x4) { - // layout is in [3-4] - if (modeBits & 0x100) { - layout = 4; - } else { - layout = 3; - } - } else { - layout = 2; - } - } else { - // layout is in [0-1] - if (modeBits & 0x4) { - layout = 1; - } else { - layout = 0; - } - } - } else { - // layout is in [5-9] - if (modeBits & 0x100) { - // layout is in [7-9] - if (modeBits & 0x80) { - // layout is in [7-8] - assert((modeBits & 0x40) == 0U); - if (modeBits & 0x20) { - layout = 8; - } else { - layout = 7; - } - } else { - layout = 9; - } - } else { - // layout is in [5-6] - if (modeBits & 0x80) { - layout = 6; - } else { - layout = 5; - } - } - } - - assert(layout < 10); - - // Determine R - u32 R = !!(modeBits & 0x10); - if (layout < 5) { - R |= (modeBits & 0x3) << 1; - } else { - R |= (modeBits & 0xC) >> 1; - } - assert(2 <= R && R <= 7); - - // Determine width & height - switch (layout) { - case 0: { - u32 A = (modeBits >> 5) & 0x3; - u32 B = (modeBits >> 7) & 0x3; - params.m_Width = B + 4; - params.m_Height = A + 2; - break; - } - - case 1: { - u32 A = (modeBits >> 5) & 0x3; - u32 B = (modeBits >> 7) & 0x3; - params.m_Width = B + 8; - params.m_Height = A + 2; - break; - } - - case 2: { - u32 A = (modeBits >> 5) & 0x3; - u32 B = (modeBits >> 7) & 0x3; - params.m_Width = A + 2; - params.m_Height = B + 8; - break; - } - - case 3: { - u32 A = (modeBits >> 5) & 0x3; - u32 B = (modeBits >> 7) & 0x1; - params.m_Width = A + 2; - params.m_Height = B + 6; - break; - } - - case 4: { - u32 A = (modeBits >> 5) & 0x3; - u32 B = (modeBits >> 7) & 0x1; - params.m_Width = B + 2; - params.m_Height = A + 2; - break; - } - - case 5: { - u32 A = (modeBits >> 5) & 0x3; - params.m_Width = 12; - params.m_Height = A + 2; - break; - } - - case 6: { - u32 A = (modeBits >> 5) & 0x3; - params.m_Width = A + 2; - params.m_Height = 12; - break; - } - - case 7: { - params.m_Width = 6; - params.m_Height = 10; - break; - } - - case 8: { - params.m_Width = 10; - params.m_Height = 6; - break; - } - - case 9: { - u32 A = (modeBits >> 5) & 0x3; - u32 B = (modeBits >> 9) & 0x3; - params.m_Width = A + 6; - params.m_Height = B + 6; - break; - } - - default: - assert(false && "Don't know this layout..."); - params.m_bError = true; - break; - } - - // Determine whether or not we're using dual planes - // and/or high precision layouts. - bool D = (layout != 9) && (modeBits & 0x400); - bool H = (layout != 9) && (modeBits & 0x200); - - if (H) { - const u32 maxWeights[6] = {9, 11, 15, 19, 23, 31}; - params.m_MaxWeight = maxWeights[R - 2]; - } else { - const u32 maxWeights[6] = {1, 2, 3, 4, 5, 7}; - params.m_MaxWeight = maxWeights[R - 2]; - } - - params.m_bDualPlane = D; - - return params; -} - -// Replicates low num_bits such that [(to_bit - 1):(to_bit - 1 - from_bit)] -// is the same as [(num_bits - 1):0] and repeats all the way down. -// to_bit range is expected from 0 to 8 -// num_bits range is from 0 to 7 -[[nodiscard]] constexpr u32 Replicate32(u32 v, u32 num_bits, u32 to_bit) { - auto val = v & ((1 << num_bits) - 1); - val |= val << (num_bits << 0); - val |= val << (num_bits << 1); - val |= val << (num_bits << 2); - auto const val_mask = u32(1 << to_bit) - 1; - return (v & ~val_mask) | (val & val_mask); -} - -struct Pixel { - using ChannelType = s16; - ChannelType color[4] = {}; - - Pixel() = default; - Pixel(u32 a, u32 r, u32 g, u32 b, u32 bitDepth = 8) - : color{ChannelType(a), ChannelType(r), ChannelType(g), ChannelType(b)} - {} - - template - static float ConvertChannelToFloat(T channel, u8 bit_depth) { - auto const denominator = f32((1 << bit_depth) - 1); - return f32(channel) / denominator; - } - - const ChannelType& A() const { - return color[0]; - } - ChannelType& A() { - return color[0]; - } - const ChannelType& R() const { - return color[1]; - } - ChannelType& R() { - return color[1]; - } - const ChannelType& G() const { - return color[2]; - } - ChannelType& G() { - return color[2]; - } - const ChannelType& B() const { - return color[3]; - } - ChannelType& B() { - return color[3]; - } - const ChannelType& Component(u32 idx) const { - return color[idx]; - } - ChannelType& Component(u32 idx) { - return color[idx]; - } - - // Take all of the components, transform them to their 8-bit variants, - // and then pack each channel into an R8G8B8A8 32-bit integer. We assume - // that the architecture is little-endian, so the alpha channel will end - // up in the most-significant byte. - [[nodiscard]] inline u32 Pack() const noexcept { - return (u32(color[0]) << 24) - | (u32(color[3]) << 16) - | (u32(color[2]) << 8) - | (u32(color[1]) << 0); - } - - // Clamps the pixel to the range [0,255] - void ClampByte() { - for (u32 i = 0; i < 4; i++) - color[i] = (color[i] < 0) ? 0 : ((color[i] > 255) ? 255 : color[i]); - } -}; - -static void DecodeColorValues(u32* out, std::span data, const u32* modes, const u32 n_partitions, const u32 nBitsForColorData) { - // First figure out how many color values we have - u32 nValues = 0; - for (u32 i = 0; i < n_partitions; i++) { - nValues += ((modes[i] >> 2) + 1) << 1; - } - - // Then based on the number of values and the remaining number of bits, - // figure out the max value for each of them... - u32 range = 256; - while (--range > 0) { - auto const val = ASTC_ENCODINGS_VALUES[range]; - u32 bitLength = val.GetBitLength(nValues); - if (bitLength <= nBitsForColorData) { - // Find the smallest possible range that matches the given encoding - while (--range > 0) { - if (!ASTC_ENCODINGS_VALUES[range].MatchesEncoding(val)) - break; - } - - // Return to last matching range. - range++; - break; - } - } - - // We now have enough to decode our integer sequence. - IntegerEncodedVector decodedColorValues; - - InputBitStream colorStream(data, 0); - DecodeIntegerSequence(decodedColorValues, colorStream, range, nValues); - - // Once we have the decoded values, we need to dequantize them to the 0-255 range - // This procedure is outlined in ASTC spec C.2.13 - u32 outIdx = 0; - for (auto itr = decodedColorValues.begin(); itr != decodedColorValues.end(); ++itr) { - // Have we already decoded all that we need? - if (outIdx >= nValues) { - break; - } - - const IntegerEncodedValue& val = *itr; - u32 bitlen = val.num_bits; - u32 bitval = val.bit_value; - - assert(bitlen >= 1); - - // A is just the lsb replicated 9 times. - u32 A = (bitval & 1) ? ((1 << 9) - 1) : 0; - u32 B = 0, C = 0, D = 0; - - switch (val.encoding) { - // Replicate bits - case IntegerEncoding::JustBits: - out[outIdx++] = Replicate32(bitval, bitlen, 8); - break; - - // Use algorithm in C.2.13 - case IntegerEncoding::Trit: { - - D = val.trit_value; - - switch (bitlen) { - case 1: { - C = 204; - } break; - - case 2: { - C = 93; - // B = b000b0bb0 - u32 b = (bitval >> 1) & 1; - B = (b << 8) | (b << 4) | (b << 2) | (b << 1); - } break; - - case 3: { - C = 44; - // B = cb000cbcb - u32 cb = (bitval >> 1) & 3; - B = (cb << 7) | (cb << 2) | cb; - } break; - - case 4: { - C = 22; - // B = dcb000dcb - u32 dcb = (bitval >> 1) & 7; - B = (dcb << 6) | dcb; - } break; - - case 5: { - C = 11; - // B = edcb000ed - u32 edcb = (bitval >> 1) & 0xF; - B = (edcb << 5) | (edcb >> 2); - } break; - - case 6: { - C = 5; - // B = fedcb000f - u32 fedcb = (bitval >> 1) & 0x1F; - B = (fedcb << 4) | (fedcb >> 4); - } break; - - default: - assert(false && "Unsupported trit encoding for color values!"); - break; - } // switch(bitlen) - } // case IntegerEncoding::Trit - break; - - case IntegerEncoding::Quint: { - - D = val.quint_value; - - switch (bitlen) { - case 1: { - C = 113; - } break; - - case 2: { - C = 54; - // B = b0000bb00 - u32 b = (bitval >> 1) & 1; - B = (b << 8) | (b << 3) | (b << 2); - } break; - - case 3: { - C = 26; - // B = cb0000cbc - u32 cb = (bitval >> 1) & 3; - B = (cb << 7) | (cb << 1) | (cb >> 1); - } break; - - case 4: { - C = 13; - // B = dcb0000dc - u32 dcb = (bitval >> 1) & 7; - B = (dcb << 6) | (dcb >> 1); - } break; - - case 5: { - C = 6; - // B = edcb0000e - u32 edcb = (bitval >> 1) & 0xF; - B = (edcb << 5) | (edcb >> 3); - } break; - - default: - assert(false && "Unsupported quint encoding for color values!"); - break; - } // switch(bitlen) - } // case IntegerEncoding::Quint - break; - } // switch(val.encoding) - - if (val.encoding != IntegerEncoding::JustBits) { - u32 T = D * C + B; - T ^= A; - T = (A & 0x80) | (T >> 2); - out[outIdx++] = T; - } - } - - // Make sure that each of our values is in the proper range... - for (u32 i = 0; i < nValues; i++) { - assert(out[i] <= 255); - } -} - -static u32 UnquantizeTexelWeight(const IntegerEncodedValue& val) { - u32 bitval = val.bit_value; - u32 bitlen = val.num_bits; - - // A is just LSB repeated 7 times - u32 A = (bitval & 1) ? ((1 << 7) - 1) : 0; - u32 B = 0, C = 0, D = 0; - - u32 result = 0; - switch (val.encoding) { - case IntegerEncoding::JustBits: - result = Replicate32(bitval, bitlen, 6); - break; - - case IntegerEncoding::Trit: { - D = val.trit_value; - assert(D < 3); - - switch (bitlen) { - case 0: { - u32 results[3] = {0, 32, 63}; - result = results[D]; - } break; - - case 1: { - C = 50; - } break; - - case 2: { - C = 23; - u32 b = (bitval >> 1) & 1; - B = (b << 6) | (b << 2) | b; - } break; - - case 3: { - C = 11; - u32 cb = (bitval >> 1) & 3; - B = (cb << 5) | cb; - } break; - - default: - assert(false && "Invalid trit encoding for texel weight"); - break; - } - } break; - - case IntegerEncoding::Quint: { - D = val.quint_value; - assert(D < 5); - - switch (bitlen) { - case 0: { - u32 results[5] = {0, 16, 32, 47, 63}; - result = results[D]; - } break; - - case 1: { - C = 28; - } break; - - case 2: { - C = 13; - u32 b = (bitval >> 1) & 1; - B = (b << 6) | (b << 1); - } break; - - default: - assert(false && "Invalid quint encoding for texel weight"); - break; - } - } break; - } - - if (val.encoding != IntegerEncoding::JustBits && bitlen > 0) { - // Decode the value... - result = D * C + B; - result ^= A; - result = (A & 0x20) | (result >> 2); - } - - assert(result < 64); - - // Change from [0,63] to [0,64] - if (result > 32) { - result += 1; - } - - return result; -} - -static void UnquantizeTexelWeights(u32 out[2][144], const IntegerEncodedVector& weights, - const Texelw_params& params, const u32 blk_width, - const u32 blk_height) { - u32 weightIdx = 0; - u32 unquantized[2][144]; - - for (auto itr = weights.begin(); itr != weights.end(); ++itr) { - unquantized[0][weightIdx] = UnquantizeTexelWeight(*itr); - - if (params.m_bDualPlane) { - ++itr; - unquantized[1][weightIdx] = UnquantizeTexelWeight(*itr); - if (itr == weights.end()) { - break; - } - } - - if (++weightIdx >= (params.m_Width * params.m_Height)) - break; - } - - // Do infill if necessary (Section C.2.18) ... - u32 Ds = (1024 + (blk_width / 2)) / (blk_width - 1); - u32 Dt = (1024 + (blk_height / 2)) / (blk_height - 1); - - const u32 kPlaneScale = params.m_bDualPlane ? 2U : 1U; - for (u32 plane = 0; plane < kPlaneScale; plane++) - for (u32 t = 0; t < blk_height; t++) - for (u32 s = 0; s < blk_width; s++) { - u32 cs = Ds * s; - u32 ct = Dt * t; - - u32 gs = (cs * (params.m_Width - 1) + 32) >> 6; - u32 gt = (ct * (params.m_Height - 1) + 32) >> 6; - - u32 js = gs >> 4; - u32 fs = gs & 0xF; - - u32 jt = gt >> 4; - u32 ft = gt & 0x0F; - - u32 w11 = (fs * ft + 8) >> 4; - u32 w10 = ft - w11; - u32 w01 = fs - w11; - u32 w00 = 16 - fs - ft + w11; - - u32 v0 = js + jt * params.m_Width; - -#define FIND_TEXEL(tidx, bidx) \ - u32 p##bidx = 0; \ - do { \ - if ((tidx) < (params.m_Width * params.m_Height)) { \ - p##bidx = unquantized[plane][(tidx)]; \ - } \ - } while (0) - - FIND_TEXEL(v0, 00); - FIND_TEXEL(v0 + 1, 01); - FIND_TEXEL(v0 + params.m_Width, 10); - FIND_TEXEL(v0 + params.m_Width + 1, 11); - -#undef FIND_TEXEL - - out[plane][t * blk_width + s] = - (p00 * w00 + p01 * w01 + p10 * w10 + p11 * w11 + 8) >> 4; - } -} - -// Transfers a bit as described in C.2.14 -static inline void BitTransferSigned(int& a, int& b) { - b >>= 1; - b |= a & 0x80; - a >>= 1; - a &= 0x3F; - if (a & 0x20) - a -= 0x40; -} - -// Adds more precision to the blue channel as described -// in C.2.14 -static inline Pixel BlueContract(s32 a, s32 r, s32 g, s32 b) { - return Pixel(s16(a), s16((r + b) >> 1), s16((g + b) >> 1), s16(b), 8); -} - -// Partition selection functions as specified in -// C.2.21 -static inline u32 hash52(u32 p) { - p ^= p >> 15; - p -= p << 17; - p += p << 7; - p += p << 4; - p ^= p >> 5; - p += p << 16; - p ^= p >> 7; - p ^= p >> 3; - p ^= p << 6; - p ^= p >> 17; - return p; -} - -static u32 SelectPartition(s32 seed, s32 x, s32 y, s32 z, s32 partitionCount, s32 smallBlock) { - if (1 == partitionCount) - return 0; - - if (smallBlock) { - x <<= 1; - y <<= 1; - z <<= 1; - } - - seed += (partitionCount - 1) * 1024; - - u32 rnum = hash52(u32(seed)); - u8 seed1 = u8(rnum & 0xF); - u8 seed2 = u8((rnum >> 4) & 0xF); - u8 seed3 = u8((rnum >> 8) & 0xF); - u8 seed4 = u8((rnum >> 12) & 0xF); - u8 seed5 = u8((rnum >> 16) & 0xF); - u8 seed6 = u8((rnum >> 20) & 0xF); - u8 seed7 = u8((rnum >> 24) & 0xF); - u8 seed8 = u8((rnum >> 28) & 0xF); - u8 seed9 = u8((rnum >> 18) & 0xF); - u8 seed10 = u8((rnum >> 22) & 0xF); - u8 seed11 = u8((rnum >> 26) & 0xF); - u8 seed12 = u8(((rnum >> 30) | (rnum << 2)) & 0xF); - - seed1 = u8(seed1 * seed1); - seed2 = u8(seed2 * seed2); - seed3 = u8(seed3 * seed3); - seed4 = u8(seed4 * seed4); - seed5 = u8(seed5 * seed5); - seed6 = u8(seed6 * seed6); - seed7 = u8(seed7 * seed7); - seed8 = u8(seed8 * seed8); - seed9 = u8(seed9 * seed9); - seed10 = u8(seed10 * seed10); - seed11 = u8(seed11 * seed11); - seed12 = u8(seed12 * seed12); - - s32 sh1, sh2, sh3; - if (seed & 1) { - sh1 = (seed & 2) ? 4 : 5; - sh2 = (partitionCount == 3) ? 6 : 5; - } else { - sh1 = (partitionCount == 3) ? 6 : 5; - sh2 = (seed & 2) ? 4 : 5; - } - sh3 = (seed & 0x10) ? sh1 : sh2; - - seed1 = u8(seed1 >> sh1); - seed2 = u8(seed2 >> sh2); - seed3 = u8(seed3 >> sh1); - seed4 = u8(seed4 >> sh2); - seed5 = u8(seed5 >> sh1); - seed6 = u8(seed6 >> sh2); - seed7 = u8(seed7 >> sh1); - seed8 = u8(seed8 >> sh2); - seed9 = u8(seed9 >> sh3); - seed10 = u8(seed10 >> sh3); - seed11 = u8(seed11 >> sh3); - seed12 = u8(seed12 >> sh3); - - s32 a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14); - s32 b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10); - s32 c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6); - s32 d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2); - - a &= 0x3F; - b &= 0x3F; - c &= 0x3F; - d &= 0x3F; - - if (partitionCount < 4) - d = 0; - if (partitionCount < 3) - c = 0; - - if (a >= b && a >= c && a >= d) - return 0; - else if (b >= c && b >= d) - return 1; - else if (c >= d) - return 2; - return 3; -} - -static inline u32 Select2DPartition(s32 seed, s32 x, s32 y, s32 partitionCount, s32 smallBlock) { - return SelectPartition(seed, x, y, 0, partitionCount, smallBlock); -} - -// Section C.2.14 -static void ComputeEndpoints(Pixel& ep1, Pixel& ep2, const u32*& colorValues, u32 colorEndpointMode) { -#define READ_UINT_VALUES(N) \ - u32 v[N]; \ - for (u32 i = 0; i < N; i++) { \ - v[i] = *(colorValues++); \ - } - -#define READ_INT_VALUES(N) \ - s32 v[N]; \ - for (u32 i = 0; i < N; i++) { \ - v[i] = static_cast(*(colorValues++)); \ - } - - switch (colorEndpointMode) { - case 0: { - READ_UINT_VALUES(2) - ep1 = Pixel(0xFF, v[0], v[0], v[0]); - ep2 = Pixel(0xFF, v[1], v[1], v[1]); - } break; - - case 1: { - READ_UINT_VALUES(2) - u32 L0 = (v[0] >> 2) | (v[1] & 0xC0); - u32 L1 = (std::min)(L0 + (v[1] & 0x3F), 0xFFU); - ep1 = Pixel(0xFF, L0, L0, L0); - ep2 = Pixel(0xFF, L1, L1, L1); - } break; - - case 4: { - READ_UINT_VALUES(4) - ep1 = Pixel(v[2], v[0], v[0], v[0]); - ep2 = Pixel(v[3], v[1], v[1], v[1]); - } break; - - case 5: { - READ_INT_VALUES(4) - BitTransferSigned(v[1], v[0]); - BitTransferSigned(v[3], v[2]); - ep1 = Pixel(v[2], v[0], v[0], v[0]); - ep2 = Pixel(v[2] + v[3], v[0] + v[1], v[0] + v[1], v[0] + v[1]); - ep1.ClampByte(); - ep2.ClampByte(); - } break; - - case 6: { - READ_UINT_VALUES(4) - ep1 = Pixel(0xFF, v[0] * v[3] >> 8, v[1] * v[3] >> 8, v[2] * v[3] >> 8); - ep2 = Pixel(0xFF, v[0], v[1], v[2]); - } break; - - case 8: { - READ_UINT_VALUES(6) - if (v[1] + v[3] + v[5] >= v[0] + v[2] + v[4]) { - ep1 = Pixel(0xFF, v[0], v[2], v[4]); - ep2 = Pixel(0xFF, v[1], v[3], v[5]); - } else { - ep1 = BlueContract(0xFF, v[1], v[3], v[5]); - ep2 = BlueContract(0xFF, v[0], v[2], v[4]); - } - } break; - - case 9: { - READ_INT_VALUES(6) - BitTransferSigned(v[1], v[0]); - BitTransferSigned(v[3], v[2]); - BitTransferSigned(v[5], v[4]); - if (v[1] + v[3] + v[5] >= 0) { - ep1 = Pixel(0xFF, v[0], v[2], v[4]); - ep2 = Pixel(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5]); - } else { - ep1 = BlueContract(0xFF, v[0] + v[1], v[2] + v[3], v[4] + v[5]); - ep2 = BlueContract(0xFF, v[0], v[2], v[4]); - } - ep1.ClampByte(); - ep2.ClampByte(); - } break; - - case 10: { - READ_UINT_VALUES(6) - ep1 = Pixel(v[4], v[0] * v[3] >> 8, v[1] * v[3] >> 8, v[2] * v[3] >> 8); - ep2 = Pixel(v[5], v[0], v[1], v[2]); - } break; - - case 12: { - READ_UINT_VALUES(8) - if (v[1] + v[3] + v[5] >= v[0] + v[2] + v[4]) { - ep1 = Pixel(v[6], v[0], v[2], v[4]); - ep2 = Pixel(v[7], v[1], v[3], v[5]); - } else { - ep1 = BlueContract(v[7], v[1], v[3], v[5]); - ep2 = BlueContract(v[6], v[0], v[2], v[4]); - } - } break; - - case 13: { - READ_INT_VALUES(8) - BitTransferSigned(v[1], v[0]); - BitTransferSigned(v[3], v[2]); - BitTransferSigned(v[5], v[4]); - BitTransferSigned(v[7], v[6]); - if (v[1] + v[3] + v[5] >= 0) { - ep1 = Pixel(v[6], v[0], v[2], v[4]); - ep2 = Pixel(v[7] + v[6], v[0] + v[1], v[2] + v[3], v[4] + v[5]); - } else { - ep1 = BlueContract(v[6] + v[7], v[0] + v[1], v[2] + v[3], v[4] + v[5]); - ep2 = BlueContract(v[6], v[0], v[2], v[4]); - } - ep1.ClampByte(); - ep2.ClampByte(); - } break; - - default: - assert(false && "Unsupported color endpoint mode (is it HDR?)"); - break; - } - -#undef READ_UINT_VALUES -#undef READ_INT_VALUES -} - -static void DecompressBlock(std::span in_buf, const u32 blk_width, const u32 blk_height, std::span out_buf) { - InputBitStream strm(in_buf); - Texelw_params w_params = DecodeBlockInfo(strm); - - // Read num partitions - u32 n_partitions = strm.ReadBits<2>() + 1; - assert(n_partitions <= 4); - // Was there an error? - assert(!w_params.m_bError - && !w_params.m_bVoidExtentLDR - && !w_params.m_bVoidExtentHDR - && !(w_params.m_Width > blk_width) - && !(w_params.m_Height > blk_height) - && !(n_partitions == 4 && w_params.m_bDualPlane) - ); - - // Based on the number of partitions, read the color endpoint mode for - // each partition. - - // Determine partitions, partition index, and color endpoint modes - u32 plane_index = UINT32_MAX; - u32 partition_index{}; - u32 colorEndpointMode[4] = {0, 0, 0, 0}; - - // Define color data. - u8 color_endpoint_data[16] = {}; - OutputBitStream color_endpoint_stream(color_endpoint_data, 16 * 8, 0); - - // Read extra config data... - u32 base_cem = 0; - if (n_partitions == 1) { - colorEndpointMode[0] = strm.ReadBits<4>(); - partition_index = 0; - } else { - partition_index = strm.ReadBits<10>(); - base_cem = strm.ReadBits<6>(); - } - u32 baseMode = (base_cem & 3); - - // Remaining bits are color endpoint data... - u32 nWeightBits = w_params.GetPackedBitSize(); - s32 rem_bits = 128 - nWeightBits - s32(strm.GetBitsRead()); - - // Consider extra bits prior to texel data... - u32 extra_cem_bits = 0; - if (baseMode) { - assert(n_partitions == 2 || n_partitions == 3 || n_partitions == 4); - extra_cem_bits += (0x85200 >> (n_partitions * 4)) & 0x0f; - } - rem_bits -= extra_cem_bits; - - // Do we have a dual plane situation? - u32 planeSelectorBits = 0; - if (w_params.m_bDualPlane) { - planeSelectorBits = 2; - } - rem_bits -= planeSelectorBits; - - // Read color data... - u32 colorDataBits = rem_bits; - while (rem_bits > 0) { - u32 nb = (std::min)(rem_bits, 8); - u32 b = strm.ReadBits(nb); - color_endpoint_stream.WriteBits(b, nb); - rem_bits -= 8; - } - - // Read the plane selection bits - plane_index = strm.ReadBits(planeSelectorBits); - - // Read the rest of the CEM - if (baseMode) { - u32 extraCEM = strm.ReadBits(extra_cem_bits); - u32 CEM = (extraCEM << 6) | base_cem; - CEM >>= 2; - - bool C[4] = {0}; - for (u32 i = 0; i < n_partitions; i++) { - C[i] = CEM & 1; - CEM >>= 1; - } - - u8 M[4] = {0}; - for (u32 i = 0; i < n_partitions; i++) { - M[i] = CEM & 3; - CEM >>= 2; - assert(M[i] <= 3); - } - - for (u32 i = 0; i < n_partitions; i++) { - colorEndpointMode[i] = baseMode; - if (!(C[i])) - colorEndpointMode[i] -= 1; - colorEndpointMode[i] <<= 2; - colorEndpointMode[i] |= M[i]; - } - } else if (n_partitions > 1) { - u32 CEM = base_cem >> 2; - for (u32 i = 0; i < n_partitions; i++) { - colorEndpointMode[i] = CEM; - } - } - - // Make sure everything up till here is sane. - for (u32 i = 0; i < n_partitions; i++) { - assert(colorEndpointMode[i] < 16); - } - assert(strm.GetBitsRead() + w_params.GetPackedBitSize() == 128); - - // Decode both color data and texel weight data - u32 colorValues[32]; // Four values, two endpoints, four maximum partitions - DecodeColorValues(colorValues, color_endpoint_data, colorEndpointMode, n_partitions, - colorDataBits); - - Pixel endpoints[4][2]; - const u32* colorValuesPtr = colorValues; - for (u32 i = 0; i < n_partitions; i++) { - ComputeEndpoints(endpoints[i][0], endpoints[i][1], colorValuesPtr, colorEndpointMode[i]); - } - - // Read the texel weight data.. - std::array texel_weights; - std::ranges::copy(in_buf, texel_weights.begin()); - - // Reverse everything - for (u32 i = 0; i < 8; i++) { -// Taken from http://graphics.stanford.edu/~seander/bithacks.html#ReverseByteWith64Bits -#define REVERSE_BYTE(b) (((b)*0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32 - u8 a = u8(REVERSE_BYTE(texel_weights[i])); - u8 b = u8(REVERSE_BYTE(texel_weights[15 - i])); -#undef REVERSE_BYTE - - texel_weights[i] = b; - texel_weights[15 - i] = a; - } - - // Make sure that higher non-texel bits are set to zero - const u32 clearByteStart = (w_params.GetPackedBitSize() >> 3) + 1; - if (clearByteStart > 0 && clearByteStart <= texel_weights.size()) { - texel_weights[clearByteStart - 1] &= u8((1 << (w_params.GetPackedBitSize() % 8)) - 1); - std::memset(texel_weights.data() + clearByteStart, 0, (std::min)(16U - clearByteStart, 16U)); - } - - IntegerEncodedVector texelWeightValues; - - InputBitStream weightStream(texel_weights); - - DecodeIntegerSequence(texelWeightValues, weightStream, w_params.m_MaxWeight, - w_params.GetNumWeightValues()); - - // Blocks can be at most 12x12, so we can have as many as 144 weights - u32 weights[2][144]; - UnquantizeTexelWeights(weights, texelWeightValues, w_params, blk_width, blk_height); - - // Now that we have endpoints and weights, we can interpolate and generate - // the proper decoding... - for (u32 j = 0; j < blk_height; j++) - for (u32 i = 0; i < blk_width; i++) { - u32 partition = Select2DPartition(partition_index, i, j, n_partitions, (blk_height * blk_width) < 32); - assert(partition < n_partitions); - Pixel p; - for (u32 c = 0; c < 4; c++) { - u32 C0 = endpoints[partition][0].Component(c); - u32 C1 = endpoints[partition][1].Component(c); - C0 = (C0 & 0xff) | ((C0 & 0xff) << 8); - C1 = (C1 & 0xff) | ((C0 & 0xff) << 8); - u32 plane = 0; - if (w_params.m_bDualPlane && (((plane_index + 1) & 3) == c)) { - plane = 1; - } - u32 weight = weights[plane][j * blk_width + i]; - u32 C = (C0 * (64 - weight) + C1 * weight + 32) / 64; - if (C == 65535) { - p.Component(c) = 255; - } else { - f64 Cf = f64(C); - p.Component(c) = u16(255.0 * (Cf / 65536.0) + 0.5); - } - } - out_buf[j * blk_width + i] = p.Pack(); - } -} - -void Decompress(std::span data, uint32_t width, uint32_t height, uint32_t depth, - uint32_t block_width, uint32_t block_height, std::span output) { - const u32 rows = Common::DivideUp(height, block_height); - const u32 cols = Common::DivideUp(width, block_width); - - Common::ThreadWorker& workers = GetThreadWorkers(); - for (u32 z = 0; z < depth; ++z) { - const u32 depth_offset = z * height * width * 4; - for (u32 y_index = 0; y_index < rows; ++y_index) { - auto decompress_stride = [data, width, height, block_width, block_height, output, rows, - cols, z, depth_offset, y_index] { - const u32 y = y_index * block_height; - for (u32 x_index = 0; x_index < cols; ++x_index) { - const u32 block_index = (z * rows * cols) + (y_index * cols) + x_index; - const u32 x = x_index * block_width; - - const std::span blockPtr{data.subspan(block_index * 16, 16)}; - - // Blocks can be at most 12x12 - std::array uncompData; - DecompressBlock(blockPtr, block_width, block_height, uncompData); - - u32 decompWidth = (std::min)(block_width, width - x); - u32 decompHeight = (std::min)(block_height, height - y); - - const std::span outRow = output.subspan(depth_offset + (y * width + x) * 4); - for (u32 h = 0; h < decompHeight; ++h) { - std::memcpy(outRow.data() + h * width * 4, uncompData.data() + h * block_width, decompWidth * 4); - } - } - }; - workers.QueueWork(std::move(decompress_stride)); - } - workers.WaitForRequests(); - } -} - -} // namespace Tegra::Texture::ASTC diff --git a/src/video_core/textures/astc.h b/src/video_core/textures/astc.h deleted file mode 100644 index afd3933c3e..0000000000 --- a/src/video_core/textures/astc.h +++ /dev/null @@ -1,11 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -namespace Tegra::Texture::ASTC { - -void Decompress(std::span data, uint32_t width, uint32_t height, uint32_t depth, - uint32_t block_width, uint32_t block_height, std::span output); - -} // namespace Tegra::Texture::ASTC