From c9c136bea7567c825be19c1dcb481a47ee6536f6 Mon Sep 17 00:00:00 2001 From: lizzie Date: Fri, 20 Feb 2026 00:52:07 +0100 Subject: [PATCH] [texture_cache, buffer_cache] Added TLS handling + changed command queue for GPU threading. (#3579) (Merge of #3495 + #3108) This PR reworks and simplifies math operations on hot pointers inside the accesses and requests to the buffer cache and texture cache, removing the previous indirection logic and replacing it with a PoD approach. This ensures less CPU time is spent on the same request, letting it flow directly into the next stage of the renderer. In the same way, the command queue currently uses an internal mutex that constrains the flow of data within the GPU threads; we're moving over to a single command queue. I verified it is preferable to keep using a single mutex instead of the internal mutex plus a mutex per operation, which resolve themselves. In simpler words, this aims to improve performance on those games and devices where the waits for the next orders on GPU commands were heavier than a single verification. 
Co-Authored-by: @CamilleLaVey Co-Authored-by: @Lizzie Co-authored-by: CamilleLaVey Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3579 Reviewed-by: CamilleLaVey Co-authored-by: lizzie Co-committed-by: lizzie --- src/core/device_memory_manager.h | 7 ++ src/core/device_memory_manager.inc | 44 ++++++- src/video_core/buffer_cache/buffer_base.h | 8 +- src/video_core/buffer_cache/buffer_cache.h | 115 +++++++++++++----- .../buffer_cache/buffer_cache_base.h | 15 +++ src/video_core/gpu_thread.h | 5 +- .../renderer_opengl/gl_compute_pipeline.cpp | 6 +- .../renderer_opengl/gl_graphics_pipeline.cpp | 6 +- .../renderer_vulkan/vk_compute_pipeline.cpp | 4 + .../renderer_vulkan/vk_graphics_pipeline.cpp | 4 + src/video_core/texture_cache/texture_cache.h | 114 +++++++++++------ .../texture_cache/texture_cache_base.h | 10 ++ 12 files changed, 266 insertions(+), 72 deletions(-) diff --git a/src/core/device_memory_manager.h b/src/core/device_memory_manager.h index 41227591c7..9d86a17d12 100644 --- a/src/core/device_memory_manager.h +++ b/src/core/device_memory_manager.h @@ -127,6 +127,11 @@ public: void UpdatePagesCachedBatch(std::span> ranges, s32 delta); private: + struct TranslationEntry { + DAddr guest_page{}; + u8* host_ptr{}; + }; + // Internal helper that performs the update assuming the caller already holds the necessary lock. 
void UpdatePagesCachedCountNoLock(DAddr addr, size_t size, s32 delta); @@ -195,6 +200,8 @@ private: } Common::VirtualBuffer cpu_backing_address; + std::array t_slot{}; + u32 cache_cursor = 0; using CounterType = u8; using CounterAtomicType = std::atomic_uint8_t; static constexpr size_t subentries = 8 / sizeof(CounterType); diff --git a/src/core/device_memory_manager.inc b/src/core/device_memory_manager.inc index 4be26d9631..08fe799174 100644 --- a/src/core/device_memory_manager.inc +++ b/src/core/device_memory_manager.inc @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project @@ -247,6 +247,7 @@ void DeviceMemoryManager::Map(DAddr address, VAddr virtual_address, size } impl->multi_dev_address.Register(new_dev, start_id); } + t_slot = {}; if (track) { TrackContinuityImpl(address, virtual_address, size, asid); } @@ -278,6 +279,7 @@ void DeviceMemoryManager::Unmap(DAddr address, size_t size) { compressed_device_addr[phys_addr - 1] = new_start | MULTI_FLAG; } } + t_slot = {}; } template void DeviceMemoryManager::TrackContinuityImpl(DAddr address, VAddr virtual_address, @@ -417,6 +419,26 @@ void DeviceMemoryManager::WalkBlock(DAddr addr, std::size_t size, auto o template void DeviceMemoryManager::ReadBlock(DAddr address, void* dest_pointer, size_t size) { device_inter->FlushRegion(address, size); + const std::size_t page_offset = address & Memory::YUZU_PAGEMASK; + if (size <= Memory::YUZU_PAGESIZE - page_offset) { + const DAddr guest_page = address & ~static_cast(Memory::YUZU_PAGEMASK); + for (size_t i = 0; i < 4; ++i) { + if (t_slot[i].guest_page == guest_page && t_slot[i].host_ptr != nullptr) { + std::memcpy(dest_pointer, t_slot[i].host_ptr + page_offset, size); + return; + } + } + + const std::size_t page_index = address >> Memory::YUZU_PAGEBITS; + const auto phys_addr = 
compressed_physical_ptr[page_index]; + if (phys_addr != 0) { + auto* const mem_ptr = GetPointerFromRaw((PAddr(phys_addr - 1) << Memory::YUZU_PAGEBITS)); + t_slot[cache_cursor % t_slot.size()] = TranslationEntry{.guest_page = guest_page, .host_ptr = mem_ptr}; + cache_cursor = (cache_cursor + 1) & 3U; + std::memcpy(dest_pointer, mem_ptr + page_offset, size); + return; + } + } WalkBlock( address, size, [&](size_t copy_amount, DAddr current_vaddr) { @@ -455,6 +477,26 @@ void DeviceMemoryManager::WriteBlock(DAddr address, const void* src_poin template void DeviceMemoryManager::ReadBlockUnsafe(DAddr address, void* dest_pointer, size_t size) { + const std::size_t page_offset = address & Memory::YUZU_PAGEMASK; + if (size <= Memory::YUZU_PAGESIZE - page_offset) { + const DAddr guest_page = address & ~static_cast(Memory::YUZU_PAGEMASK); + for (size_t i = 0; i < 4; ++i) { + if (t_slot[i].guest_page == guest_page && t_slot[i].host_ptr != nullptr) { + std::memcpy(dest_pointer, t_slot[i].host_ptr + page_offset, size); + return; + } + } + + const std::size_t page_index = address >> Memory::YUZU_PAGEBITS; + const auto phys_addr = compressed_physical_ptr[page_index]; + if (phys_addr != 0) { + auto* const mem_ptr = GetPointerFromRaw((PAddr(phys_addr - 1) << Memory::YUZU_PAGEBITS)); + t_slot[cache_cursor % t_slot.size()] = TranslationEntry{.guest_page = guest_page, .host_ptr = mem_ptr}; + cache_cursor = (cache_cursor + 1) & 3U; + std::memcpy(dest_pointer, mem_ptr + page_offset, size); + return; + } + } WalkBlock( address, size, [&](size_t copy_amount, DAddr current_vaddr) { diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h index 40e98e3952..bec2dac246 100644 --- a/src/video_core/buffer_cache/buffer_base.h +++ b/src/video_core/buffer_cache/buffer_base.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator 
Project // SPDX-License-Identifier: GPL-3.0-or-later @@ -39,7 +42,8 @@ public: static constexpr u64 BASE_PAGE_SIZE = 1ULL << BASE_PAGE_BITS; explicit BufferBase(VAddr cpu_addr_, u64 size_bytes_) - : cpu_addr{cpu_addr_}, size_bytes{size_bytes_} {} + : cpu_addr_cached{static_cast(cpu_addr_)}, cpu_addr{cpu_addr_}, + size_bytes{size_bytes_} {} explicit BufferBase(NullBufferParams) {} @@ -97,6 +101,8 @@ public: return cpu_addr; } + DAddr cpu_addr_cached = 0; + /// Returns the offset relative to the given CPU address /// @pre IsInBounds returns true [[nodiscard]] u32 Offset(VAddr other_cpu_addr) const noexcept { diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index b368ffea05..7b92edecaa 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project @@ -382,6 +382,10 @@ void BufferCache

::BindHostComputeBuffers() { BindHostComputeUniformBuffers(); BindHostComputeStorageBuffers(); BindHostComputeTextureBuffers(); + if (any_buffer_uploaded) { + runtime.PostCopyBarrier(); + any_buffer_uploaded = false; + } } template @@ -763,45 +767,85 @@ void BufferCache

::BindHostIndexBuffer() { } } +template +void BufferCache

::BindHostVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, + u32 stride) { + if constexpr (IS_OPENGL) { + runtime.BindVertexBuffer(index, buffer, offset, size, stride); + } else { + runtime.BindVertexBuffer(index, buffer.Handle(), offset, size, stride); + } +} + +template +Binding& BufferCache

::VertexBufferSlot(u32 index) { + ASSERT(index < NUM_VERTEX_BUFFERS); + return v_buffer[index]; +} + +template +const Binding& BufferCache

::VertexBufferSlot(u32 index) const { + ASSERT(index < NUM_VERTEX_BUFFERS); + return v_buffer[index]; +} + +template +void BufferCache

::UpdateVertexBufferSlot(u32 index, const Binding& binding) { + Binding& slot = VertexBufferSlot(index); + if (slot.device_addr != binding.device_addr || slot.size != binding.size) { + ++vertex_buffers_serial; + } + slot = binding; + if (binding.buffer_id != NULL_BUFFER_ID && binding.size != 0) { + enabled_vertex_buffers_mask |= (1u << index); + } else { + enabled_vertex_buffers_mask &= ~(1u << index); + } +} + template void BufferCache

::BindHostVertexBuffers() { - HostBindings host_bindings; - bool any_valid{false}; auto& flags = maxwell3d->dirty.flags; - for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { - const Binding& binding = channel_state->vertex_buffers[index]; + u32 enabled_mask = enabled_vertex_buffers_mask; + HostBindings bindings{}; + u32 last_index = std::numeric_limits::max(); + const auto flush_bindings = [&]() { + if (bindings.buffers.empty()) { + return; + } + bindings.max_index = bindings.min_index + static_cast(bindings.buffers.size()); + runtime.BindVertexBuffers(bindings); + bindings = HostBindings{}; + last_index = std::numeric_limits::max(); + }; + while (enabled_mask != 0) { + const u32 index = std::countr_zero(enabled_mask); + enabled_mask &= (enabled_mask - 1); + const Binding& binding = VertexBufferSlot(index); Buffer& buffer = slot_buffers[binding.buffer_id]; TouchBuffer(buffer, binding.buffer_id); SynchronizeBuffer(buffer, binding.device_addr, binding.size); if (!flags[Dirty::VertexBuffer0 + index]) { + flush_bindings(); continue; } flags[Dirty::VertexBuffer0 + index] = false; - - host_bindings.min_index = (std::min)(host_bindings.min_index, index); - host_bindings.max_index = (std::max)(host_bindings.max_index, index); - any_valid = true; - } - - if (any_valid) { - host_bindings.max_index++; - for (u32 index = host_bindings.min_index; index < host_bindings.max_index; index++) { - flags[Dirty::VertexBuffer0 + index] = false; - - const Binding& binding = channel_state->vertex_buffers[index]; - Buffer& buffer = slot_buffers[binding.buffer_id]; - - const u32 stride = maxwell3d->regs.vertex_streams[index].stride; - const u32 offset = buffer.Offset(binding.device_addr); - buffer.MarkUsage(offset, binding.size); - - host_bindings.buffers.push_back(&buffer); - host_bindings.offsets.push_back(offset); - host_bindings.sizes.push_back(binding.size); - host_bindings.strides.push_back(stride); + const u32 stride = maxwell3d->regs.vertex_streams[index].stride; + const u32 
offset = buffer.Offset(binding.device_addr); + buffer.MarkUsage(offset, binding.size); + if (!bindings.buffers.empty() && index != last_index + 1) { + flush_bindings(); } - runtime.BindVertexBuffers(host_bindings); + if (bindings.buffers.empty()) { + bindings.min_index = index; + } + bindings.buffers.push_back(&buffer); + bindings.offsets.push_back(offset); + bindings.sizes.push_back(binding.size); + bindings.strides.push_back(stride); + last_index = index; } + flush_bindings(); } template @@ -1205,17 +1249,20 @@ void BufferCache

::UpdateVertexBuffer(u32 index) { u32 size = address_size; // TODO: Analyze stride and number of vertices if (array.enable == 0 || size == 0 || !device_addr) { channel_state->vertex_buffers[index] = NULL_BINDING; + UpdateVertexBufferSlot(index, NULL_BINDING); return; } if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end) || size >= 64_MiB) { size = static_cast(gpu_memory->MaxContinuousRange(gpu_addr_begin, size)); } const BufferId buffer_id = FindBuffer(*device_addr, size); - channel_state->vertex_buffers[index] = Binding{ + const Binding binding{ .device_addr = *device_addr, .size = size, .buffer_id = buffer_id, }; + channel_state->vertex_buffers[index] = binding; + UpdateVertexBufferSlot(index, binding); } template @@ -1528,12 +1575,12 @@ void BufferCache

::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept { template bool BufferCache

::SynchronizeBuffer(Buffer& buffer, DAddr device_addr, u32 size) { - boost::container::small_vector copies; + upload_copies.clear(); u64 total_size_bytes = 0; u64 largest_copy = 0; - DAddr buffer_start = buffer.CpuAddr(); + const DAddr buffer_start = buffer.cpu_addr_cached; memory_tracker.ForEachUploadRange(device_addr, size, [&](u64 device_addr_out, u64 range_size) { - copies.push_back(BufferCopy{ + upload_copies.push_back(BufferCopy{ .src_offset = total_size_bytes, .dst_offset = device_addr_out - buffer_start, .size = range_size, @@ -1544,8 +1591,9 @@ bool BufferCache

::SynchronizeBuffer(Buffer& buffer, DAddr device_addr, u32 si if (total_size_bytes == 0) { return true; } - const std::span copies_span(copies.data(), copies.size()); + const std::span copies_span(upload_copies.data(), upload_copies.size()); UploadMemory(buffer, total_size_bytes, largest_copy, copies_span); + any_buffer_uploaded = true; return false; } @@ -1735,6 +1783,7 @@ void BufferCache

::DeleteBuffer(BufferId buffer_id, bool do_not_mark) { auto& binding = channel_state->vertex_buffers[index]; if (binding.buffer_id == buffer_id) { binding.buffer_id = BufferId{}; + UpdateVertexBufferSlot(index, binding); dirty_vertex_buffers.push_back(index); } } diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index 3c0bc81758..0596329392 100644 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ b/src/video_core/buffer_cache/buffer_cache_base.h @@ -320,6 +320,7 @@ public: std::recursive_mutex mutex; Runtime& runtime; + bool any_buffer_uploaded = false; private: template @@ -372,6 +373,8 @@ private: void BindHostTransformFeedbackBuffers(); + void BindHostVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, u32 stride); + void BindHostComputeUniformBuffers(); void BindHostComputeStorageBuffers(); @@ -453,6 +456,12 @@ private: [[nodiscard]] bool HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept; + [[nodiscard]] Binding& VertexBufferSlot(u32 index); + + [[nodiscard]] const Binding& VertexBufferSlot(u32 index) const; + + void UpdateVertexBufferSlot(u32 index, const Binding& binding); + void ClearDownload(DAddr base_addr, u64 size); void InlineMemoryImplementation(DAddr dest_address, size_t copy_size, @@ -472,6 +481,12 @@ private: u32 last_index_count = 0; + u32 enabled_vertex_buffers_mask = 0; + u64 vertex_buffers_serial = 0; + std::array v_buffer{}; + + boost::container::small_vector upload_copies; + MemoryTracker memory_tracker; Common::RangeSet uncommitted_gpu_modified_ranges; Common::RangeSet gpu_modified_ranges; diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index dc0fce9f82..ac1283a338 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2019 yuzu 
Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -89,7 +92,7 @@ struct CommandDataContainer { /// Struct used to synchronize the GPU thread struct SynchState final { - using CommandQueue = Common::MPSCQueue; + using CommandQueue = Common::SPSCQueue; std::mutex write_lock; CommandQueue queue; u64 last_fence{}; diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index 661fbef2b0..d1c61be743 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project @@ -189,6 +189,10 @@ void ComputePipeline::Configure() { buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers); buffer_cache.runtime.SetImagePointers(textures.data(), images.data()); buffer_cache.BindHostComputeBuffers(); + if (buffer_cache.any_buffer_uploaded) { + buffer_cache.runtime.PostCopyBarrier(); + buffer_cache.any_buffer_uploaded = false; + } const VideoCommon::ImageViewInOut* views_it{views.data() + num_texture_buffers + num_image_buffers}; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 2abbd0de78..ee3498428e 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project @@ -558,6 +558,10 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) { if (image_binding != 0) { glBindImageTextures(0, 
image_binding, images.data()); } + if (buffer_cache.any_buffer_uploaded) { + buffer_cache.runtime.PostCopyBarrier(); + buffer_cache.any_buffer_uploaded = false; + } return true; } diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 96a9fe59e7..51b5141a06 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -203,6 +203,10 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, buffer_cache.UpdateComputeBuffers(); buffer_cache.BindHostComputeBuffers(); + if (buffer_cache.any_buffer_uploaded) { + buffer_cache.runtime.PostCopyBarrier(); + buffer_cache.any_buffer_uploaded = false; + } RescalingPushConstant rescaling; const VideoCommon::SamplerId* samplers_it{samplers.data()}; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index d36553da4a..d156baa77b 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -496,6 +496,10 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) { if constexpr (Spec::enabled_stages[4]) { prepare_stage(4); } + if (buffer_cache.any_buffer_uploaded) { + buffer_cache.runtime.PostCopyBarrier(); + buffer_cache.any_buffer_uploaded = false; + } texture_cache.UpdateRenderTargets(false); texture_cache.CheckFeedbackLoop(views); ConfigureDraw(rescaling, render_area); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 9f31c68cd2..71210ffe6e 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -291,55 +291,59 @@ void TextureCache

::CheckFeedbackLoop(std::span views) { return; } + if (render_targets_serial == last_feedback_loop_serial && + texture_bindings_serial == last_feedback_texture_serial) { + if (last_feedback_loop_result) { + runtime.BarrierFeedbackLoop(); + } + return; + } + + if (rt_active_mask == 0) { + last_feedback_loop_serial = render_targets_serial; + last_feedback_texture_serial = texture_bindings_serial; + last_feedback_loop_result = false; + return; + } + const u32 depth_bit = 1u << NUM_RT; + const bool depth_active = (rt_active_mask & depth_bit) != 0; + const bool requires_barrier = [&] { for (const auto& view : views) { if (!view.id) { continue; } - bool is_render_target = false; - - for (const auto& ct_view_id : render_targets.color_buffer_ids) { - if (ct_view_id && ct_view_id == view.id) { - is_render_target = true; - break; - } - } - - if (!is_render_target && render_targets.depth_buffer_id == view.id) { - is_render_target = true; - } - - if (is_render_target) { - continue; - } - - auto& image_view = slot_image_views[view.id]; - - for (const auto& ct_view_id : render_targets.color_buffer_ids) { - if (!ct_view_id) { + { + bool is_continue = false; + for (size_t i = 0; i < 8; ++i) + is_continue |= (rt_active_mask & (1u << i)) && view.id == render_targets.color_buffer_ids[i]; + if (is_continue) continue; - } - - auto& ct_view = slot_image_views[ct_view_id]; - - if (image_view.image_id == ct_view.image_id) { - return true; - } } - if (render_targets.depth_buffer_id) { - auto& zt_view = slot_image_views[render_targets.depth_buffer_id]; + if (depth_active && view.id == render_targets.depth_buffer_id) + continue; - if (image_view.image_id == zt_view.image_id) { - return true; - } + const ImageId view_image_id = slot_image_views[view.id].image_id; + { + bool is_continue = false; + for (size_t i = 0; i < 8; ++i) + is_continue |= (rt_active_mask & (1u << i)) && view_image_id == rt_image_id[i]; + if (is_continue) + continue; + } + if (depth_active && view_image_id == 
rt_depth_image_id) { + return true; } } return false; }(); + last_feedback_loop_serial = render_targets_serial; + last_feedback_texture_serial = texture_bindings_serial; + last_feedback_loop_result = requires_barrier; if (requires_barrier) { runtime.BarrierFeedbackLoop(); } @@ -399,13 +403,19 @@ void TextureCache

::SynchronizeGraphicsDescriptors() { const bool linked_tsc = maxwell3d->regs.sampler_binding == SamplerBinding::ViaHeaderBinding; const u32 tic_limit = maxwell3d->regs.tex_header.limit; const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d->regs.tex_sampler.limit; + bool bindings_changed = false; if (channel_state->graphics_sampler_table.Synchronize(maxwell3d->regs.tex_sampler.Address(), tsc_limit)) { channel_state->graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); + bindings_changed = true; } if (channel_state->graphics_image_table.Synchronize(maxwell3d->regs.tex_header.Address(), tic_limit)) { channel_state->graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); + bindings_changed = true; + } + if (bindings_changed) { + ++texture_bindings_serial; } } @@ -415,12 +425,18 @@ void TextureCache

::SynchronizeComputeDescriptors() { const u32 tic_limit = kepler_compute->regs.tic.limit; const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute->regs.tsc.limit; const GPUVAddr tsc_gpu_addr = kepler_compute->regs.tsc.Address(); + bool bindings_changed = false; if (channel_state->compute_sampler_table.Synchronize(tsc_gpu_addr, tsc_limit)) { channel_state->compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); + bindings_changed = true; } if (channel_state->compute_image_table.Synchronize(kepler_compute->regs.tic.Address(), tic_limit)) { channel_state->compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); + bindings_changed = true; + } + if (bindings_changed) { + ++texture_bindings_serial; } } @@ -534,6 +550,7 @@ void TextureCache

::UpdateRenderTargets(bool is_clear) { return; } + const VideoCommon::RenderTargets previous_render_targets = render_targets; const bool rescaled = RescaleRenderTargets(); if (is_rescaling != rescaled) { flags[Dirty::RescaleViewports] = true; @@ -549,6 +566,21 @@ void TextureCache

::UpdateRenderTargets(bool is_clear) { PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); + rt_active_mask = 0; + rt_image_id = {}; + for (size_t i = 0; i < rt_image_id.size(); ++i) { + if (ImageViewId const view = render_targets.color_buffer_ids[i]; view) { + rt_active_mask |= 1u << i; + rt_image_id[i] = slot_image_views[view].image_id; + } + } + if (depth_buffer_id) { + rt_active_mask |= (1u << NUM_RT); + rt_depth_image_id = slot_image_views[depth_buffer_id].image_id; + } else { + rt_depth_image_id = ImageId{}; + } + for (size_t index = 0; index < NUM_RT; ++index) { render_targets.draw_buffers[index] = static_cast(maxwell3d->regs.rt_control.Map(index)); } @@ -564,12 +596,22 @@ void TextureCache

::UpdateRenderTargets(bool is_clear) { }; render_targets.is_rescaled = is_rescaling; + if (render_targets != previous_render_targets) { + ++render_targets_serial; + } + flags[Dirty::DepthBiasGlobal] = true; } template typename P::Framebuffer* TextureCache

::GetFramebuffer() { - return &slot_framebuffers[GetFramebufferId(render_targets)]; + if (last_framebuffer_id && last_framebuffer_serial == render_targets_serial) { + return &slot_framebuffers[last_framebuffer_id]; + } + const FramebufferId framebuffer_id = GetFramebufferId(render_targets); + last_framebuffer_id = framebuffer_id; + last_framebuffer_serial = render_targets_serial; + return &slot_framebuffers[framebuffer_id]; } template @@ -2610,6 +2652,10 @@ void TextureCache

::RemoveFramebuffers(std::span removed_vi if (it->first.Contains(removed_views)) { auto framebuffer_id = it->second; ASSERT(framebuffer_id); + if (framebuffer_id == last_framebuffer_id) { + last_framebuffer_id = {}; + last_framebuffer_serial = 0; + } sentenced_framebuffers.Push(std::move(slot_framebuffers[framebuffer_id])); it = framebuffers.erase(it); } else { diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index cff300da8f..4b4061f21d 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -455,6 +455,16 @@ private: std::deque gpu_page_table_storage; RenderTargets render_targets; + u64 render_targets_serial = 0; + u32 rt_active_mask = 0; + std::array rt_image_id{}; + ImageId rt_depth_image_id{}; + u64 texture_bindings_serial = 0; + u64 last_feedback_loop_serial = 0; + u64 last_feedback_texture_serial = 0; + bool last_feedback_loop_result = false; + FramebufferId last_framebuffer_id{}; + u64 last_framebuffer_serial = 0; ankerl::unordered_dense::map framebuffers; ankerl::unordered_dense::map, Common::IdentityHash> page_table;