From e0daa0d83d80314d5225802e22013b3063d1a293 Mon Sep 17 00:00:00 2001
From: CamilleLaVey
Date: Fri, 13 Feb 2026 18:54:09 -0400
Subject: [PATCH] [texture_cache, buffer_cache] Rebuild modified pages in texture cache

---
 src/video_core/buffer_cache/buffer_cache.h    | 102 ++++++++++--------
 src/video_core/texture_cache/texture_cache.h  |  38 +++++++
 .../texture_cache/texture_cache_base.h        |   5 +
 3 files changed, 103 insertions(+), 42 deletions(-)

diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index aea92f4c38..3becc192fc 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -162,14 +162,18 @@ std::optional<VideoCore::RasterizerDownloadArea> BufferCache<P>::GetFlushArea(DA
     DAddr device_addr_end_aligned = Common::AlignUp(device_addr + size, Core::DEVICE_PAGESIZE);
     area->start_address = device_addr_start_aligned;
     area->end_address = device_addr_end_aligned;
-    if (memory_tracker.IsRegionPreflushable(device_addr, size)) {
+    const u64 aligned_size = device_addr_end_aligned - device_addr_start_aligned;
+    const bool has_dirty_pages = IsRegionGpuModified(device_addr_start_aligned, aligned_size);
+    if (!has_dirty_pages) {
         area->preemtive = true;
         return area;
-    };
-    area->preemtive = !IsRegionGpuModified(device_addr_start_aligned,
-                                           device_addr_end_aligned - device_addr_start_aligned);
-    memory_tracker.MarkRegionAsPreflushable(device_addr_start_aligned,
-                                            device_addr_end_aligned - device_addr_start_aligned);
+    }
+    if (memory_tracker.IsRegionPreflushable(device_addr_start_aligned, aligned_size)) {
+        area->preemtive = true;
+        return area;
+    }
+    area->preemtive = false;
+    memory_tracker.MarkRegionAsPreflushable(device_addr_start_aligned, aligned_size);
     return area;
 }
 
@@ -596,46 +600,51 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
         it++;
     }
+    Common::RangeSet<DAddr> merged_committed_ranges;
+    for (const Common::RangeSet<DAddr>& range_set : committed_gpu_modified_ranges) {
+        range_set.ForEach([&](DAddr start, DAddr end) { merged_committed_ranges.Add(start, end - start); });
+    }
+
     boost::container::small_vector<std::pair<BufferCopy, BufferId>, 16> downloads;
     u64 total_size_bytes = 0;
     u64 largest_copy = 0;
-    for (const Common::RangeSet<DAddr>& range_set : committed_gpu_modified_ranges) {
-        range_set.ForEach([&](DAddr interval_lower, DAddr interval_upper) {
-            const std::size_t size = interval_upper - interval_lower;
-            const DAddr device_addr = interval_lower;
-            ForEachBufferInRange(device_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
-                const DAddr buffer_start = buffer.CpuAddr();
-                const DAddr buffer_end = buffer_start + buffer.SizeBytes();
-                const DAddr new_start = (std::max)(buffer_start, device_addr);
-                const DAddr new_end = (std::min)(buffer_end, device_addr + size);
-                memory_tracker.ForEachDownloadRange(
-                    new_start, new_end - new_start, false,
-                    [&](u64 device_addr_out, u64 range_size) {
-                        const DAddr buffer_addr = buffer.CpuAddr();
-                        const auto add_download = [&](DAddr start, DAddr end) {
-                            const u64 new_offset = start - buffer_addr;
-                            const u64 new_size = end - start;
-                            downloads.push_back({
-                                BufferCopy{
-                                    .src_offset = new_offset,
-                                    .dst_offset = total_size_bytes,
-                                    .size = new_size,
-                                },
-                                buffer_id,
-                            });
-                            // Align up to avoid cache conflicts
-                            constexpr u64 align = 64ULL;
-                            constexpr u64 mask = ~(align - 1ULL);
-                            total_size_bytes += (new_size + align - 1) & mask;
-                            largest_copy = (std::max)(largest_copy, new_size);
-                        };
+    merged_committed_ranges.ForEach([&](DAddr interval_lower, DAddr interval_upper) {
+        const std::size_t size = interval_upper - interval_lower;
+        const DAddr device_addr = interval_lower;
+        ForEachBufferInRange(device_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
+            const DAddr buffer_start = buffer.CpuAddr();
+            const DAddr buffer_end = buffer_start + buffer.SizeBytes();
+            const DAddr new_start = (std::max)(buffer_start, device_addr);
+            const DAddr new_end = (std::min)(buffer_end, device_addr + size);
+            memory_tracker.ForEachDownloadRange(new_start, new_end - new_start, false,
+                                                [&](u64 device_addr_out, u64 range_size) {
+                                                    const DAddr buffer_addr = buffer.CpuAddr();
+                                                    const auto add_download = [&](DAddr start,
+                                                                                  DAddr end) {
+                                                        const u64 new_offset = start - buffer_addr;
+                                                        const u64 new_size = end - start;
+                                                        downloads.push_back({
+                                                            BufferCopy{
+                                                                .src_offset = new_offset,
+                                                                .dst_offset = total_size_bytes,
+                                                                .size = new_size,
+                                                            },
+                                                            buffer_id,
+                                                        });
+                                                        // Align up to avoid cache conflicts
+                                                        constexpr u64 align = 64ULL;
+                                                        constexpr u64 mask = ~(align - 1ULL);
+                                                        total_size_bytes +=
+                                                            (new_size + align - 1) & mask;
+                                                        largest_copy =
+                                                            (std::max)(largest_copy, new_size);
+                                                    };
 
-                            gpu_modified_ranges.ForEachInRange(device_addr_out, range_size,
-                                                               add_download);
-                        });
-            });
+                                                    gpu_modified_ranges.ForEachInRange(
+                                                        device_addr_out, range_size, add_download);
+                                                });
         });
-    }
+    });
     committed_gpu_modified_ranges.clear();
     if (downloads.empty()) {
         async_buffers.emplace_back(std::optional<Async_Buffer>{});
         return;
@@ -689,10 +698,19 @@ void BufferCache<P>::PopAsyncBuffers() {
         const DAddr device_addr = static_cast<DAddr>(copy.src_offset);
         const u64 dst_offset = copy.dst_offset - base_offset;
         const u8* read_mapped_memory = base + dst_offset;
+        boost::container::small_vector<std::pair<DAddr, DAddr>, 8> merged_write_ranges;
         async_downloads.ForEachInRange(device_addr, copy.size, [&](DAddr start, DAddr end, s32) {
+            if (!merged_write_ranges.empty() && merged_write_ranges.back().second >= start) {
+                merged_write_ranges.back().second =
+                    (std::max)(merged_write_ranges.back().second, end);
+                return;
+            }
+            merged_write_ranges.emplace_back(start, end);
+        });
+        for (const auto& [start, end] : merged_write_ranges) {
             device_memory.WriteBlockUnsafe(start, &read_mapped_memory[start - device_addr],
                                            end - start);
-        });
+        }
         async_downloads.Subtract(device_addr, copy.size, [&](DAddr start, DAddr end) {
             ranges_to_remove.Add(start, end - start);
         });
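Note on the two buffer_cache.h hunks above: both changes coalesce overlapping intervals before acting on them, so a byte that appears in more than one committed range set, or in overlapping async download entries, is copied back at most once. A minimal, self-contained sketch of that coalescing step, using plain std::vector instead of the Common::RangeSet and small_vector types the patch itself relies on (illustrative only, not the project's API):

    #include <algorithm>
    #include <cstdint>
    #include <utility>
    #include <vector>

    // Half-open [start, end) byte ranges.
    using Interval = std::pair<std::uint64_t, std::uint64_t>;

    // Collapse overlapping or touching intervals into a minimal set.
    std::vector<Interval> MergeIntervals(std::vector<Interval> intervals) {
        std::sort(intervals.begin(), intervals.end());
        std::vector<Interval> merged;
        for (const auto& [start, end] : intervals) {
            if (!merged.empty() && merged.back().second >= start) {
                // Extend the previous range instead of emitting the shared bytes twice.
                merged.back().second = std::max(merged.back().second, end);
                continue;
            }
            merged.emplace_back(start, end);
        }
        return merged;
    }

The PopAsyncBuffers hunk performs the same merge on the fly without the sort, which assumes async_downloads.ForEachInRange visits ranges in ascending address order.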

diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 44373e84c2..1c9a3b193c 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -711,12 +711,16 @@ void TextureCache<P>::DownloadMemory(DAddr cpu_addr, size_t size) {
         runtime.Finish();
         SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span,
                      swizzle_data_buffer);
+        RebuildGpuModifiedPagesInRange(image.cpu_addr, image.cpu_addr_end - image.cpu_addr);
     }
 }
 
 template <class P>
 std::optional<VideoCore::RasterizerDownloadArea> TextureCache<P>::GetFlushArea(DAddr cpu_addr,
                                                                                u64 size) {
+    if (!HasGpuModifiedPagesInRange(cpu_addr, size)) {
+        return std::nullopt;
+    }
     std::optional<VideoCore::RasterizerDownloadArea> area{};
     ForEachImageInRegion(cpu_addr, size, [&](ImageId, ImageBase& image) {
         if (False(image.flags & ImageFlagBits::GpuModified)) {
@@ -1107,6 +1111,9 @@ bool TextureCache<P>::IsRescaling(const ImageViewBase& image_view) const noexcep
 
 template <class P>
 bool TextureCache<P>::IsRegionGpuModified(DAddr addr, size_t size) {
+    if (!HasGpuModifiedPagesInRange(addr, size)) {
+        return false;
+    }
     bool is_modified = false;
     ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) {
         if (False(image.flags & ImageFlagBits::GpuModified)) {
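The early-outs added to GetFlushArea and IsRegionGpuModified are a coarse-then-exact check: gpu_modified_pages may over-report, since a page is flagged whenever any GPU-modified image touches it, but it is not meant to under-report, so bailing out when no page in the range is flagged is safe and the expensive ForEachImageInRegion walk only runs for ranges that might actually be dirty. A rough sketch of that pattern, with a hypothetical DirtyPages tracker standing in for Common::PageBitsetRangeSet (whose implementation is not part of this patch; the page size is an assumption):

    #include <cstdint>
    #include <unordered_set>

    // Hypothetical coarse tracker: stores the index of every dirty page.
    struct DirtyPages {
        static constexpr std::uint64_t kPageBits = 12; // assumed 4 KiB tracking granularity
        std::unordered_set<std::uint64_t> pages;

        void Add(std::uint64_t addr, std::uint64_t size) {
            if (size == 0) {
                return;
            }
            for (auto page = addr >> kPageBits; page <= (addr + size - 1) >> kPageBits; ++page) {
                pages.insert(page);
            }
        }

        // May return true for a clean sub-page region (false positive), but never
        // returns false for a region that overlaps a tracked image (no false negatives).
        bool Any(std::uint64_t addr, std::uint64_t size) const {
            if (size == 0) {
                return false;
            }
            for (auto page = addr >> kPageBits; page <= (addr + size - 1) >> kPageBits; ++page) {
                if (pages.count(page) != 0) {
                    return true;
                }
            }
            return false;
        }
    };

The invariant the rest of the patch maintains is that every path that sets ImageFlagBits::GpuModified (JoinImages, MarkModification, SynchronizeAliases) also adds the image's pages, and the invalidation and deletion paths rebuild the affected range.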

@@ -1118,6 +1125,24 @@ bool TextureCache<P>::IsRegionGpuModified(DAddr addr, size_t size) {
     return is_modified;
 }
 
+template <class P>
+bool TextureCache<P>::HasGpuModifiedPagesInRange(DAddr addr, size_t size) const {
+    bool has_dirty_page = false;
+    gpu_modified_pages.ForEachInRange(addr, size, [&](DAddr, DAddr) { has_dirty_page = true; });
+    return has_dirty_page;
+}
+
+template <class P>
+void TextureCache<P>::RebuildGpuModifiedPagesInRange(DAddr addr, size_t size) {
+    gpu_modified_pages.Subtract(addr, size);
+    ForEachImageInRegion(addr, size, [this](ImageId, ImageBase& image) {
+        if (False(image.flags & ImageFlagBits::GpuModified)) {
+            return;
+        }
+        gpu_modified_pages.Add(image.cpu_addr, image.cpu_addr_end - image.cpu_addr);
+    });
+}
+
 template <class P>
 std::pair<typename TextureCache<P>::Image*, BufferImageCopy> TextureCache<P>::DmaBufferImageCopy(
     const Tegra::DMA::ImageCopy& copy_info, const Tegra::DMA::BufferOperand& buffer_operand,
@@ -1872,6 +1897,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DA
         }
         if (True(overlap.flags & ImageFlagBits::GpuModified)) {
             new_image.flags |= ImageFlagBits::GpuModified;
+            gpu_modified_pages.Add(new_image.cpu_addr, new_image.cpu_addr_end - new_image.cpu_addr);
             const auto& resolution = Settings::values.resolution_info;
             const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value();
             const u32 up_scale = can_rescale ? resolution.up_scale : 1;
@@ -2543,6 +2569,9 @@ void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) {
 template <class P>
 void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
     ImageBase& image = slot_images[image_id];
+    const bool was_gpu_modified = True(image.flags & ImageFlagBits::GpuModified);
+    const DAddr image_cpu_addr = image.cpu_addr;
+    const size_t image_cpu_size = image.cpu_addr_end - image.cpu_addr;
     if (image.HasScaled()) {
         total_used_memory -= GetScaledImageSizeBytes(image);
     }
@@ -2631,6 +2660,9 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
         channel_info.compute_image_table.Invalidate();
     }
     has_deleted_images = true;
+    if (was_gpu_modified) {
+        RebuildGpuModifiedPagesInRange(image_cpu_addr, image_cpu_size);
+    }
 }
 
 template <class P>
@@ -2671,6 +2703,7 @@ void TextureCache<P>::RemoveFramebuffers(std::span<const ImageViewId> removed_vi
 
 template <class P>
 void TextureCache<P>::MarkModification(ImageBase& image) noexcept {
     image.flags |= ImageFlagBits::GpuModified;
+    gpu_modified_pages.Add(image.cpu_addr, image.cpu_addr_end - image.cpu_addr);
     image.modification_tick = ++modification_tick;
 }
@@ -2704,6 +2737,7 @@ void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
     image.modification_tick = most_recent_tick;
     if (any_modified) {
         image.flags |= ImageFlagBits::GpuModified;
+        gpu_modified_pages.Add(image.cpu_addr, image.cpu_addr_end - image.cpu_addr);
     }
     std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) {
         const ImageBase& lhs_image = slot_images[lhs->id];
@@ -2731,7 +2765,11 @@ template <class P>
 void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) {
     Image& image = slot_images[image_id];
     if (invalidate) {
+        const bool was_gpu_modified = True(image.flags & ImageFlagBits::GpuModified);
         image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified);
+        if (was_gpu_modified) {
+            RebuildGpuModifiedPagesInRange(image.cpu_addr, image.cpu_addr_end - image.cpu_addr);
+        }
         if (False(image.flags & ImageFlagBits::Tracked)) {
             TrackImage(image, image_id);
         }
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 4b4061f21d..3859ddda8c 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -23,6 +23,7 @@
 #include "common/hash.h"
 #include "common/literals.h"
 #include "common/lru_cache.h"
+#include "common/page_bitset_range_set.h"
 #include
 #include "common/scratch_buffer.h"
 #include "common/slot_vector.h"
@@ -385,6 +386,9 @@ private:
     template <typename Func>
     void ForEachSparseSegment(ImageBase& image, Func&& func);
 
+    [[nodiscard]] bool HasGpuModifiedPagesInRange(DAddr addr, size_t size) const;
+    void RebuildGpuModifiedPagesInRange(DAddr addr, size_t size);
+
     /// Find or create an image view in the given image with the passed parameters
     [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info);
 
@@ -468,6 +472,7 @@ private:
     ankerl::unordered_dense::map<RenderTargets, FramebufferId> framebuffers;
 
     ankerl::unordered_dense::map<DAddr, std::vector<ImageId>, Common::IdentityHash<DAddr>> page_table;
+    Common::PageBitsetRangeSet gpu_modified_pages;
    ankerl::unordered_dense::map<ImageId, std::vector<ImageViewId>> sparse_views;
 
     DAddr virtual_invalid_space{};
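A final note on RebuildGpuModifiedPagesInRange: DeleteImage and PrepareImage with invalidate cannot simply Subtract the image's range from gpu_modified_pages, because other images that are still GPU-modified may alias the same pages; the helper therefore clears the range and re-adds every surviving modified image that overlaps it. A standalone sketch of that subtract-then-re-add step, using a plain std::set of page indices in place of Common::PageBitsetRangeSet (the names and the 4 KiB granularity are assumptions for illustration):

    #include <cstdint>
    #include <set>
    #include <vector>

    constexpr std::uint64_t kPageBits = 12; // assumed 4 KiB tracking granularity

    struct ImageRecord {
        std::uint64_t addr;
        std::uint64_t size;
        bool gpu_modified;
    };

    // Rebuild the dirty-page set for [addr, addr + size) after an image in that
    // range was deleted or invalidated.
    void RebuildRange(std::set<std::uint64_t>& dirty_pages, std::uint64_t addr, std::uint64_t size,
                      const std::vector<ImageRecord>& overlapping_images) {
        if (size == 0) {
            return;
        }
        // 1) Drop every page of the invalidated range.
        for (auto page = addr >> kPageBits; page <= (addr + size - 1) >> kPageBits; ++page) {
            dirty_pages.erase(page);
        }
        // 2) Re-add the pages of images that are still GPU-modified, since several
        //    images can cover the same pages.
        for (const ImageRecord& image : overlapping_images) {
            if (!image.gpu_modified || image.size == 0) {
                continue;
            }
            const auto last = (image.addr + image.size - 1) >> kPageBits;
            for (auto page = image.addr >> kPageBits; page <= last; ++page) {
                dirty_pages.insert(page);
            }
        }
    }

This mirrors the Subtract-then-Add sequence in the texture_cache.h hunk above; the real helper obtains the overlapping images from ForEachImageInRegion rather than from a prebuilt vector.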