Revert "[texture_cache, buffer_cache] Rebuild modified pages in texture cache"
This commit is contained in:
parent 413cebca20
commit 420b002448
@@ -162,18 +162,14 @@ std::optional<VideoCore::RasterizerDownloadArea> BufferCache<P>::GetFlushArea(DA
    DAddr device_addr_end_aligned = Common::AlignUp(device_addr + size, Core::DEVICE_PAGESIZE);
    area->start_address = device_addr_start_aligned;
    area->end_address = device_addr_end_aligned;
    const u64 aligned_size = device_addr_end_aligned - device_addr_start_aligned;
    const bool has_dirty_pages = IsRegionGpuModified(device_addr_start_aligned, aligned_size);
    if (!has_dirty_pages) {
    if (memory_tracker.IsRegionPreflushable(device_addr, size)) {
        area->preemtive = true;
        return area;
    }
    if (memory_tracker.IsRegionPreflushable(device_addr_start_aligned, aligned_size)) {
        area->preemtive = true;
        return area;
    }
    area->preemtive = false;
    memory_tracker.MarkRegionAsPreflushable(device_addr_start_aligned, aligned_size);
    };
    area->preemtive = !IsRegionGpuModified(device_addr_start_aligned,
                                           device_addr_end_aligned - device_addr_start_aligned);
    memory_tracker.MarkRegionAsPreflushable(device_addr_start_aligned,
                                            device_addr_end_aligned - device_addr_start_aligned);
    return area;
}

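For orientation: the code this revert restores computes `preemtive` directly from `IsRegionGpuModified` over the page-aligned range, instead of first consulting the reverted commit's dirty-page set. Below is a minimal, self-contained sketch of that restored flow. The names `DownloadArea` and `kPageSize` and the `std::function` parameters are illustrative stand-ins, not the real `BufferCache<P>` API.

```cpp
#include <cstdint>
#include <functional>
#include <optional>

using DAddr = std::uint64_t;
using u64 = std::uint64_t;

struct DownloadArea {
    DAddr start_address;
    DAddr end_address;
    bool preemtive; // spelled as in the source
};

constexpr DAddr kPageSize = 4096; // stand-in for Core::DEVICE_PAGESIZE

constexpr DAddr AlignDown(DAddr addr, DAddr align) {
    return addr & ~(align - 1);
}
constexpr DAddr AlignUp(DAddr addr, DAddr align) {
    return AlignDown(addr + align - 1, align);
}

// Dependencies are passed as callables so the sketch stands alone.
std::optional<DownloadArea> GetFlushArea(
    DAddr device_addr, u64 size,
    const std::function<bool(DAddr, u64)>& is_region_preflushable,
    const std::function<bool(DAddr, u64)>& is_region_gpu_modified,
    const std::function<void(DAddr, u64)>& mark_region_as_preflushable) {
    DownloadArea area{};
    const DAddr start = AlignDown(device_addr, kPageSize);
    const DAddr end = AlignUp(device_addr + size, kPageSize);
    area.start_address = start;
    area.end_address = end;
    // Regions already marked preflushable can be flushed preemptively as-is.
    if (is_region_preflushable(device_addr, size)) {
        area.preemtive = true;
        return area;
    }
    // Otherwise the flush is preemptive only if the GPU has not written the
    // aligned range; either way, mark the range so future lookups are fast.
    area.preemtive = !is_region_gpu_modified(start, end - start);
    mark_region_as_preflushable(start, end - start);
    return area;
}
```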
@@ -600,15 +596,11 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
        it++;
    }

    Common::RangeSet<DAddr> merged_committed_ranges;
    for (const Common::RangeSet<DAddr>& range_set : committed_gpu_modified_ranges) {
        range_set.ForEach([&](DAddr start, DAddr end) { merged_committed_ranges.Add(start, end - start); });
    }

    boost::container::small_vector<std::pair<BufferCopy, BufferId>, 16> downloads;
    u64 total_size_bytes = 0;
    u64 largest_copy = 0;
    merged_committed_ranges.ForEach([&](DAddr interval_lower, DAddr interval_upper) {
    for (const Common::RangeSet<DAddr>& range_set : committed_gpu_modified_ranges) {
        range_set.ForEach([&](DAddr interval_lower, DAddr interval_upper) {
            const std::size_t size = interval_upper - interval_lower;
            const DAddr device_addr = interval_lower;
            ForEachBufferInRange(device_addr, size, [&](BufferId buffer_id, Buffer& buffer) {

@@ -616,11 +608,11 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
                const DAddr buffer_end = buffer_start + buffer.SizeBytes();
                const DAddr new_start = (std::max)(buffer_start, device_addr);
                const DAddr new_end = (std::min)(buffer_end, device_addr + size);
                memory_tracker.ForEachDownloadRange(new_start, new_end - new_start, false,
                memory_tracker.ForEachDownloadRange(
                    new_start, new_end - new_start, false,
                    [&](u64 device_addr_out, u64 range_size) {
                        const DAddr buffer_addr = buffer.CpuAddr();
                        const auto add_download = [&](DAddr start,
                                                      DAddr end) {
                        const auto add_download = [&](DAddr start, DAddr end) {
                            const u64 new_offset = start - buffer_addr;
                            const u64 new_size = end - start;
                            downloads.push_back({

@@ -634,17 +626,16 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
                            // Align up to avoid cache conflicts
                            constexpr u64 align = 64ULL;
                            constexpr u64 mask = ~(align - 1ULL);
                            total_size_bytes +=
                                (new_size + align - 1) & mask;
                            largest_copy =
                                (std::max)(largest_copy, new_size);
                            total_size_bytes += (new_size + align - 1) & mask;
                            largest_copy = (std::max)(largest_copy, new_size);
                        };

                        gpu_modified_ranges.ForEachInRange(
                            device_addr_out, range_size, add_download);
                        gpu_modified_ranges.ForEachInRange(device_addr_out, range_size,
                                                           add_download);
                    });
            });
        });
    }
    committed_gpu_modified_ranges.clear();
    if (downloads.empty()) {
        async_buffers.emplace_back(std::optional<Async_Buffer>{});

@@ -698,19 +689,10 @@ void BufferCache<P>::PopAsyncBuffers() {
            const DAddr device_addr = static_cast<DAddr>(copy.src_offset);
            const u64 dst_offset = copy.dst_offset - base_offset;
            const u8* read_mapped_memory = base + dst_offset;
            boost::container::small_vector<std::pair<DAddr, DAddr>, 8> merged_write_ranges;
            async_downloads.ForEachInRange(device_addr, copy.size, [&](DAddr start, DAddr end, s32) {
                if (!merged_write_ranges.empty() && merged_write_ranges.back().second >= start) {
                    merged_write_ranges.back().second =
                        (std::max)(merged_write_ranges.back().second, end);
                    return;
                }
                merged_write_ranges.emplace_back(start, end);
            });
            for (const auto& [start, end] : merged_write_ranges) {
                device_memory.WriteBlockUnsafe(start, &read_mapped_memory[start - device_addr],
                                               end - start);
            }
            });
            async_downloads.Subtract(device_addr, copy.size, [&](DAddr start, DAddr end) {
                ranges_to_remove.Add(start, end - start);
            });

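The reverted code above coalesced the downloaded intervals before writing them back, so overlapping or touching ranges produced a single `WriteBlockUnsafe`-style call each. That merge step is self-contained enough to illustrate on its own; this is a sketch only, with `std::pair` intervals standing in for the cache's types and the input assumed sorted by start, as a range-set iteration would provide.

```cpp
#include <algorithm>
#include <cstdint>
#include <utility>
#include <vector>

using DAddr = std::uint64_t;

// Coalesce [start, end) intervals that touch or overlap, mirroring the
// merged_write_ranges loop removed by this revert.
std::vector<std::pair<DAddr, DAddr>> MergeWriteRanges(
    const std::vector<std::pair<DAddr, DAddr>>& sorted_ranges) {
    std::vector<std::pair<DAddr, DAddr>> merged;
    for (const auto& [start, end] : sorted_ranges) {
        // Extend the previous interval when the new one touches or overlaps it.
        if (!merged.empty() && merged.back().second >= start) {
            merged.back().second = (std::max)(merged.back().second, end);
            continue;
        }
        merged.emplace_back(start, end);
    }
    return merged;
}

// Example: {0,16}, {16,32}, {40,48} -> {0,32}, {40,48}; each merged range
// then needs only one write-back call instead of one per raw interval.
```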
@@ -711,16 +711,12 @@ void TextureCache<P>::DownloadMemory(DAddr cpu_addr, size_t size) {
        runtime.Finish();
        SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span,
                     swizzle_data_buffer);
        RebuildGpuModifiedPagesInRange(image.cpu_addr, image.cpu_addr_end - image.cpu_addr);
    }
}

template <class P>
std::optional<VideoCore::RasterizerDownloadArea> TextureCache<P>::GetFlushArea(DAddr cpu_addr,
                                                                               u64 size) {
    if (!HasGpuModifiedPagesInRange(cpu_addr, size)) {
        return std::nullopt;
    }
    std::optional<VideoCore::RasterizerDownloadArea> area{};
    ForEachImageInRegion(cpu_addr, size, [&](ImageId, ImageBase& image) {
        if (False(image.flags & ImageFlagBits::GpuModified)) {

@@ -1111,9 +1107,6 @@ bool TextureCache<P>::IsRescaling(const ImageViewBase& image_view) const noexcep

template <class P>
bool TextureCache<P>::IsRegionGpuModified(DAddr addr, size_t size) {
    if (!HasGpuModifiedPagesInRange(addr, size)) {
        return false;
    }
    bool is_modified = false;
    ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) {
        if (False(image.flags & ImageFlagBits::GpuModified)) {

@@ -1125,24 +1118,6 @@ bool TextureCache<P>::IsRegionGpuModified(DAddr addr, size_t size) {
    return is_modified;
}

template <class P>
bool TextureCache<P>::HasGpuModifiedPagesInRange(DAddr addr, size_t size) const {
    bool has_dirty_page = false;
    gpu_modified_pages.ForEachInRange(addr, size, [&](DAddr, DAddr) { has_dirty_page = true; });
    return has_dirty_page;
}

template <class P>
void TextureCache<P>::RebuildGpuModifiedPagesInRange(DAddr addr, size_t size) {
    gpu_modified_pages.Subtract(addr, size);
    ForEachImageInRegion(addr, size, [this](ImageId, ImageBase& image) {
        if (False(image.flags & ImageFlagBits::GpuModified)) {
            return;
        }
        gpu_modified_pages.Add(image.cpu_addr, image.cpu_addr_end - image.cpu_addr);
    });
}

template <class P>
std::pair<typename TextureCache<P>::Image*, BufferImageCopy> TextureCache<P>::DmaBufferImageCopy(
    const Tegra::DMA::ImageCopy& copy_info, const Tegra::DMA::BufferOperand& buffer_operand,

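The two helpers deleted in this hunk maintained a page-granular dirty set: `RebuildGpuModifiedPagesInRange` cleared the pages in a range and then re-added the footprint of every overlapping image still flagged `GpuModified`. A minimal sketch of that subtract-then-re-add pattern, using a `std::set` of page indices in place of the real `Common::PageBitsetRangeSet` and a plain vector in place of `ForEachImageInRegion`; all names here are illustrative.

```cpp
#include <cstdint>
#include <set>
#include <vector>

using DAddr = std::uint64_t;
constexpr unsigned kPageBits = 12; // stand-in for YUZU_PAGEBITS

struct FakeImage {
    DAddr cpu_addr;
    DAddr cpu_addr_end;
    bool gpu_modified;
};

// Page-index set; the real structure uses a bitset for O(1) per-page access.
class PageSet {
public:
    void Add(DAddr addr, std::uint64_t size) {
        if (size == 0) {
            return;
        }
        for (DAddr page = addr >> kPageBits; page <= (addr + size - 1) >> kPageBits; ++page) {
            pages.insert(page);
        }
    }
    void Subtract(DAddr addr, std::uint64_t size) {
        if (size == 0) {
            return;
        }
        for (DAddr page = addr >> kPageBits; page <= (addr + size - 1) >> kPageBits; ++page) {
            pages.erase(page);
        }
    }
    bool AnyInRange(DAddr addr, std::uint64_t size) const {
        if (size == 0) {
            return false;
        }
        for (DAddr page = addr >> kPageBits; page <= (addr + size - 1) >> kPageBits; ++page) {
            if (pages.count(page) != 0) {
                return true;
            }
        }
        return false;
    }

private:
    std::set<DAddr> pages;
};

// Mirrors the deleted RebuildGpuModifiedPagesInRange: drop the range, then
// re-add the footprint of every overlapping image that is still GPU-modified.
void RebuildGpuModifiedPagesInRange(PageSet& gpu_modified_pages,
                                    const std::vector<FakeImage>& images, DAddr addr,
                                    std::uint64_t size) {
    gpu_modified_pages.Subtract(addr, size);
    for (const FakeImage& image : images) {
        const bool overlaps = image.cpu_addr < addr + size && addr < image.cpu_addr_end;
        if (!overlaps || !image.gpu_modified) {
            continue;
        }
        gpu_modified_pages.Add(image.cpu_addr, image.cpu_addr_end - image.cpu_addr);
    }
}
```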
@@ -1897,7 +1872,6 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DA
    }
    if (True(overlap.flags & ImageFlagBits::GpuModified)) {
        new_image.flags |= ImageFlagBits::GpuModified;
        gpu_modified_pages.Add(new_image.cpu_addr, new_image.cpu_addr_end - new_image.cpu_addr);
        const auto& resolution = Settings::values.resolution_info;
        const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value();
        const u32 up_scale = can_rescale ? resolution.up_scale : 1;

@@ -2569,9 +2543,6 @@ void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) {
template <class P>
void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
    ImageBase& image = slot_images[image_id];
    const bool was_gpu_modified = True(image.flags & ImageFlagBits::GpuModified);
    const DAddr image_cpu_addr = image.cpu_addr;
    const size_t image_cpu_size = image.cpu_addr_end - image.cpu_addr;
    if (image.HasScaled()) {
        total_used_memory -= GetScaledImageSizeBytes(image);
    }

@@ -2660,9 +2631,6 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
        channel_info.compute_image_table.Invalidate();
    }
    has_deleted_images = true;
    if (was_gpu_modified) {
        RebuildGpuModifiedPagesInRange(image_cpu_addr, image_cpu_size);
    }
}

template <class P>

@@ -2703,7 +2671,6 @@ void TextureCache<P>::RemoveFramebuffers(std::span<const ImageViewId> removed_vi
template <class P>
void TextureCache<P>::MarkModification(ImageBase& image) noexcept {
    image.flags |= ImageFlagBits::GpuModified;
    gpu_modified_pages.Add(image.cpu_addr, image.cpu_addr_end - image.cpu_addr);
    image.modification_tick = ++modification_tick;
}

@@ -2737,7 +2704,6 @@ void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
    image.modification_tick = most_recent_tick;
    if (any_modified) {
        image.flags |= ImageFlagBits::GpuModified;
        gpu_modified_pages.Add(image.cpu_addr, image.cpu_addr_end - image.cpu_addr);
    }
    std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) {
        const ImageBase& lhs_image = slot_images[lhs->id];

@@ -2765,11 +2731,7 @@ template <class P>
void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) {
    Image& image = slot_images[image_id];
    if (invalidate) {
        const bool was_gpu_modified = True(image.flags & ImageFlagBits::GpuModified);
        image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified);
        if (was_gpu_modified) {
            RebuildGpuModifiedPagesInRange(image.cpu_addr, image.cpu_addr_end - image.cpu_addr);
        }
        if (False(image.flags & ImageFlagBits::Tracked)) {
            TrackImage(image, image_id);
        }

@@ -23,7 +23,6 @@
#include "common/hash.h"
#include "common/literals.h"
#include "common/lru_cache.h"
#include "common/page_bitset_range_set.h"
#include <ranges>
#include "common/scratch_buffer.h"
#include "common/slot_vector.h"

@@ -386,9 +385,6 @@ private:
    template <typename Func>
    void ForEachSparseSegment(ImageBase& image, Func&& func);

    [[nodiscard]] bool HasGpuModifiedPagesInRange(DAddr addr, size_t size) const;
    void RebuildGpuModifiedPagesInRange(DAddr addr, size_t size);

    /// Find or create an image view in the given image with the passed parameters
    [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info);

@@ -472,7 +468,6 @@ private:

    ankerl::unordered_dense::map<RenderTargets, FramebufferId> framebuffers;
    ankerl::unordered_dense::map<u64, std::vector<ImageMapId>, Common::IdentityHash<u64>> page_table;
    Common::PageBitsetRangeSet<DAddr, YUZU_PAGEBITS, (1ULL << 34)> gpu_modified_pages;
    ankerl::unordered_dense::map<ImageId, boost::container::small_vector<ImageViewId, 16>> sparse_views;

    DAddr virtual_invalid_space{};
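The member removed here, `Common::PageBitsetRangeSet<DAddr, YUZU_PAGEBITS, (1ULL << 34)>`, suggests one bit per `1 << YUZU_PAGEBITS`-byte page across a 2^34-byte address space. The real implementation is not shown in this diff; the following is only a minimal sketch of that shape, a fixed-size bit vector with `Add`/`Subtract` and a range query, with every name assumed for illustration.

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

// Sketch of a page-granular bitset over a fixed address space: one bit per page.
template <typename AddrType, std::size_t PageBits, AddrType AddressSpaceSize>
class PageBitsetSketch {
    static constexpr std::size_t NumPages = AddressSpaceSize >> PageBits;

public:
    PageBitsetSketch() : bits((NumPages + 63) / 64, 0) {}

    void Add(AddrType addr, AddrType size) {
        ForEachPage(addr, size, [this](std::size_t page) { bits[page / 64] |= Bit(page); });
    }
    void Subtract(AddrType addr, AddrType size) {
        ForEachPage(addr, size, [this](std::size_t page) { bits[page / 64] &= ~Bit(page); });
    }
    bool AnySet(AddrType addr, AddrType size) const {
        bool any = false;
        ForEachPage(addr, size,
                    [&](std::size_t page) { any |= (bits[page / 64] & Bit(page)) != 0; });
        return any;
    }

private:
    static constexpr std::uint64_t Bit(std::size_t page) {
        return 1ULL << (page % 64);
    }

    // Visit every page index the byte range [addr, addr + size) touches.
    template <typename Func>
    void ForEachPage(AddrType addr, AddrType size, Func&& func) const {
        if (size == 0) {
            return;
        }
        const std::size_t first = static_cast<std::size_t>(addr >> PageBits);
        const std::size_t last = static_cast<std::size_t>((addr + size - 1) >> PageBits);
        for (std::size_t page = first; page <= last && page < NumPages; ++page) {
            func(page);
        }
    }

    std::vector<std::uint64_t> bits;
};

// With 4 KiB pages (PageBits == 12) and a 2^34-byte space this is 2^22 pages,
// i.e. a 512 KiB bit vector, cheap enough to query on every flush-area lookup.
using GpuModifiedPages = PageBitsetSketch<std::uint64_t, 12, (1ULL << 34)>;
```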