[buffer_cache, memory, pipeline] Added translation entry structure and update buffer cache handling

This commit is contained in:
CamilleLaVey 2026-02-10 22:24:21 -04:00 committed by crueter
parent ef730ed490
commit 901f556af5
9 changed files with 407 additions and 36 deletions

View File

@ -127,6 +127,11 @@ public:
void UpdatePagesCachedBatch(std::span<const std::pair<DAddr, size_t>> ranges, s32 delta);
private:
// One entry of the small per-thread translation cache: pairs a page-aligned
// guest device address with the host pointer backing that page. A null
// host_ptr marks the entry as empty/invalid.
// NOTE(review): entries are thread_local and are only cleared by Map/Unmap on
// the thread performing the (un)mapping; other threads may briefly retain a
// stale host pointer for a remapped page — confirm this is acceptable.
struct TranslationEntry {
DAddr guest_page{};
u8* host_ptr{};
};
// Internal helper that performs the update assuming the caller already holds the necessary lock.
void UpdatePagesCachedCountNoLock(DAddr addr, size_t size, s32 delta);
@ -195,6 +200,11 @@ private:
}
Common::VirtualBuffer<VAddr> cpu_backing_address;
inline static thread_local TranslationEntry t_slot0{};
inline static thread_local TranslationEntry t_slot1{};
inline static thread_local TranslationEntry t_slot2{};
inline static thread_local TranslationEntry t_slot3{};
inline static thread_local u32 cache_cursor = 0;
using CounterType = u8;
using CounterAtomicType = std::atomic_uint8_t;
static constexpr size_t subentries = 8 / sizeof(CounterType);

View File

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
@ -247,6 +247,10 @@ void DeviceMemoryManager<Traits>::Map(DAddr address, VAddr virtual_address, size
}
impl->multi_dev_address.Register(new_dev, start_id);
}
t_slot0 = {};
t_slot1 = {};
t_slot2 = {};
t_slot3 = {};
if (track) {
TrackContinuityImpl(address, virtual_address, size, asid);
}
@ -278,6 +282,10 @@ void DeviceMemoryManager<Traits>::Unmap(DAddr address, size_t size) {
compressed_device_addr[phys_addr - 1] = new_start | MULTI_FLAG;
}
}
t_slot0 = {};
t_slot1 = {};
t_slot2 = {};
t_slot3 = {};
}
template <typename Traits>
void DeviceMemoryManager<Traits>::TrackContinuityImpl(DAddr address, VAddr virtual_address,
@ -417,6 +425,50 @@ void DeviceMemoryManager<Traits>::WalkBlock(DAddr addr, std::size_t size, auto o
template <typename Traits>
void DeviceMemoryManager<Traits>::ReadBlock(DAddr address, void* dest_pointer, size_t size) {
device_inter->FlushRegion(address, size);
const std::size_t page_offset = address & Memory::YUZU_PAGEMASK;
if (size <= Memory::YUZU_PAGESIZE - page_offset) {
const DAddr guest_page = address & ~static_cast<DAddr>(Memory::YUZU_PAGEMASK);
if (t_slot0.guest_page == guest_page && t_slot0.host_ptr != nullptr) {
std::memcpy(dest_pointer, t_slot0.host_ptr + page_offset, size);
return;
}
if (t_slot1.guest_page == guest_page && t_slot1.host_ptr != nullptr) {
std::memcpy(dest_pointer, t_slot1.host_ptr + page_offset, size);
return;
}
if (t_slot2.guest_page == guest_page && t_slot2.host_ptr != nullptr) {
std::memcpy(dest_pointer, t_slot2.host_ptr + page_offset, size);
return;
}
if (t_slot3.guest_page == guest_page && t_slot3.host_ptr != nullptr) {
std::memcpy(dest_pointer, t_slot3.host_ptr + page_offset, size);
return;
}
const std::size_t page_index = address >> Memory::YUZU_PAGEBITS;
const auto phys_addr = compressed_physical_ptr[page_index];
if (phys_addr != 0) {
auto* const mem_ptr = GetPointerFromRaw<u8>(
(static_cast<PAddr>(phys_addr - 1) << Memory::YUZU_PAGEBITS));
switch (cache_cursor & 3U) {
case 0:
t_slot0 = TranslationEntry{.guest_page = guest_page, .host_ptr = mem_ptr};
break;
case 1:
t_slot1 = TranslationEntry{.guest_page = guest_page, .host_ptr = mem_ptr};
break;
case 2:
t_slot2 = TranslationEntry{.guest_page = guest_page, .host_ptr = mem_ptr};
break;
default:
t_slot3 = TranslationEntry{.guest_page = guest_page, .host_ptr = mem_ptr};
break;
}
cache_cursor = (cache_cursor + 1) & 3U;
std::memcpy(dest_pointer, mem_ptr + page_offset, size);
return;
}
}
WalkBlock(
address, size,
[&](size_t copy_amount, DAddr current_vaddr) {
@ -455,6 +507,50 @@ void DeviceMemoryManager<Traits>::WriteBlock(DAddr address, const void* src_poin
template <typename Traits>
void DeviceMemoryManager<Traits>::ReadBlockUnsafe(DAddr address, void* dest_pointer, size_t size) {
const std::size_t page_offset = address & Memory::YUZU_PAGEMASK;
if (size <= Memory::YUZU_PAGESIZE - page_offset) {
const DAddr guest_page = address & ~static_cast<DAddr>(Memory::YUZU_PAGEMASK);
if (t_slot0.guest_page == guest_page && t_slot0.host_ptr != nullptr) {
std::memcpy(dest_pointer, t_slot0.host_ptr + page_offset, size);
return;
}
if (t_slot1.guest_page == guest_page && t_slot1.host_ptr != nullptr) {
std::memcpy(dest_pointer, t_slot1.host_ptr + page_offset, size);
return;
}
if (t_slot2.guest_page == guest_page && t_slot2.host_ptr != nullptr) {
std::memcpy(dest_pointer, t_slot2.host_ptr + page_offset, size);
return;
}
if (t_slot3.guest_page == guest_page && t_slot3.host_ptr != nullptr) {
std::memcpy(dest_pointer, t_slot3.host_ptr + page_offset, size);
return;
}
const std::size_t page_index = address >> Memory::YUZU_PAGEBITS;
const auto phys_addr = compressed_physical_ptr[page_index];
if (phys_addr != 0) {
auto* const mem_ptr = GetPointerFromRaw<u8>(
(static_cast<PAddr>(phys_addr - 1) << Memory::YUZU_PAGEBITS));
switch (cache_cursor & 3U) {
case 0:
t_slot0 = TranslationEntry{.guest_page = guest_page, .host_ptr = mem_ptr};
break;
case 1:
t_slot1 = TranslationEntry{.guest_page = guest_page, .host_ptr = mem_ptr};
break;
case 2:
t_slot2 = TranslationEntry{.guest_page = guest_page, .host_ptr = mem_ptr};
break;
default:
t_slot3 = TranslationEntry{.guest_page = guest_page, .host_ptr = mem_ptr};
break;
}
cache_cursor = (cache_cursor + 1) & 3U;
std::memcpy(dest_pointer, mem_ptr + page_offset, size);
return;
}
}
WalkBlock(
address, size,
[&](size_t copy_amount, DAddr current_vaddr) {

View File

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
@ -39,7 +42,8 @@ public:
static constexpr u64 BASE_PAGE_SIZE = 1ULL << BASE_PAGE_BITS;
explicit BufferBase(VAddr cpu_addr_, u64 size_bytes_)
: cpu_addr{cpu_addr_}, size_bytes{size_bytes_} {}
: cpu_addr{cpu_addr_}, cpu_addr_cached{static_cast<DAddr>(cpu_addr_)},
size_bytes{size_bytes_} {}
explicit BufferBase(NullBufferParams) {}
@ -97,6 +101,8 @@ public:
return cpu_addr;
}
DAddr cpu_addr_cached = 0;
/// Returns the offset relative to the given CPU address
/// @pre IsInBounds returns true
[[nodiscard]] u32 Offset(VAddr other_cpu_addr) const noexcept {

View File

@ -382,6 +382,10 @@ void BufferCache<P>::BindHostComputeBuffers() {
BindHostComputeUniformBuffers();
BindHostComputeStorageBuffers();
BindHostComputeTextureBuffers();
if (any_buffer_uploaded) {
runtime.PostCopyBarrier();
any_buffer_uploaded = false;
}
}
template <class P>
@ -766,45 +770,231 @@ void BufferCache<P>::BindHostIndexBuffer() {
}
}
template <class P>
void BufferCache<P>::BindHostVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size,
                                          u32 stride) {
    // The OpenGL runtime takes the buffer object itself; every other backend
    // binds through the buffer's native handle instead.
    if constexpr (IS_OPENGL) {
        runtime.BindVertexBuffer(index, buffer, offset, size, stride);
        return;
    }
    runtime.BindVertexBuffer(index, buffer.Handle(), offset, size, stride);
}
template <class P>
Binding& BufferCache<P>::VertexBufferSlot(u32 index) {
    ASSERT(index < NUM_VERTEX_BUFFERS);
    // Pointer-to-member table: a single indexed load replaces the 16/32-way
    // switch over the individually named v_bufferN members.
    static constexpr Binding BufferCache::*kSlots[] = {
        &BufferCache::v_buffer0,  &BufferCache::v_buffer1,  &BufferCache::v_buffer2,
        &BufferCache::v_buffer3,  &BufferCache::v_buffer4,  &BufferCache::v_buffer5,
        &BufferCache::v_buffer6,  &BufferCache::v_buffer7,  &BufferCache::v_buffer8,
        &BufferCache::v_buffer9,  &BufferCache::v_buffer10, &BufferCache::v_buffer11,
        &BufferCache::v_buffer12, &BufferCache::v_buffer13, &BufferCache::v_buffer14,
        &BufferCache::v_buffer15,
#ifndef __APPLE__
        &BufferCache::v_buffer16, &BufferCache::v_buffer17, &BufferCache::v_buffer18,
        &BufferCache::v_buffer19, &BufferCache::v_buffer20, &BufferCache::v_buffer21,
        &BufferCache::v_buffer22, &BufferCache::v_buffer23, &BufferCache::v_buffer24,
        &BufferCache::v_buffer25, &BufferCache::v_buffer26, &BufferCache::v_buffer27,
        &BufferCache::v_buffer28, &BufferCache::v_buffer29, &BufferCache::v_buffer30,
        &BufferCache::v_buffer31,
#endif
    };
    // Out-of-range indices fall back to the last slot, exactly like the
    // original switch's default arm (v_buffer15 on Apple, v_buffer31 elsewhere).
    constexpr u32 kCount = static_cast<u32>(sizeof(kSlots) / sizeof(kSlots[0]));
    const u32 clamped = index < kCount ? index : kCount - 1;
    return this->*kSlots[clamped];
}
template <class P>
const Binding& BufferCache<P>::VertexBufferSlot(u32 index) const {
    // Delegate to the non-const overload so the large per-slot lookup lives in
    // exactly one place; the previous hand-duplicated 32-case switch had to be
    // kept in sync manually with the non-const version.
    // Casting away const on *this is safe here: the non-const overload only
    // selects which member to return (no mutation), and const-ness is
    // re-applied on the returned reference.
    return const_cast<BufferCache*>(this)->VertexBufferSlot(index);
}
template <class P>
void BufferCache<P>::UpdateVertexBufferSlot(u32 index, const Binding& binding) {
    // Record the new binding in its slot, bumping the serial whenever the
    // address/size pair changes, and keep the enabled mask in sync.
    Binding& slot = VertexBufferSlot(index);
    const bool changed = slot.device_addr != binding.device_addr || slot.size != binding.size;
    if (changed) {
        ++vertex_buffers_serial;
    }
    slot = binding;
    const u32 bit = 1u << index;
    const bool active = binding.buffer_id != NULL_BUFFER_ID && binding.size != 0;
    if (active) {
        enabled_vertex_buffers_mask |= bit;
    } else {
        enabled_vertex_buffers_mask &= ~bit;
    }
}
template <class P>
void BufferCache<P>::BindHostVertexBuffers() {
HostBindings<typename P::Buffer> host_bindings;
bool any_valid{false};
auto& flags = maxwell3d->dirty.flags;
for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
const Binding& binding = channel_state->vertex_buffers[index];
u32 enabled_mask = enabled_vertex_buffers_mask;
HostBindings<Buffer> bindings{};
u32 last_index = std::numeric_limits<u32>::max();
const auto flush_bindings = [&]() {
if (bindings.buffers.empty()) {
return;
}
bindings.max_index = bindings.min_index + static_cast<u32>(bindings.buffers.size());
runtime.BindVertexBuffers(bindings);
bindings = HostBindings<Buffer>{};
last_index = std::numeric_limits<u32>::max();
};
while (enabled_mask != 0) {
const u32 index = std::countr_zero(enabled_mask);
enabled_mask &= (enabled_mask - 1);
const Binding& binding = VertexBufferSlot(index);
Buffer& buffer = slot_buffers[binding.buffer_id];
TouchBuffer(buffer, binding.buffer_id);
SynchronizeBuffer(buffer, binding.device_addr, binding.size);
if (!flags[Dirty::VertexBuffer0 + index]) {
flush_bindings();
continue;
}
flags[Dirty::VertexBuffer0 + index] = false;
host_bindings.min_index = (std::min)(host_bindings.min_index, index);
host_bindings.max_index = (std::max)(host_bindings.max_index, index);
any_valid = true;
}
if (any_valid) {
host_bindings.max_index++;
for (u32 index = host_bindings.min_index; index < host_bindings.max_index; index++) {
flags[Dirty::VertexBuffer0 + index] = false;
const Binding& binding = channel_state->vertex_buffers[index];
Buffer& buffer = slot_buffers[binding.buffer_id];
const u32 stride = maxwell3d->regs.vertex_streams[index].stride;
const u32 offset = buffer.Offset(binding.device_addr);
buffer.MarkUsage(offset, binding.size);
host_bindings.buffers.push_back(&buffer);
host_bindings.offsets.push_back(offset);
host_bindings.sizes.push_back(binding.size);
host_bindings.strides.push_back(stride);
if (!bindings.buffers.empty() && index != last_index + 1) {
flush_bindings();
}
runtime.BindVertexBuffers(host_bindings);
if (bindings.buffers.empty()) {
bindings.min_index = index;
}
bindings.buffers.push_back(&buffer);
bindings.offsets.push_back(offset);
bindings.sizes.push_back(binding.size);
bindings.strides.push_back(stride);
last_index = index;
}
flush_bindings();
}
template <class P>
@ -1208,17 +1398,20 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) {
u32 size = address_size; // TODO: Analyze stride and number of vertices
if (array.enable == 0 || size == 0 || !device_addr) {
channel_state->vertex_buffers[index] = NULL_BINDING;
UpdateVertexBufferSlot(index, NULL_BINDING);
return;
}
if (!gpu_memory->IsWithinGPUAddressRange(gpu_addr_end) || size >= 64_MiB) {
size = static_cast<u32>(gpu_memory->MaxContinuousRange(gpu_addr_begin, size));
}
const BufferId buffer_id = FindBuffer(*device_addr, size);
channel_state->vertex_buffers[index] = Binding{
const Binding binding{
.device_addr = *device_addr,
.size = size,
.buffer_id = buffer_id,
};
channel_state->vertex_buffers[index] = binding;
UpdateVertexBufferSlot(index, binding);
}
template <class P>
@ -1531,12 +1724,12 @@ void BufferCache<P>::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept {
template <class P>
bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, DAddr device_addr, u32 size) {
boost::container::small_vector<BufferCopy, 4> copies;
upload_copies.clear();
u64 total_size_bytes = 0;
u64 largest_copy = 0;
DAddr buffer_start = buffer.CpuAddr();
const DAddr buffer_start = buffer.cpu_addr_cached;
memory_tracker.ForEachUploadRange(device_addr, size, [&](u64 device_addr_out, u64 range_size) {
copies.push_back(BufferCopy{
upload_copies.push_back(BufferCopy{
.src_offset = total_size_bytes,
.dst_offset = device_addr_out - buffer_start,
.size = range_size,
@ -1547,8 +1740,9 @@ bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, DAddr device_addr, u32 si
if (total_size_bytes == 0) {
return true;
}
const std::span<BufferCopy> copies_span(copies.data(), copies.size());
const std::span<BufferCopy> copies_span(upload_copies.data(), upload_copies.size());
UploadMemory(buffer, total_size_bytes, largest_copy, copies_span);
any_buffer_uploaded = true;
return false;
}
@ -1738,6 +1932,7 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id, bool do_not_mark) {
auto& binding = channel_state->vertex_buffers[index];
if (binding.buffer_id == buffer_id) {
binding.buffer_id = BufferId{};
UpdateVertexBufferSlot(index, binding);
dirty_vertex_buffers.push_back(index);
}
}

View File

@ -321,6 +321,7 @@ public:
std::recursive_mutex mutex;
Runtime& runtime;
bool any_buffer_uploaded = false;
private:
template <typename Func>
@ -373,6 +374,8 @@ private:
void BindHostTransformFeedbackBuffers();
void BindHostVertexBuffer(u32 index, Buffer& buffer, u32 offset, u32 size, u32 stride);
void BindHostComputeUniformBuffers();
void BindHostComputeStorageBuffers();
@ -454,6 +457,12 @@ private:
[[nodiscard]] bool HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept;
[[nodiscard]] Binding& VertexBufferSlot(u32 index);
[[nodiscard]] const Binding& VertexBufferSlot(u32 index) const;
void UpdateVertexBufferSlot(u32 index, const Binding& binding);
void ClearDownload(DAddr base_addr, u64 size);
void InlineMemoryImplementation(DAddr dest_address, size_t copy_size,
@ -473,6 +482,45 @@ private:
u32 last_index_count = 0;
u32 enabled_vertex_buffers_mask = 0;
u64 vertex_buffers_serial = 0;
Binding v_buffer0{};
Binding v_buffer1{};
Binding v_buffer2{};
Binding v_buffer3{};
Binding v_buffer4{};
Binding v_buffer5{};
Binding v_buffer6{};
Binding v_buffer7{};
Binding v_buffer8{};
Binding v_buffer9{};
Binding v_buffer10{};
Binding v_buffer11{};
Binding v_buffer12{};
Binding v_buffer13{};
Binding v_buffer14{};
Binding v_buffer15{};
#ifndef __APPLE__
Binding v_buffer16{};
Binding v_buffer17{};
Binding v_buffer18{};
Binding v_buffer19{};
Binding v_buffer20{};
Binding v_buffer21{};
Binding v_buffer22{};
Binding v_buffer23{};
Binding v_buffer24{};
Binding v_buffer25{};
Binding v_buffer26{};
Binding v_buffer27{};
Binding v_buffer28{};
Binding v_buffer29{};
Binding v_buffer30{};
Binding v_buffer31{};
#endif
boost::container::small_vector<BufferCopy, 4> upload_copies;
MemoryTracker memory_tracker;
Common::RangeSet<DAddr> uncommitted_gpu_modified_ranges;
Common::PageBitsetRangeSet<DAddr, CACHING_PAGEBITS, (1ULL << 34)> gpu_modified_ranges;

View File

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
@ -189,6 +189,10 @@ void ComputePipeline::Configure() {
buffer_cache.runtime.SetEnableStorageBuffers(use_storage_buffers);
buffer_cache.runtime.SetImagePointers(textures.data(), images.data());
buffer_cache.BindHostComputeBuffers();
if (buffer_cache.any_buffer_uploaded) {
buffer_cache.runtime.PostCopyBarrier();
buffer_cache.any_buffer_uploaded = false;
}
const VideoCommon::ImageViewInOut* views_it{views.data() + num_texture_buffers +
num_image_buffers};

View File

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
@ -558,6 +558,10 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
if (image_binding != 0) {
glBindImageTextures(0, image_binding, images.data());
}
if (buffer_cache.any_buffer_uploaded) {
buffer_cache.runtime.PostCopyBarrier();
buffer_cache.any_buffer_uploaded = false;
}
return true;
}

View File

@ -203,6 +203,10 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
buffer_cache.UpdateComputeBuffers();
buffer_cache.BindHostComputeBuffers();
if (buffer_cache.any_buffer_uploaded) {
buffer_cache.runtime.PostCopyBarrier();
buffer_cache.any_buffer_uploaded = false;
}
RescalingPushConstant rescaling;
const VideoCommon::SamplerId* samplers_it{samplers.data()};

View File

@ -496,6 +496,10 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
if constexpr (Spec::enabled_stages[4]) {
prepare_stage(4);
}
if (buffer_cache.any_buffer_uploaded) {
buffer_cache.runtime.PostCopyBarrier();
buffer_cache.any_buffer_uploaded = false;
}
texture_cache.UpdateRenderTargets(false);
texture_cache.CheckFeedbackLoop(views);
ConfigureDraw(rescaling, render_area);