[vk] Remove UniformRing and vkCmdResetQueryPool (#3270)
Fixes a performance regression on Xenoblade Chronicles DE and Pokemon Scarlet (among other games). The reason for such a performance loss (more than ~10% in some cases) should still be investigated.

At its core it partially reverts the following commits:

Revert "[vk] Introduce Ring Buffers for Uniform Buffer (#2698)"
This reverts commit 776958c79d.

Revert "[vk] Bring Vulkan closer to Spec (#180)"
This reverts commit c8d6f23129.

Revert "[VK] PR 180 extension (#257)"
This reverts commit 444b9f361e.

Revert "[vk] Fixes regression of PR #180 vk_scheduler.cpp for AMD GPU and Windows OS (#3071)"
This reverts commit be218cc020.

Signed-off-by: Caio Oliveira <caiooliveirafarias0@gmail.com>
Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3270
Reviewed-by: Lizzie <lizzie@eden-emu.dev>
Reviewed-by: CamilleLaVey <camillelavey99@gmail.com>
Co-authored-by: Caio Oliveira <caiooliveirafarias0@gmail.com>
Co-committed-by: Caio Oliveira <caiooliveirafarias0@gmail.com>
This commit is contained in:
parent 5edcdea78f
commit 8440c2074d
@@ -338,11 +338,6 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& m
         uint8_pass = std::make_unique<Uint8Pass>(device, scheduler, descriptor_pool, staging_pool,
                                                  compute_pass_descriptor_queue);
     }
-    const u32 ubo_align = static_cast<u32>(
-        device.GetUniformBufferAlignment() // check if the device has it
-    );
-    // add the ability to change the size in settings in future
-    uniform_ring.Init(memory_allocator, 8 * 1024 * 1024 /* 8 MiB */, ubo_align ? ubo_align : 256);
     quad_array_index_buffer = std::make_shared<QuadArrayIndexBuffer>(device_, memory_allocator_,
                                                                      scheduler_, staging_pool_);
     quad_strip_index_buffer = std::make_shared<QuadStripIndexBuffer>(device_, memory_allocator_,
@@ -361,41 +356,6 @@ void BufferCacheRuntime::FreeDeferredStagingBuffer(StagingBufferRef& ref) {
     staging_pool.FreeDeferred(ref);
 }
 
-void BufferCacheRuntime::UniformRing::Init(MemoryAllocator& alloc, u64 bytes, u32 alignment)
-{
-    for (size_t i = 0; i < NUM_FRAMES; ++i) {
-        VkBufferCreateInfo ci{
-            .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
-            .pNext = nullptr,
-            .flags = 0,
-            .size = bytes,
-            .usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
-            .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
-            .queueFamilyIndexCount = 0,
-            .pQueueFamilyIndices = nullptr,
-        };
-        buffers[i] = alloc.CreateBuffer(ci, MemoryUsage::Upload);
-        mapped[i] = buffers[i].Mapped().data();
-    }
-    size = bytes;
-    align = alignment ? alignment : 256;
-    head = 0;
-    current_frame = 0;
-}
-
-std::span<u8> BufferCacheRuntime::UniformRing::Alloc(u32 bytes, u32& out_offset) {
-    const u64 aligned = Common::AlignUp(head, static_cast<u64>(align));
-    u64 end = aligned + bytes;
-
-    if (end > size) {
-        return {}; // Fallback to staging pool
-    }
-
-    out_offset = static_cast<u32>(aligned);
-    head = end;
-    return {mapped[current_frame] + out_offset, bytes};
-}
-
 u64 BufferCacheRuntime::GetDeviceLocalMemory() const {
     return device.GetDeviceLocalMemory();
 }
@@ -416,7 +376,6 @@ void BufferCacheRuntime::TickFrame(Common::SlotVector<Buffer>& slot_buffers) noe
     for (auto it = slot_buffers.begin(); it != slot_buffers.end(); it++) {
         it->ResetUsageTracking();
     }
-    uniform_ring.BeginFrame();
 }
 
 void BufferCacheRuntime::Finish() {
@@ -127,15 +127,9 @@ public:
 
     void BindTransformFeedbackBuffers(VideoCommon::HostBindings<Buffer>& bindings);
 
-    std::span<u8> BindMappedUniformBuffer([[maybe_unused]] size_t /*stage*/,
-                                          [[maybe_unused]] u32 /*binding_index*/,
+    std::span<u8> BindMappedUniformBuffer([[maybe_unused]] size_t stage,
+                                          [[maybe_unused]] u32 binding_index,
                                           u32 size) {
-        u32 offset = 0;
-        if (auto span = uniform_ring.Alloc(size, offset); !span.empty()) {
-            BindBuffer(*uniform_ring.buffers[uniform_ring.current_frame], offset, size);
-            return span;
-        }
-        // Fallback for giant requests
         const StagingBufferRef ref = staging_pool.Request(size, MemoryUsage::Upload);
         BindBuffer(ref.buffer, static_cast<u32>(ref.offset), size);
         return ref.mapped_span;
@@ -163,24 +157,6 @@ private:
     void ReserveNullBuffer();
     vk::Buffer CreateNullBuffer();
 
-    struct UniformRing {
-        static constexpr size_t NUM_FRAMES = 3;
-        std::array<vk::Buffer, NUM_FRAMES> buffers{};
-        std::array<u8*, NUM_FRAMES> mapped{};
-        u64 size = 0;
-        u64 head = 0;
-        u32 align = 256;
-        size_t current_frame = 0;
-
-        void Init(MemoryAllocator& alloc, u64 bytes, u32 alignment);
-        void BeginFrame() {
-            current_frame = (current_frame + 1) % NUM_FRAMES;
-            head = 0;
-        }
-        std::span<u8> Alloc(u32 bytes, u32& out_offset);
-    };
-    UniformRing uniform_ring;
-
     const Device& device;
     MemoryAllocator& memory_allocator;
     Scheduler& scheduler;
@@ -156,19 +156,6 @@ public:
 
         ReserveHostQuery();
 
-        // Ensure outside render pass
-        scheduler.RequestOutsideRenderPassOperationContext();
-
-        // Reset query pool outside render pass
-        scheduler.Record([query_pool = current_query_pool,
-                          query_index = current_bank_slot](vk::CommandBuffer cmdbuf) {
-            cmdbuf.ResetQueryPool(query_pool, static_cast<u32>(query_index), 1);
-        });
-
-        // Manually restart the render pass (required for vkCmdClearAttachments, etc.)
-        scheduler.RequestRenderpass(texture_cache.GetFramebuffer());
-
-        // Begin query inside the newly started render pass
         scheduler.Record([query_pool = current_query_pool,
                           query_index = current_bank_slot](vk::CommandBuffer cmdbuf) {
             const bool use_precise = Settings::IsGPULevelHigh();
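The block removed above existed because vkCmdResetQueryPool must be recorded outside a render pass instance (unless the query is reset from the host via VK_EXT_host_query_reset), so the reverted code left the render pass, reset the query slot, restarted the render pass, and only then began the query. Below is a minimal sketch of that ordering using raw Vulkan calls; the command buffer, query pool, and slot index are assumed to come from the caller, and the render pass suspension/restart happens around this function as the comments note.

#include <vulkan/vulkan.h>

// Sketch of the ordering constraint behind the removed code: reset the query
// slot while no render pass is active, then begin the query once the render
// pass has been restarted.
void ResetThenBeginQuery(VkCommandBuffer cmdbuf, VkQueryPool query_pool,
                         uint32_t slot, bool use_precise) {
    // Assumes the caller has already ended the current render pass
    // (the scheduler's RequestOutsideRenderPassOperationContext()).
    vkCmdResetQueryPool(cmdbuf, query_pool, slot, 1);

    // ...the caller restarts the render pass here (RequestRenderpass)...

    const VkQueryControlFlags flags = use_precise ? VK_QUERY_CONTROL_PRECISE_BIT : 0;
    vkCmdBeginQuery(cmdbuf, query_pool, slot, flags);
}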
@@ -302,8 +302,6 @@ void Scheduler::EndRenderPass()
                       images = renderpass_images,
                       ranges = renderpass_image_ranges](vk::CommandBuffer cmdbuf) {
         std::array<VkImageMemoryBarrier, 9> barriers;
-        VkPipelineStageFlags src_stages = 0;
-
         for (size_t i = 0; i < num_images; ++i) {
             const VkImageSubresourceRange& range = ranges[i];
             const bool is_color = (range.aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) != 0;
@@ -312,20 +310,14 @@ void Scheduler::EndRenderPass()
                                                                | VK_IMAGE_ASPECT_STENCIL_BIT)) != 0;
 
             VkAccessFlags src_access = 0;
-            VkPipelineStageFlags this_stage = 0;
 
-            if (is_color) {
+            if (is_color)
                 src_access |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
-                this_stage |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
-            }
-
-            if (is_depth_stencil) {
+            else if (is_depth_stencil)
                 src_access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
-                this_stage |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT
-                              | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
-            }
-
-            src_stages |= this_stage;
+            else
+                src_access |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT
+                              | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
 
             barriers[i] = VkImageMemoryBarrier{
                 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
@@ -344,15 +336,10 @@ void Scheduler::EndRenderPass()
                 .subresourceRange = range,
             };
         }
-
-        // Graft: ensure explicit fragment tests + color output stages are always synchronized (AMD/Windows fix)
-        src_stages |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
-                      VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
-                      VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
-
         cmdbuf.EndRenderPass();
-
-        cmdbuf.PipelineBarrier(src_stages,
+        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
+                               VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
+                               VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
                                VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                                0,
                                nullptr,
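After the revert, EndRenderPass no longer accumulates a per-attachment src_stages mask; it always waits on fragment tests plus color attachment output, as in the restored PipelineBarrier call above. A rough sketch of the resulting barrier shape follows; the destination access mask, image layouts, and queue family values are placeholders not shown in this hunk.

#include <vulkan/vulkan.h>

// Shape of the restored end-of-render-pass barrier: a fixed source stage mask
// (fragment tests + color output) instead of one accumulated per attachment.
void BarrierAfterRenderPass(VkCommandBuffer cmdbuf, VkImage image,
                            VkImageSubresourceRange range, VkAccessFlags src_access) {
    const VkImageMemoryBarrier barrier{
        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
        .pNext = nullptr,
        .srcAccessMask = src_access,
        .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, // placeholder
        .oldLayout = VK_IMAGE_LAYOUT_GENERAL,                                     // placeholder
        .newLayout = VK_IMAGE_LAYOUT_GENERAL,                                     // placeholder
        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .image = image,
        .subresourceRange = range,
    };
    vkCmdPipelineBarrier(cmdbuf,
                         VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
                             VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
                             VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
                         VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                         0, 0, nullptr, 0, nullptr, 1, &barrier);
}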
@@ -121,7 +121,6 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
     X(vkCmdEndConditionalRenderingEXT);
     X(vkCmdEndQuery);
     X(vkCmdEndRenderPass);
-    X(vkCmdResetQueryPool);
     X(vkCmdEndTransformFeedbackEXT);
     X(vkCmdEndDebugUtilsLabelEXT);
     X(vkCmdFillBuffer);
@@ -221,7 +221,6 @@ struct DeviceDispatch : InstanceDispatch {
    PFN_vkCmdEndConditionalRenderingEXT vkCmdEndConditionalRenderingEXT{};
    PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{};
    PFN_vkCmdEndQuery vkCmdEndQuery{};
-   PFN_vkCmdResetQueryPool vkCmdResetQueryPool{};
    PFN_vkCmdEndRenderPass vkCmdEndRenderPass{};
    PFN_vkCmdEndTransformFeedbackEXT vkCmdEndTransformFeedbackEXT{};
    PFN_vkCmdFillBuffer vkCmdFillBuffer{};
@@ -1144,9 +1143,6 @@ public:
    VkCommandBuffer operator*() const noexcept {
        return handle;
    }
-   void ResetQueryPool(VkQueryPool query_pool, uint32_t first, uint32_t count) const noexcept {
-       dld->vkCmdResetQueryPool(handle, query_pool, first, count);
-   }
    void Begin(const VkCommandBufferBeginInfo& begin_info) const {
        Check(dld->vkBeginCommandBuffer(handle, &begin_info));
    }