diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp
index 07611ef98c..62c69c67b0 100644
--- a/src/video_core/renderer_vulkan/blit_image.cpp
+++ b/src/video_core/renderer_vulkan/blit_image.cpp
@@ -436,7 +436,23 @@ void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayo
             .layerCount = 1,
         },
     };
-    cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+    // Blit image barrier - the transition can happen at any stage. Keep the masks
+    // broad since this is a general utility, but avoid ALL_COMMANDS so drivers can overlap work.
+    const VkPipelineStageFlags src_stages_blit =
+        VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
+        VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
+        VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
+        VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+        VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+        VK_PIPELINE_STAGE_TRANSFER_BIT;
+    const VkPipelineStageFlags dst_stages_blit =
+        VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+        VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+        VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
+        VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
+        VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
+        VK_PIPELINE_STAGE_TRANSFER_BIT;
+    cmdbuf.PipelineBarrier(src_stages_blit, dst_stages_blit,
                            0, barrier);
 }
diff --git a/src/video_core/renderer_vulkan/present/util.cpp b/src/video_core/renderer_vulkan/present/util.cpp
index 29a1c34976..c85af50060 100644
--- a/src/video_core/renderer_vulkan/present/util.cpp
+++ b/src/video_core/renderer_vulkan/present/util.cpp
@@ -1,4 +1,4 @@
-// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
 // SPDX-License-Identifier: GPL-3.0-or-later
 
 // SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
@@ -68,7 +68,16 @@ void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayo
             .layerCount = 1,
         },
     };
-    cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+    // Present path - use specific stages for better parallelism on tile-based GPUs
+    const VkPipelineStageFlags src_stages =
+        VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
+        VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+        VK_PIPELINE_STAGE_TRANSFER_BIT;
+    const VkPipelineStageFlags dst_stages =
+        VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
+        VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+        VK_PIPELINE_STAGE_TRANSFER_BIT;
+    cmdbuf.PipelineBarrier(src_stages, dst_stages,
                            0, barrier);
 }
@@ -151,11 +160,12 @@ void DownloadColorImage(vk::CommandBuffer& cmdbuf, VkImage image, VkBuffer buffe
             .layerCount = VK_REMAINING_ARRAY_LAYERS,
         },
     };
+    // The host will read the downloaded data
     static constexpr VkMemoryBarrier memory_write_barrier{
         .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
         .pNext = nullptr,
-        .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
-        .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
+        .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+        .dstAccessMask = VK_ACCESS_HOST_READ_BIT,
     };
     const VkBufferImageCopy copy{
         .bufferOffset = 0,
@@ -170,10 +180,20 @@ void DownloadColorImage(vk::CommandBuffer& cmdbuf, VkImage image, VkBuffer buffe
         .imageOffset{.x = 0, .y = 0, .z = 0},
         .imageExtent{extent},
     };
-    cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
+    const VkPipelineStageFlags src_stages_copy =
+        VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
+        VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+        VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+        VK_PIPELINE_STAGE_TRANSFER_BIT;
+    cmdbuf.PipelineBarrier(src_stages_copy, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
                            read_barrier);
     cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, copy);
-    cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0,
+    const VkPipelineStageFlags dst_stages_copy =
+        VK_PIPELINE_STAGE_HOST_BIT |
+        VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+        VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+        VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+    cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stages_copy, 0,
                            memory_write_barrier, nullptr, image_write_barrier);
 }
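The TRANSFER -> HOST barrier above makes the copy visible to host reads, but only on the device timeline. A minimal sketch of the host side of such a readback, assuming the staging memory may be non-coherent (`dev`, `readback_fence`, and `staging_memory` are hypothetical names, not identifiers from this patch):

    // Wait for the submission that recorded the barrier and the copy.
    vkWaitForFences(dev, 1, &readback_fence, VK_TRUE, UINT64_MAX);

    // Without VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, device writes must be
    // invalidated before the mapped pointer is read.
    const VkMappedMemoryRange range{
        .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
        .pNext = nullptr,
        .memory = staging_memory,
        .offset = 0,
        .size = VK_WHOLE_SIZE,
    };
    vkInvalidateMappedMemoryRanges(dev, 1, &range);
    // The mapped pointer is now safe to read.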
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 6256bc8bd8..b10031de10 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -436,13 +436,27 @@ void BufferCacheRuntime::CopyBuffer(VkBuffer dst_buffer, VkBuffer src_buffer,
     scheduler.RequestOutsideRenderPassOperationContext();
     scheduler.Record([src_buffer, dst_buffer, vk_copies, barrier](vk::CommandBuffer cmdbuf) {
         if (barrier) {
-            cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+            // Prior buffer accesses can come from vertex input, shaders, compute, or transfer
+            const VkPipelineStageFlags src_stages =
+                VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
+                VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
+                VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+                VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+                VK_PIPELINE_STAGE_TRANSFER_BIT;
+            cmdbuf.PipelineBarrier(src_stages, VK_PIPELINE_STAGE_TRANSFER_BIT,
                                    0, READ_BARRIER);
         }
         cmdbuf.CopyBuffer(src_buffer, dst_buffer, VideoCommon::FixSmallVectorADL(vk_copies));
         if (barrier) {
+            // Copied data is consumed at vertex input, shader, or compute stages
+            const VkPipelineStageFlags dst_stages =
+                VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
+                VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
+                VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+                VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+                VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
             cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
-                                   VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, WRITE_BARRIER);
+                                   dst_stages, 0, WRITE_BARRIER);
         }
     });
 }
@@ -456,21 +470,36 @@ void BufferCacheRuntime::PreCopyBarrier() {
     };
     scheduler.RequestOutsideRenderPassOperationContext();
     scheduler.Record([](vk::CommandBuffer cmdbuf) {
-        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+        const VkPipelineStageFlags src_stages =
+            VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
+            VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
+            VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+            VK_PIPELINE_STAGE_TRANSFER_BIT;
+        cmdbuf.PipelineBarrier(src_stages, VK_PIPELINE_STAGE_TRANSFER_BIT,
                                0, READ_BARRIER);
     });
 }
 
 void BufferCacheRuntime::PostCopyBarrier() {
+    // Specific access flags for buffer destinations: vertex input, uniforms, storage, index
     static constexpr VkMemoryBarrier WRITE_BARRIER{
         .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
         .pNext = nullptr,
         .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
-        .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
+        .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT |
+                         VK_ACCESS_UNIFORM_READ_BIT | VK_ACCESS_SHADER_READ_BIT |
+                         VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_INDIRECT_COMMAND_READ_BIT,
     };
     scheduler.RequestOutsideRenderPassOperationContext();
     scheduler.Record([](vk::CommandBuffer cmdbuf) {
-        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+        const VkPipelineStageFlags dst_stages =
+            VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
+            VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
+            VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+            VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stages,
                                0, WRITE_BARRIER);
     });
 }
@@ -494,10 +523,24 @@ void BufferCacheRuntime::ClearBuffer(VkBuffer dest_buffer, u32 offset, size_t si
     scheduler.RequestOutsideRenderPassOperationContext();
     scheduler.Record([dest_buffer, offset, size, value](vk::CommandBuffer cmdbuf) {
-        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+        // Prior buffer accesses can come from vertex input, shaders, compute, or transfer
+        const VkPipelineStageFlags src_stages_clear =
+            VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
+            VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
+            VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+            VK_PIPELINE_STAGE_TRANSFER_BIT;
+        cmdbuf.PipelineBarrier(src_stages_clear, VK_PIPELINE_STAGE_TRANSFER_BIT,
                                0, READ_BARRIER);
         cmdbuf.FillBuffer(dest_buffer, offset, size, value);
-        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+        // Cleared data is consumed at vertex input, shader, or compute stages
+        const VkPipelineStageFlags dst_stages_clear =
+            VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
+            VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
+            VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+            VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stages_clear,
                                0, WRITE_BARRIER);
     });
 }
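The same five-stage source and destination masks now recur at every buffer-cache call site. A sketch of hoisting them into named constants so the call sites cannot drift apart (the `kBuffer*` names are hypothetical, not part of this patch):

    namespace {
    // All stages at which the buffer cache's buffers are produced or consumed.
    constexpr VkPipelineStageFlags kBufferSrcStages =
        VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
        VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
        VK_PIPELINE_STAGE_TRANSFER_BIT;
    constexpr VkPipelineStageFlags kBufferDstStages =
        VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
        VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
        VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
    } // namespace

    // Call sites then shrink to:
    //   cmdbuf.PipelineBarrier(kBufferSrcStages, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, READ_BARRIER);
    //   cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, kBufferDstStages, 0, WRITE_BARRIER);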
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index 22e646afe9..c978906c38 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -446,13 +446,26 @@ void ConditionalRenderingResolvePass::Resolve(VkBuffer dst_buffer, VkBuffer src_
         const VkDescriptorSet set = descriptor_allocator.Commit();
         device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data);
-        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+        // Memory writes can come from graphics or compute stages
+        const VkPipelineStageFlags src_stages_compute =
+            VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
+            VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+            VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
+            VK_PIPELINE_STAGE_TRANSFER_BIT;
+        cmdbuf.PipelineBarrier(src_stages_compute,
                                VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, read_barrier);
         cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
         cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
         cmdbuf.Dispatch(1, 1, 1);
+        // Conditional rendering result used by draw indirect or conditional rendering
+        const VkPipelineStageFlags dst_stages_cond =
+            VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
+            VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT |
+            VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
+            VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
-                               VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, write_barrier);
+                               dst_stages_cond, 0, write_barrier);
     });
 }
@@ -520,14 +533,29 @@ void QueriesPrefixScanPass::Run(VkBuffer accumulation_buffer, VkBuffer dst_buffe
         const VkDescriptorSet set = descriptor_allocator.Commit();
         device.GetLogical().UpdateDescriptorSet(set, *descriptor_template,
                                                 descriptor_data);
-        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+        // Memory writes can come from graphics or compute stages
+        const VkPipelineStageFlags src_stages_scan =
+            VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
+            VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+            VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
+            VK_PIPELINE_STAGE_TRANSFER_BIT;
+        cmdbuf.PipelineBarrier(src_stages_scan,
                                VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, read_barrier);
         cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
         cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *layout, 0, set, {});
         cmdbuf.PushConstants(*layout, VK_SHADER_STAGE_COMPUTE_BIT, uniforms);
         cmdbuf.Dispatch(1, 1, 1);
+        // Query prefix scan results used by draw indirect, conditional rendering, or shaders
+        const VkPipelineStageFlags dst_stages_scan =
+            VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
+            VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT |
+            VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
+            VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
+            VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
-                               VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, write_barrier);
+                               dst_stages_scan, 0, write_barrier);
     });
 }
 }
@@ -579,8 +607,15 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
                 .layerCount = VK_REMAINING_ARRAY_LAYERS,
             },
         };
-        cmdbuf.PipelineBarrier(is_initialized ? VK_PIPELINE_STAGE_ALL_COMMANDS_BIT
-                                              : VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+        // If initialized, memory writes can come from graphics or compute stages
+        const VkPipelineStageFlags src_stages_astc = is_initialized ?
+            (VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
+             VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+             VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+             VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
+             VK_PIPELINE_STAGE_TRANSFER_BIT) :
+            VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+        cmdbuf.PipelineBarrier(src_stages_astc,
                                VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, image_barrier);
         cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, vk_pipeline);
     });
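One assumption worth making explicit: VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT is only a valid stage when VK_EXT_conditional_rendering is enabled on the device. A hedged sketch of guarding the mask, in case this pass can run without the extension (the IsExtConditionalRendering() accessor is hypothetical):

    VkPipelineStageFlags dst_stages_cond = VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
                                           VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
                                           VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
    if (device.IsExtConditionalRendering()) { // hypothetical accessor
        // Only valid with VK_EXT_conditional_rendering enabled.
        dst_stages_cond |= VK_PIPELINE_STAGE_CONDITIONAL_RENDERING_BIT_EXT;
    }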
diff --git a/src/video_core/renderer_vulkan/vk_present_manager.cpp b/src/video_core/renderer_vulkan/vk_present_manager.cpp
index 3b5c2e3c01..3960f78343 100644
--- a/src/video_core/renderer_vulkan/vk_present_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_present_manager.cpp
@@ -1,4 +1,4 @@
-// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
 // SPDX-License-Identifier: GPL-3.0-or-later
 
 // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
@@ -441,7 +441,13 @@ void PresentManager::CopyToSwapchainImpl(Frame* frame) {
         },
     };
 
-    cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, {},
+    // Previous operations that might have written to the frame image
+    const VkPipelineStageFlags src_stages_present =
+        VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
+        VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+        VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+        VK_PIPELINE_STAGE_TRANSFER_BIT;
+    cmdbuf.PipelineBarrier(src_stages_present, VK_PIPELINE_STAGE_TRANSFER_BIT, {},
                            {}, {}, pre_barriers);
 
     if (blit_supported) {
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index 414c270c8e..b9ff4cd00e 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -1,4 +1,4 @@
-// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
 // SPDX-License-Identifier: GPL-3.0-or-later
 
 // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
@@ -809,12 +809,19 @@ public:
             .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
             .pNext = nullptr,
             .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
-            .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
+            .dstAccessMask = VK_ACCESS_HOST_READ_BIT | VK_ACCESS_SHADER_READ_BIT,
         };
         scheduler.RequestOutsideRenderPassOperationContext();
         scheduler.Record([](vk::CommandBuffer cmdbuf) {
+            // After the transfer, results may be read by the host or used in subsequent operations
+            const VkPipelineStageFlags dst_stages_query =
+                VK_PIPELINE_STAGE_HOST_BIT |
+                VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
+                VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+                VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+                VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
             cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
-                                   VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, WRITE_BARRIER);
+                                   dst_stages_query, 0, WRITE_BARRIER);
         });
 
         std::scoped_lock lk(flush_guard);
@@ -942,7 +949,7 @@ private:
             .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
             .pNext = nullptr,
             .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
-            .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT,
+            .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
         };
         scheduler.RequestOutsideRenderPassOperationContext();
         scheduler.Record([dst_buffer = current_bank->GetBuffer(),
@@ -1475,22 +1482,40 @@ void QueryCacheRuntime::Barriers(bool is_prebarrier) {
         .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
         .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
     };
+    // Query results may be read by the host or used in shaders/indirect commands
    static constexpr VkMemoryBarrier WRITE_BARRIER{
        .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
        .pNext = nullptr,
        .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
-        .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
+        .dstAccessMask = VK_ACCESS_HOST_READ_BIT | VK_ACCESS_SHADER_READ_BIT |
+                         VK_ACCESS_INDIRECT_COMMAND_READ_BIT,
    };
    impl->scheduler.RequestOutsideRenderPassOperationContext();
    if (is_prebarrier) {
        impl->scheduler.Record([](vk::CommandBuffer cmdbuf) {
-            cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+            // Query data can be written by graphics or compute stages
+            const VkPipelineStageFlags src_stages_query =
+                VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
+                VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+                VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+                VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
+                VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
+                VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
+                VK_PIPELINE_STAGE_TRANSFER_BIT;
+            cmdbuf.PipelineBarrier(src_stages_query,
                                    VK_PIPELINE_STAGE_TRANSFER_BIT, 0, READ_BARRIER);
        });
    } else {
        impl->scheduler.Record([](vk::CommandBuffer cmdbuf) {
+            // Query results may be read by the host or used in shaders/indirect commands
+            const VkPipelineStageFlags dst_stages_query =
+                VK_PIPELINE_STAGE_HOST_BIT |
+                VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
+                VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
+                VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+                VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
             cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
-                                   VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, WRITE_BARRIER);
+                                   dst_stages_query, 0, WRITE_BARRIER);
        });
    }
 }
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 60b899a811..4d508fc2f2 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -586,14 +586,23 @@ void RasterizerVulkan::DispatchCompute() {
     }
     const std::array dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z};
     scheduler.RequestOutsideRenderPassOperationContext();
+    // Compute shader reads from storage buffers, uniforms, and images
     static constexpr VkMemoryBarrier READ_BARRIER{
         .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
         .pNext = nullptr,
-        .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
-        .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT,
+        .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
+        .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT,
     };
-    scheduler.Record([](vk::CommandBuffer cmdbuf) { cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
-                                                                           0, READ_BARRIER); });
+    scheduler.Record([](vk::CommandBuffer cmdbuf) {
+        // Memory writes can come from graphics or compute stages
+        const VkPipelineStageFlags src_stages =
+            VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
+            VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+            VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+        cmdbuf.PipelineBarrier(src_stages, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+                               0, READ_BARRIER);
+    });
     scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); });
 
     // Log compute dispatch
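The narrowed barrier above pairs compute-visible access flags with the stages that can produce them. If a dispatch ever sourced its workgroup counts from a GPU-written buffer (vkCmdDispatchIndirect), the indirect-argument read would need covering too; a sketch under that assumption, inside the same Record lambda:

    // Hypothetical: also make GPU-written dispatch arguments visible to the
    // indirect-command read, not just to the compute shader itself.
    static constexpr VkMemoryBarrier INDIRECT_READ_BARRIER{
        .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
        .pNext = nullptr,
        .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
        .dstAccessMask = VK_ACCESS_INDIRECT_COMMAND_READ_BIT | VK_ACCESS_SHADER_READ_BIT,
    };
    cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                           VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                           0, INDIRECT_READ_BARRIER);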
diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp
index 80ff75e3b9..f1e15595b3 100644
--- a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp
@@ -1,4 +1,4 @@
-// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
 // SPDX-License-Identifier: GPL-3.0-or-later
 
 // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
@@ -44,13 +44,19 @@ using VideoCore::Surface::SurfaceType;
 }
 
 VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat format,
-                                              VkSampleCountFlagBits samples) {
+                                              VkSampleCountFlagBits samples,
+                                              bool is_depth_stencil) {
     using MaxwellToVK::SurfaceFormat;
     const SurfaceType surface_type = GetSurfaceType(format);
     const bool has_stencil = surface_type == SurfaceType::DepthStencil ||
                              surface_type == SurfaceType::Stencil;
+    // Use optimal layouts for attachments - this allows drivers to optimize tiling and access patterns
+    const VkImageLayout attachment_layout = is_depth_stencil
+        ? VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL
+        : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+
     return {
         .flags = {},
         .format = SurfaceFormat(device, FormatType::Optimal, true, format).format,
@@ -61,8 +67,8 @@ using VideoCore::Surface::SurfaceType;
                                          : VK_ATTACHMENT_LOAD_OP_DONT_CARE,
         .stencilStoreOp = has_stencil ? VK_ATTACHMENT_STORE_OP_STORE
                                       : VK_ATTACHMENT_STORE_OP_DONT_CARE,
-        .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
-        .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
+        .initialLayout = attachment_layout,
+        .finalLayout = attachment_layout,
     };
 }
 } // Anonymous namespace
@@ -84,10 +90,10 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
         const bool is_valid{format != PixelFormat::Invalid};
         references[index] = VkAttachmentReference{
             .attachment = is_valid ? num_colors : VK_ATTACHMENT_UNUSED,
-            .layout = VK_IMAGE_LAYOUT_GENERAL,
+            .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
         };
         if (is_valid) {
-            descriptions.push_back(AttachmentDescription(*device, format, key.samples));
+            descriptions.push_back(AttachmentDescription(*device, format, key.samples, false));
             num_attachments = static_cast<u32>(index + 1);
             ++num_colors;
         }
@@ -97,9 +103,9 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
     if (key.depth_format != PixelFormat::Invalid) {
         depth_reference = VkAttachmentReference{
             .attachment = num_colors,
-            .layout = VK_IMAGE_LAYOUT_GENERAL,
+            .layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
         };
-        descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples));
+        descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples, true));
     }
     const VkSubpassDescription subpass{
         .flags = 0,
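Switching attachments from GENERAL to the *_ATTACHMENT_OPTIMAL layouts relies on the render pass's implicit external dependencies. If those prove too weak on some driver, an explicit dependency can be supplied; a sketch (not part of this patch) that VkRenderPassCreateInfo would reference through dependencyCount/pDependencies:

    // Hypothetical explicit external dependency for the new attachment layouts.
    const VkSubpassDependency external_dependency{
        .srcSubpass = VK_SUBPASS_EXTERNAL,
        .dstSubpass = 0,
        .srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
                        VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
                        VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
        .dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
                        VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
                        VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
        .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
                         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
        .dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
                         VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
                         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
                         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
        .dependencyFlags = 0,
    };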
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index e526d606dc..aafcfdf65b 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -264,14 +264,26 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se
     const u64 signal_value = master_semaphore->NextTick();
     RecordWithUploadBuffer([signal_semaphore, wait_semaphore, signal_value,
                             this](vk::CommandBuffer cmdbuf, vk::CommandBuffer upload_cmdbuf) {
+        // Specific access flags for upload destinations: vertex input, uniforms, storage, index, textures
        static constexpr VkMemoryBarrier WRITE_BARRIER{
            .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
            .pNext = nullptr,
            .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
-            .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
+            .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT |
+                             VK_ACCESS_UNIFORM_READ_BIT | VK_ACCESS_SHADER_READ_BIT |
+                             VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
+                             VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT,
        };
+        // Specify the exact stages that consume the transfer results instead of ALL_COMMANDS_BIT
        upload_cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
-                                      VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, WRITE_BARRIER);
+                                      VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
+                                          VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+                                          VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+                                          VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
+                                          VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
+                                          VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
+                                          VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
+                                      0, WRITE_BARRIER);
        upload_cmdbuf.End();
        cmdbuf.End();
@@ -332,11 +344,11 @@ void Scheduler::EndRenderPass()
     query_cache->CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, false);
     query_cache->NotifySegment(false);
-
     Record([num_images = num_renderpass_images, images = renderpass_images,
             ranges = renderpass_image_ranges](vk::CommandBuffer cmdbuf) {
        std::array<VkImageMemoryBarrier, 9> barriers;
+        VkPipelineStageFlags src_stages = 0;
        for (size_t i = 0; i < num_images; ++i) {
            const VkImageSubresourceRange& range = ranges[i];
            const bool is_color = (range.aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) != 0;
@@ -344,38 +356,76 @@ void Scheduler::EndRenderPass()
                    & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) != 0;
+            // Determine the optimal destination layout based on image usage.
+            // After the render pass, images may be used as shader resources or as attachments again.
+            // Use optimal layouts to allow driver optimizations.
            VkAccessFlags src_access = 0;
+            VkAccessFlags dst_access = 0;
+            VkPipelineStageFlags this_stage = 0;
+            VkImageLayout new_layout;
-            if (is_color)
-                src_access |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
-            else if (is_depth_stencil)
-                src_access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
-            else
-                src_access |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT
-                            | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
-
+            if (is_color) {
+                // Color attachments can be read as textures or used as attachments again
+                src_access = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+                this_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+                new_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+                dst_access = VK_ACCESS_SHADER_READ_BIT
+                           | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT
+                           | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+            } else if (is_depth_stencil) {
+                // Depth attachments can be read as textures or used as attachments again
+                src_access = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+                this_stage = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT
+                           | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
+                new_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+                dst_access = VK_ACCESS_SHADER_READ_BIT
+                           | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT
+                           | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+            } else {
+                // Fall back to GENERAL for unknown usage
+                src_access = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT
+                           | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+                this_stage = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT
+                           | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT
+                           | VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+                new_layout = VK_IMAGE_LAYOUT_GENERAL;
+                dst_access = VK_ACCESS_SHADER_READ_BIT
+                           | VK_ACCESS_SHADER_WRITE_BIT
+                           | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT
+                           | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT
+                           | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT
+                           | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+            }
+            src_stages |= this_stage;
            barriers[i] = VkImageMemoryBarrier{
                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
                .pNext = nullptr,
                .srcAccessMask = src_access,
-                .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT
-                               | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT
-                               | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT
-                               | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT
-                               | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
-                .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
-                .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+                .dstAccessMask = dst_access,
+                .oldLayout = is_color ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
+                                      : (is_depth_stencil ? VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL
+                                                          : VK_IMAGE_LAYOUT_GENERAL),
+                .newLayout = new_layout,
                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                .image = images[i],
                .subresourceRange = range,
            };
        }
+
        cmdbuf.EndRenderPass();
-        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
-                               VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
-                               VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
-                               VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+
+        // Use specific pipeline stages instead of ALL_COMMANDS_BIT for better parallelism.
+        // The destination stages depend on how the images will be used next.
+        const VkPipelineStageFlags dst_stages =
+            VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+            VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
+            VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
+            VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
+
+        cmdbuf.PipelineBarrier(src_stages,
+                               dst_stages,
                                0, nullptr, nullptr,
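For reference, the pattern this patch applies throughout - replacing ALL_COMMANDS with explicit stage/access pairs - maps directly onto VK_KHR_synchronization2, where every barrier carries its own stage masks. A sketch of the color-attachment-to-sampled case from EndRenderPass in that API, assuming the extension or Vulkan 1.3 is available (this codebase uses the original barrier API; `command_buffer` is a hypothetical raw VkCommandBuffer):

    const VkMemoryBarrier2 barrier2{
        .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
        .pNext = nullptr,
        .srcStageMask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
        .srcAccessMask = VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT,
        .dstStageMask = VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT,
        .dstAccessMask = VK_ACCESS_2_SHADER_SAMPLED_READ_BIT,
    };
    const VkDependencyInfo dep_info{
        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
        .pNext = nullptr,
        .dependencyFlags = 0,
        .memoryBarrierCount = 1,
        .pMemoryBarriers = &barrier2,
        .bufferMemoryBarrierCount = 0,
        .pBufferMemoryBarriers = nullptr,
        .imageMemoryBarrierCount = 0,
        .pImageMemoryBarriers = nullptr,
    };
    vkCmdPipelineBarrier2(command_buffer, &dep_info);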
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 39a43d5950..988ab53266 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -810,7 +810,8 @@ void BlitScale(Scheduler& scheduler, VkImage src_image, VkImage dst_image, const
            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
            .pNext = nullptr,
            .srcAccessMask = 0,
-            .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT | VK_ACCESS_MEMORY_READ_BIT,
+            .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT |
+                             VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
            .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
            .newLayout = VK_IMAGE_LAYOUT_GENERAL,
            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
@@ -822,7 +823,8 @@
            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
            .pNext = nullptr,
            .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
-            .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT | VK_ACCESS_MEMORY_READ_BIT,
+            .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT |
+                             VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
            .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
            .newLayout = VK_IMAGE_LAYOUT_GENERAL,
            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
@@ -831,11 +833,26 @@
            .subresourceRange = subresource_range,
        },
    };
-        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+        // Use specific pipeline stages for better parallelism
+        const VkPipelineStageFlags src_stages =
+            VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
+            VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
+            VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
+            VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+            VK_PIPELINE_STAGE_TRANSFER_BIT;
+        cmdbuf.PipelineBarrier(src_stages, VK_PIPELINE_STAGE_TRANSFER_BIT,
                                0, nullptr, nullptr, read_barriers);
        cmdbuf.BlitImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_image,
                         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, regions, vk_filter);
-        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+        // After the transfer, images may be used in graphics, compute, or as attachments
+        const VkPipelineStageFlags dst_stages =
+            VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+            VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
+            VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
+            VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
+        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stages,
                                0, nullptr, nullptr, write_barriers);
    });
 }
@@ -1021,11 +1038,12 @@ void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src,
        .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
        .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
    };
+    // After the reinterpret, buffers are used as transfer source, uniforms, or storage
    static constexpr VkMemoryBarrier WRITE_BARRIER{
        .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
        .pNext = nullptr,
        .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
-        .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
+        .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT,
    };
    const std::array pre_barriers{
        VkImageMemoryBarrier{
@@ -1092,18 +1110,39 @@
            .subresourceRange = dst_range.SubresourceRange(dst_aspect_mask),
        },
    };
-        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+        const VkPipelineStageFlags src_stages_transfer =
+            VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
+            VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
+            VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
+            VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+            VK_PIPELINE_STAGE_TRANSFER_BIT;
+        cmdbuf.PipelineBarrier(src_stages_transfer, VK_PIPELINE_STAGE_TRANSFER_BIT,
                                0, {}, {}, pre_barriers);
        cmdbuf.CopyImageToBuffer(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, copy_buffer,
                                 vk_in_copies);
-        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+        const VkPipelineStageFlags dst_stages_transfer =
+            VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+            VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
+            VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
+            VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
+            VK_PIPELINE_STAGE_TRANSFER_BIT;
+        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stages_transfer,
                                0, WRITE_BARRIER, nullptr, middle_in_barrier);
-        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+        cmdbuf.PipelineBarrier(src_stages_transfer, VK_PIPELINE_STAGE_TRANSFER_BIT,
                                0, READ_BARRIER, {}, middle_out_barrier);
        cmdbuf.CopyBufferToImage(copy_buffer, dst_image, VK_IMAGE_LAYOUT_GENERAL, vk_out_copies);
-        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+        // After the reinterpret copy, the image may be used in shaders or as an attachment
+        const VkPipelineStageFlags dst_stages_post =
+            VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+            VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
+            VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
+            VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
+        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stages_post,
                                0, {}, {}, post_barriers);
    });
 }
@@ -1230,7 +1269,12 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst
                .layerCount = VK_REMAINING_ARRAY_LAYERS,
            },
        };
-        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+        const VkPipelineStageFlags src_stages_resolve =
+            VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
+            VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+            VK_PIPELINE_STAGE_TRANSFER_BIT;
+        cmdbuf.PipelineBarrier(src_stages_resolve, VK_PIPELINE_STAGE_TRANSFER_BIT,
                                0, nullptr, nullptr, read_barriers);
        if (is_resolve) {
            cmdbuf.ResolveImage(src_image, VK_IMAGE_LAYOUT_GENERAL, dst_image,
@@ -1243,7 +1287,14 @@
                             src_image, VK_IMAGE_LAYOUT_GENERAL, dst_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                             MakeImageBlit(dst_region, src_region, dst_layers, src_layers), vk_filter);
        }
-        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+        // After the blit/resolve, the image may be used in shaders or as an attachment
+        const VkPipelineStageFlags dst_stages_blit =
+            VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+            VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
+            VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
+            VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
+        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stages_blit,
                                0, write_barrier);
    });
 }
@@ -1821,7 +1872,14 @@ void Image::DownloadMemory(std::span<VkBuffer> buffers_span, std::span<size_t> o
                .layerCount = VK_REMAINING_ARRAY_LAYERS,
            },
        };
-        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+        const VkPipelineStageFlags src_stages_download =
+            VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
+            VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
+            VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
+            VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+            VK_PIPELINE_STAGE_TRANSFER_BIT;
+        cmdbuf.PipelineBarrier(src_stages_download, VK_PIPELINE_STAGE_TRANSFER_BIT,
                                0, read_barrier);
 
        for (size_t index = 0; index < buffers.size(); index++) {
@@ -1829,17 +1887,20 @@
                                     vk_copies[index]);
        }
 
+        // The host will read the downloaded data
        const VkMemoryBarrier memory_write_barrier{
            .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
            .pNext = nullptr,
-            .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
-            .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
+            .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+            .dstAccessMask = VK_ACCESS_HOST_READ_BIT,
        };
+        // The image will be used for shaders or as an attachment
        const VkImageMemoryBarrier image_write_barrier{
            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
            .pNext = nullptr,
            .srcAccessMask = 0,
-            .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
+            .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT |
+                             VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
            .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
            .newLayout = VK_IMAGE_LAYOUT_GENERAL,
            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
@@ -1853,7 +1914,13 @@ void Image::DownloadMemory(std::span<VkBuffer> buffers_span, std::span<size_t> o
                .layerCount = VK_REMAINING_ARRAY_LAYERS,
            },
        };
-        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+        const VkPipelineStageFlags dst_stages_download =
+            VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+            VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
+            VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
+            VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
+        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stages_download,
                                0, memory_write_barrier, nullptr, image_write_barrier);
    });
    return;
@@ -1888,7 +1955,15 @@ void Image::DownloadMemory(std::span<VkBuffer> buffers_span, std::span<size_t> o
                .layerCount = VK_REMAINING_ARRAY_LAYERS,
            },
        };
-        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+        // Image writes can come from graphics or compute stages
+        const VkPipelineStageFlags src_stages_img =
+            VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
+            VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
+            VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
+            VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+            VK_PIPELINE_STAGE_TRANSFER_BIT;
+        cmdbuf.PipelineBarrier(src_stages_img, VK_PIPELINE_STAGE_TRANSFER_BIT,
                                0, read_barrier);
 
        for (size_t index = 0; index < buffers.size(); index++) {
@@ -1920,7 +1995,14 @@
                .layerCount = VK_REMAINING_ARRAY_LAYERS,
            },
        };
-        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+        // After the download, the image may be used in shaders or as an attachment
+        const VkPipelineStageFlags dst_stages_img =
+            VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+            VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
+            VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
+            VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
+        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stages_img,
                                0, memory_write_barrier, nullptr, image_write_barrier);
    });
 }
@@ -2474,7 +2556,9 @@ void TextureCacheRuntime::TransitionImageLayout(Image& image) {
        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
        .pNext = nullptr,
        .srcAccessMask = VK_ACCESS_NONE,
-        .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
+        .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT |
+                         VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
+                         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
        .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
        .newLayout = VK_IMAGE_LAYOUT_GENERAL,
        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
@@ -2490,8 +2574,16 @@
    };
    scheduler.RequestOutsideRenderPassOperationContext();
    scheduler.Record([barrier](vk::CommandBuffer cmdbuf) {
+        // After the layout transition, the image may be used in shaders or as an attachment
+        const VkPipelineStageFlags dst_stages_layout =
+            VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+            VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
+            VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
+            VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
+            VK_PIPELINE_STAGE_TRANSFER_BIT;
        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
-                               VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, barrier);
+                               dst_stages_layout, 0, barrier);
    });
 }
 }
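Narrowed barriers are easy to get subtly wrong: a missed source stage becomes a data race rather than an immediate error. It is therefore worth exercising this patch with synchronization validation enabled. A sketch of turning it on at instance creation, assuming the VK_LAYER_KHRONOS_validation layer is installed (not part of this patch):

    const VkValidationFeatureEnableEXT enables[] = {
        VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT,
    };
    const VkValidationFeaturesEXT features{
        .sType = VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT,
        .pNext = nullptr,
        .enabledValidationFeatureCount = 1,
        .pEnabledValidationFeatures = enables,
        .disabledValidationFeatureCount = 0,
        .pDisabledValidationFeatures = nullptr,
    };
    // Chain `features` into VkInstanceCreateInfo::pNext and enable
    // "VK_LAYER_KHRONOS_validation" in ppEnabledLayerNames; hazards from
    // overly narrow stage or access masks are then reported at record time.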