diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index b368ffea05..1d8fa13c61 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -739,12 +739,18 @@ void BufferCache

::BindHostIndexBuffer() { const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] { if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) { - auto upload_staging = runtime.UploadStagingBuffer(size); - std::array copies{ - {BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}}; - std::memcpy(upload_staging.mapped_span.data(), - draw_state.inline_index_draw_indexes.data(), size); - runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true); + if (buffer.IsHostVisible()) { + // write directly to mapped buffer + std::memcpy(buffer.Mapped().data(), + draw_state.inline_index_draw_indexes.data(), size); + } else { + auto upload_staging = runtime.UploadStagingBuffer(size); + std::array copies{ + {BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}}; + std::memcpy(upload_staging.mapped_span.data(), + draw_state.inline_index_draw_indexes.data(), size); + runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true); + } } else { buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes); } @@ -1590,6 +1596,15 @@ void BufferCache

::MappedUploadMemory([[maybe_unused]] Buffer& buffer, [[maybe_unused]] u64 total_size_bytes, [[maybe_unused]] std::span copies) { if constexpr (USE_MEMORY_MAPS) { + if (buffer.IsHostVisible() && runtime.CanReorderUpload(buffer, copies)) { + const std::span mapped_span = buffer.Mapped(); + for (const BufferCopy& copy : copies) { + u8* const dst_pointer = mapped_span.data() + copy.dst_offset; + const DAddr device_addr = buffer.CpuAddr() + copy.dst_offset; + device_memory.ReadBlockUnsafe(device_addr, dst_pointer, copy.size); + } + return; + } auto upload_staging = runtime.UploadStagingBuffer(total_size_bytes); const std::span staging_pointer = upload_staging.mapped_span; for (BufferCopy& copy : copies) { @@ -1634,16 +1649,22 @@ void BufferCache

::InlineMemoryImplementation(DAddr dest_address, size_t copy_ SynchronizeBuffer(buffer, dest_address, static_cast(copy_size)); if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) { - auto upload_staging = runtime.UploadStagingBuffer(copy_size); - std::array copies{BufferCopy{ - .src_offset = upload_staging.offset, - .dst_offset = buffer.Offset(dest_address), - .size = copy_size, - }}; - u8* const src_pointer = upload_staging.mapped_span.data(); - std::memcpy(src_pointer, inlined_buffer.data(), copy_size); - const bool can_reorder = runtime.CanReorderUpload(buffer, copies); - runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true, can_reorder); + const u32 buffer_offset = buffer.Offset(dest_address); + if (buffer.IsHostVisible()) { + // write directly to mapped buffer + std::memcpy(buffer.Mapped().data() + buffer_offset, inlined_buffer.data(), copy_size); + } else { + auto upload_staging = runtime.UploadStagingBuffer(copy_size); + std::array copies{BufferCopy{ + .src_offset = upload_staging.offset, + .dst_offset = buffer_offset, + .size = copy_size, + }}; + u8* const src_pointer = upload_staging.mapped_span.data(); + std::memcpy(src_pointer, inlined_buffer.data(), copy_size); + const bool can_reorder = runtime.CanReorderUpload(buffer, copies); + runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true, can_reorder); + } } else { buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer.first(copy_size)); } diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 23ebe50196..423de151a7 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -34,6 +34,14 @@ public: void MarkUsage(u64 offset, u64 size) {} + [[nodiscard]] bool IsHostVisible() const noexcept { + return false; + } + + [[nodiscard]] std::span Mapped() noexcept { + return {}; + } + [[nodiscard]] GLuint View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format); [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept { diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index b73fcd162b..87bc3fa352 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -43,6 +43,14 @@ public: return tracker.IsUsed(offset, size); } + [[nodiscard]] bool IsHostVisible() const noexcept { + return buffer.IsHostVisible(); + } + + [[nodiscard]] std::span Mapped() noexcept { + return buffer.Mapped(); + } + void MarkUsage(u64 offset, u64 size) noexcept { tracker.Track(offset, size); } diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 73a232ddee..54f508a287 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -798,6 +798,10 @@ public: return must_emulate_scaled_formats; } + bool HasUnifiedMemory() const { + return is_integrated; + } + bool HasNullDescriptor() const { return features.robustness2.nullDescriptor; } diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp index ab93f256c0..b11ddeb32d 100644 --- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp +++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp @@ -259,7 +259,7 @@ namespace Vulkan { vk::Buffer MemoryAllocator::CreateBuffer(const VkBufferCreateInfo &ci, MemoryUsage usage) const { - const VmaAllocationCreateInfo alloc_ci = { + VmaAllocationCreateInfo alloc_ci = { .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | MemoryUsageVmaFlags(usage), .usage = MemoryUsageVma(usage), .requiredFlags = 0, @@ -270,6 +270,11 @@ namespace Vulkan { .priority = 0.f, }; + if (device.HasUnifiedMemory() && usage == MemoryUsage::DeviceLocal) { + alloc_ci.flags |= VMA_ALLOCATION_CREATE_MAPPED_BIT; + alloc_ci.preferredFlags |= VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + } + VkBuffer handle{}; VmaAllocationInfo alloc_info{}; VmaAllocation allocation{};