[vulkan] skip staging buffer for uploads for UMA

Initial implementation that skips the staging buffer for uploads on unified memory architecture (UMA) devices, such as Android SoCs and integrated GPUs.
wildcard 2026-02-09 11:02:15 +01:00
parent 866881d0e3
commit fbf96ad587
5 changed files with 63 additions and 17 deletions
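The change hinges on one observation: on UMA hardware the GPU's device-local memory is also CPU-mappable, so an upload can be a single memcpy into the destination buffer instead of a memcpy into a staging buffer followed by a recorded GPU copy. The sketch below illustrates that decision in isolation; DeviceBuffer, StagingBuffer, Upload and record_gpu_copy are hypothetical stand-ins invented for this illustration, not the buffer cache's real API.

#include <cstdint>
#include <cstring>
#include <functional>
#include <span>

// Hypothetical stand-ins for illustration only; not this project's types.
struct StagingBuffer {
    std::span<std::uint8_t> mapped_span; // CPU-visible scratch memory
};

struct DeviceBuffer {
    std::span<std::uint8_t> mapped; // non-empty only when the allocation is host-visible (UMA)
    bool IsHostVisible() const { return !mapped.empty(); }
};

// UMA fast path: write straight into the destination's own mapping.
// Fallback: copy into staging memory and record a GPU transfer command.
void Upload(DeviceBuffer& dst, std::span<const std::uint8_t> src, std::uint64_t dst_offset,
            StagingBuffer& staging,
            const std::function<void(std::uint64_t /*src_offset*/, std::uint64_t /*dst_offset*/,
                                     std::uint64_t /*size*/)>& record_gpu_copy) {
    if (dst.IsHostVisible()) {
        std::memcpy(dst.mapped.data() + dst_offset, src.data(), src.size());
        return;
    }
    std::memcpy(staging.mapped_span.data(), src.data(), src.size());
    record_gpu_copy(0, dst_offset, src.size());
}

On a discrete GPU the staging path is unavoidable because device-local memory is normally not host-visible; on UMA both paths end up writing the same physical memory, so the staging copy and the transfer command are pure overhead.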


@@ -739,12 +739,18 @@ void BufferCache<P>::BindHostIndexBuffer() {
     const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
     if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] {
         if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) {
-            auto upload_staging = runtime.UploadStagingBuffer(size);
-            std::array<BufferCopy, 1> copies{
-                {BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}};
-            std::memcpy(upload_staging.mapped_span.data(),
-                        draw_state.inline_index_draw_indexes.data(), size);
-            runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true);
+            if (buffer.IsHostVisible()) {
+                // write directly to mapped buffer
+                std::memcpy(buffer.Mapped().data(),
+                            draw_state.inline_index_draw_indexes.data(), size);
+            } else {
+                auto upload_staging = runtime.UploadStagingBuffer(size);
+                std::array<BufferCopy, 1> copies{
+                    {BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}};
+                std::memcpy(upload_staging.mapped_span.data(),
+                            draw_state.inline_index_draw_indexes.data(), size);
+                runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true);
+            }
         } else {
             buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes);
         }
@@ -1590,6 +1596,15 @@ void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer,
                                         [[maybe_unused]] u64 total_size_bytes,
                                         [[maybe_unused]] std::span<BufferCopy> copies) {
     if constexpr (USE_MEMORY_MAPS) {
+        if (buffer.IsHostVisible() && runtime.CanReorderUpload(buffer, copies)) {
+            const std::span<u8> mapped_span = buffer.Mapped();
+            for (const BufferCopy& copy : copies) {
+                u8* const dst_pointer = mapped_span.data() + copy.dst_offset;
+                const DAddr device_addr = buffer.CpuAddr() + copy.dst_offset;
+                device_memory.ReadBlockUnsafe(device_addr, dst_pointer, copy.size);
+            }
+            return;
+        }
         auto upload_staging = runtime.UploadStagingBuffer(total_size_bytes);
         const std::span<u8> staging_pointer = upload_staging.mapped_span;
         for (BufferCopy& copy : copies) {
@@ -1634,16 +1649,22 @@ void BufferCache<P>::InlineMemoryImplementation(DAddr dest_address, size_t copy_
     SynchronizeBuffer(buffer, dest_address, static_cast<u32>(copy_size));
 
     if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) {
-        auto upload_staging = runtime.UploadStagingBuffer(copy_size);
-        std::array copies{BufferCopy{
-            .src_offset = upload_staging.offset,
-            .dst_offset = buffer.Offset(dest_address),
-            .size = copy_size,
-        }};
-        u8* const src_pointer = upload_staging.mapped_span.data();
-        std::memcpy(src_pointer, inlined_buffer.data(), copy_size);
-        const bool can_reorder = runtime.CanReorderUpload(buffer, copies);
-        runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true, can_reorder);
+        const u32 buffer_offset = buffer.Offset(dest_address);
+        if (buffer.IsHostVisible()) {
+            // write directly to mapped buffer
+            std::memcpy(buffer.Mapped().data() + buffer_offset, inlined_buffer.data(), copy_size);
+        } else {
+            auto upload_staging = runtime.UploadStagingBuffer(copy_size);
+            std::array copies{BufferCopy{
+                .src_offset = upload_staging.offset,
+                .dst_offset = buffer_offset,
+                .size = copy_size,
+            }};
+            u8* const src_pointer = upload_staging.mapped_span.data();
+            std::memcpy(src_pointer, inlined_buffer.data(), copy_size);
+            const bool can_reorder = runtime.CanReorderUpload(buffer, copies);
+            runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true, can_reorder);
+        }
     } else {
         buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer.first(copy_size));
     }


@@ -34,6 +34,14 @@ public:
     void MarkUsage(u64 offset, u64 size) {}
 
+    [[nodiscard]] bool IsHostVisible() const noexcept {
+        return false;
+    }
+
+    [[nodiscard]] std::span<u8> Mapped() noexcept {
+        return {};
+    }
+
     [[nodiscard]] GLuint View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format);
 
     [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept {


@@ -43,6 +43,14 @@ public:
         return tracker.IsUsed(offset, size);
     }
 
+    [[nodiscard]] bool IsHostVisible() const noexcept {
+        return buffer.IsHostVisible();
+    }
+
+    [[nodiscard]] std::span<u8> Mapped() noexcept {
+        return buffer.Mapped();
+    }
+
     void MarkUsage(u64 offset, u64 size) noexcept {
         tracker.Track(offset, size);
     }


@@ -798,6 +798,10 @@ public:
         return must_emulate_scaled_formats;
     }
 
+    bool HasUnifiedMemory() const {
+        return is_integrated;
+    }
+
     bool HasNullDescriptor() const {
         return features.robustness2.nullDescriptor;
     }
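HasUnifiedMemory() treats "integrated GPU" as a proxy for unified memory. A more direct check, shown below as an assumption rather than as what this commit does, is to ask Vulkan whether any memory type is both device-local and host-visible, which is the property the direct-write upload path actually depends on.

#include <vulkan/vulkan.h>

// Sketch: scan the physical device's memory types for one that is both
// DEVICE_LOCAL and HOST_VISIBLE. The commit instead keys off
// VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU (is_integrated).
bool HasDeviceLocalHostVisibleMemory(VkPhysicalDevice physical_device) {
    VkPhysicalDeviceMemoryProperties props{};
    vkGetPhysicalDeviceMemoryProperties(physical_device, &props);
    constexpr VkMemoryPropertyFlags wanted =
        VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
    for (uint32_t i = 0; i < props.memoryTypeCount; ++i) {
        if ((props.memoryTypes[i].propertyFlags & wanted) == wanted) {
            return true;
        }
    }
    return false;
}

Note that many discrete GPUs also expose a small DEVICE_LOCAL | HOST_VISIBLE heap (resizable BAR), so a memory-type scan is broader than the is_integrated check; which behaviour is preferable is a design decision for the commit, not for this sketch.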


@@ -259,7 +259,7 @@ namespace Vulkan {
 vk::Buffer
 MemoryAllocator::CreateBuffer(const VkBufferCreateInfo &ci, MemoryUsage usage) const
 {
-    const VmaAllocationCreateInfo alloc_ci = {
+    VmaAllocationCreateInfo alloc_ci = {
         .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | MemoryUsageVmaFlags(usage),
         .usage = MemoryUsageVma(usage),
         .requiredFlags = 0,
@@ -270,6 +270,11 @@ namespace Vulkan {
         .priority = 0.f,
     };
 
+    if (device.HasUnifiedMemory() && usage == MemoryUsage::DeviceLocal) {
+        alloc_ci.flags |= VMA_ALLOCATION_CREATE_MAPPED_BIT;
+        alloc_ci.preferredFlags |= VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+    }
+
     VkBuffer handle{};
     VmaAllocationInfo alloc_info{};
     VmaAllocation allocation{};
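VMA_ALLOCATION_CREATE_MAPPED_BIT and preferredFlags are hints rather than guarantees, so on a discrete GPU the allocator can still return DeviceLocal memory with no CPU mapping. Below is a minimal sketch of how a caller might inspect the result, assuming VMA 3.x; the MappedResult struct and InspectAllocation helper are illustrative and not part of this change.

#include <vk_mem_alloc.h>

// Sketch: after vmaCreateBuffer(), check what the allocator actually returned.
struct MappedResult {
    void* mapped = nullptr; // non-null only if the allocation is host-visible and mapped
    bool coherent = false;  // if false, CPU writes must be flushed before GPU reads
};

MappedResult InspectAllocation(VmaAllocator allocator, VmaAllocation allocation,
                               const VmaAllocationInfo& alloc_info) {
    VkMemoryPropertyFlags flags{};
    vmaGetAllocationMemoryProperties(allocator, allocation, &flags);
    MappedResult result{};
    if (flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
        result.mapped = alloc_info.pMappedData; // set when MAPPED_BIT took effect
        result.coherent = (flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0;
    }
    return result;
}

If the memory turns out to be host-visible but not coherent, CPU writes need a vmaFlushAllocation() before the GPU consumes them; preferring HOST_COHERENT, as this commit does, avoids that flush on the common UMA case.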