[vulkan] skip staging buffer for uploads for UMA
Initial implementation that skips the staging buffer for uploads on unified memory architecture (UMA) devices such as Android SoCs and integrated GPUs.
parent 866881d0e3
commit fbf96ad587
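The core of the change is a two-way branch that every upload path below repeats: if the destination buffer ended up in host-visible memory (the UMA case), the CPU writes straight into its persistent mapping; otherwise the existing staging-buffer path is kept. A minimal sketch of that pattern, using only names that appear in the hunks below; the surrounding buffer-cache types, `src_data`, `dst_offset` and `size` are placeholders:

    // Sketch of the pattern added in this commit, not a drop-in snippet.
    if (buffer.IsHostVisible()) {
        // UMA / iGPU: the destination is CPU-mappable, so write into it directly.
        std::memcpy(buffer.Mapped().data() + dst_offset, src_data, size);
    } else {
        // Discrete GPU: copy into a host-visible staging buffer, then record a GPU copy.
        auto upload_staging = runtime.UploadStagingBuffer(size);
        std::memcpy(upload_staging.mapped_span.data(), src_data, size);
        const std::array copies{BufferCopy{
            .src_offset = upload_staging.offset, .dst_offset = dst_offset, .size = size}};
        runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true);
    }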
@@ -739,12 +739,18 @@ void BufferCache<P>::BindHostIndexBuffer() {
     const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
     if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] {
         if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) {
-            auto upload_staging = runtime.UploadStagingBuffer(size);
-            std::array<BufferCopy, 1> copies{
-                {BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}};
-            std::memcpy(upload_staging.mapped_span.data(),
-                        draw_state.inline_index_draw_indexes.data(), size);
-            runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true);
+            if (buffer.IsHostVisible()) {
+                // write directly to mapped buffer
+                std::memcpy(buffer.Mapped().data(),
+                            draw_state.inline_index_draw_indexes.data(), size);
+            } else {
+                auto upload_staging = runtime.UploadStagingBuffer(size);
+                std::array<BufferCopy, 1> copies{
+                    {BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}};
+                std::memcpy(upload_staging.mapped_span.data(),
+                            draw_state.inline_index_draw_indexes.data(), size);
+                runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true);
+            }
         } else {
             buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes);
         }
@@ -1590,6 +1596,15 @@ void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer,
                                         [[maybe_unused]] u64 total_size_bytes,
                                         [[maybe_unused]] std::span<BufferCopy> copies) {
     if constexpr (USE_MEMORY_MAPS) {
+        if (buffer.IsHostVisible() && runtime.CanReorderUpload(buffer, copies)) {
+            const std::span<u8> mapped_span = buffer.Mapped();
+            for (const BufferCopy& copy : copies) {
+                u8* const dst_pointer = mapped_span.data() + copy.dst_offset;
+                const DAddr device_addr = buffer.CpuAddr() + copy.dst_offset;
+                device_memory.ReadBlockUnsafe(device_addr, dst_pointer, copy.size);
+            }
+            return;
+        }
         auto upload_staging = runtime.UploadStagingBuffer(total_size_bytes);
         const std::span<u8> staging_pointer = upload_staging.mapped_span;
         for (BufferCopy& copy : copies) {
@@ -1634,16 +1649,22 @@ void BufferCache<P>::InlineMemoryImplementation(DAddr dest_address, size_t copy_
     SynchronizeBuffer(buffer, dest_address, static_cast<u32>(copy_size));
 
     if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) {
-        auto upload_staging = runtime.UploadStagingBuffer(copy_size);
-        std::array copies{BufferCopy{
-            .src_offset = upload_staging.offset,
-            .dst_offset = buffer.Offset(dest_address),
-            .size = copy_size,
-        }};
-        u8* const src_pointer = upload_staging.mapped_span.data();
-        std::memcpy(src_pointer, inlined_buffer.data(), copy_size);
-        const bool can_reorder = runtime.CanReorderUpload(buffer, copies);
-        runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true, can_reorder);
+        const u32 buffer_offset = buffer.Offset(dest_address);
+        if (buffer.IsHostVisible()) {
+            // write directly to mapped buffer
+            std::memcpy(buffer.Mapped().data() + buffer_offset, inlined_buffer.data(), copy_size);
+        } else {
+            auto upload_staging = runtime.UploadStagingBuffer(copy_size);
+            std::array copies{BufferCopy{
+                .src_offset = upload_staging.offset,
+                .dst_offset = buffer_offset,
+                .size = copy_size,
+            }};
+            u8* const src_pointer = upload_staging.mapped_span.data();
+            std::memcpy(src_pointer, inlined_buffer.data(), copy_size);
+            const bool can_reorder = runtime.CanReorderUpload(buffer, copies);
+            runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true, can_reorder);
+        }
     } else {
         buffer.ImmediateUpload(buffer.Offset(dest_address), inlined_buffer.first(copy_size));
     }
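One subtlety in the direct-write branches above: no flush follows the memcpy. That relies on the allocator change at the bottom of this diff, which asks for HOST_COHERENT memory for these buffers, though only via preferredFlags. If a non-coherent mapping were ever handed back, the CPU write would additionally need a flush before the GPU reads it; with VMA that would look roughly like this (the allocator and allocation handles are assumed to be reachable from the buffer, names are placeholders):

    // Hypothetical addition, only required for non-HOST_COHERENT mappings.
    std::memcpy(mapped_span.data() + dst_offset, src_data, size);
    vmaFlushAllocation(allocator, allocation, dst_offset, size); // make the CPU write visible to the GPU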
@@ -34,6 +34,14 @@ public:
 
     void MarkUsage(u64 offset, u64 size) {}
 
+    [[nodiscard]] bool IsHostVisible() const noexcept {
+        return false;
+    }
+
+    [[nodiscard]] std::span<u8> Mapped() noexcept {
+        return {};
+    }
+
     [[nodiscard]] GLuint View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format);
 
     [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept {
@@ -43,6 +43,14 @@ public:
         return tracker.IsUsed(offset, size);
     }
 
+    [[nodiscard]] bool IsHostVisible() const noexcept {
+        return buffer.IsHostVisible();
+    }
+
+    [[nodiscard]] std::span<u8> Mapped() noexcept {
+        return buffer.Mapped();
+    }
+
     void MarkUsage(u64 offset, u64 size) noexcept {
         tracker.Track(offset, size);
     }
@@ -798,6 +798,10 @@ public:
         return must_emulate_scaled_formats;
     }
 
+    bool HasUnifiedMemory() const {
+        return is_integrated;
+    }
+
     bool HasNullDescriptor() const {
         return features.robustness2.nullDescriptor;
     }
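HasUnifiedMemory() here is just an alias for is_integrated, which is a reasonable first approximation (Android GPUs and desktop iGPUs are integrated parts). A stricter check would inspect the memory heaps directly and treat the device as unified only when every device-local heap is reachable through a host-visible memory type. A self-contained sketch of that alternative; the helper name is hypothetical and not part of this commit:

    // Hypothetical helper, not part of this commit: classify a device as UMA by
    // checking that every DEVICE_LOCAL heap is backed by a memory type that is
    // also HOST_VISIBLE, instead of relying on the integrated-GPU flag alone.
    #include <vulkan/vulkan.h>

    bool HeapsAreUnified(VkPhysicalDevice physical_device) {
        VkPhysicalDeviceMemoryProperties props{};
        vkGetPhysicalDeviceMemoryProperties(physical_device, &props);
        for (uint32_t heap = 0; heap < props.memoryHeapCount; ++heap) {
            if ((props.memoryHeaps[heap].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) == 0) {
                continue; // host-only heap, not relevant for device-local buffers
            }
            bool host_reachable = false;
            for (uint32_t type = 0; type < props.memoryTypeCount; ++type) {
                constexpr VkMemoryPropertyFlags wanted =
                    VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
                const VkMemoryType& mt = props.memoryTypes[type];
                if (mt.heapIndex == heap && (mt.propertyFlags & wanted) == wanted) {
                    host_reachable = true;
                    break;
                }
            }
            if (!host_reachable) {
                return false; // some device-local memory cannot be mapped: not UMA
            }
        }
        return true;
    }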
@@ -259,7 +259,7 @@ namespace Vulkan {
 vk::Buffer
 MemoryAllocator::CreateBuffer(const VkBufferCreateInfo &ci, MemoryUsage usage) const
 {
-    const VmaAllocationCreateInfo alloc_ci = {
+    VmaAllocationCreateInfo alloc_ci = {
         .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | MemoryUsageVmaFlags(usage),
         .usage = MemoryUsageVma(usage),
         .requiredFlags = 0,
@@ -270,6 +270,11 @@ namespace Vulkan {
         .priority = 0.f,
     };
 
+    if (device.HasUnifiedMemory() && usage == MemoryUsage::DeviceLocal) {
+        alloc_ci.flags |= VMA_ALLOCATION_CREATE_MAPPED_BIT;
+        alloc_ci.preferredFlags |= VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+    }
+
     VkBuffer handle{};
     VmaAllocationInfo alloc_info{};
     VmaAllocation allocation{};
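Because VMA_ALLOCATION_CREATE_MAPPED_BIT and the HOST_COHERENT preference are requests rather than guarantees, the buffer wrapper still has to check what the allocation actually received before IsHostVisible()/Mapped() can return anything meaningful. A sketch of how that query could look with standard VMA calls; the MappedView struct and QueryMappedView helper are assumed names, not part of this commit:

    // Sketch under assumed names: back IsHostVisible()/Mapped() by asking VMA
    // which memory properties and persistent mapping the allocation really got.
    #include <cstddef>
    #include <span>

    #include <vk_mem_alloc.h>

    struct MappedView {
        bool host_visible = false;
        std::span<unsigned char> bytes;
    };

    MappedView QueryMappedView(VmaAllocator allocator, VmaAllocation allocation) {
        VkMemoryPropertyFlags flags = 0;
        vmaGetAllocationMemoryProperties(allocator, allocation, &flags);

        VmaAllocationInfo info{};
        vmaGetAllocationInfo(allocator, allocation, &info);

        MappedView view{};
        // Host-visible only counts if the persistent mapping requested via
        // VMA_ALLOCATION_CREATE_MAPPED_BIT actually exists.
        view.host_visible =
            (flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0 && info.pMappedData != nullptr;
        if (view.host_visible) {
            view.bytes = {static_cast<unsigned char*>(info.pMappedData),
                          static_cast<std::size_t>(info.size)};
        }
        return view;
    }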