@@ ... @@ TextureCache<P>::TextureCache(Runtime& runtime_, Tegra::MaxwellDeviceMemoryManager& device_memory_)
        critical_memory = static_cast<u64>(
            (std::max)((std::min)(device_local_memory - min_vacancy_critical, min_spacing_critical),
                       DEFAULT_CRITICAL_MEMORY));
        minimum_memory = static_cast<u64>((device_local_memory - mem_threshold) / 2);
@@ ... @@ void TextureCache<P>::RunGarbageCollector() {
        ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 50ULL;
        num_iterations = aggressive_mode ? 40 : (high_priority_mode ? 20 : 10);
    };
+
const auto Cleanup = [this, &num_iterations, &high_priority_mode,
&aggressive_mode](ImageId image_id) {
if (num_iterations == 0) {
@@ -95,20 +129,36 @@ void TextureCache<P>::RunGarbageCollector() {
}
--num_iterations;
auto& image = slot_images[image_id];
+
+ // Never delete recently allocated sparse textures (within 3 frames)
+ const bool is_recently_allocated = image.allocation_tick >= frame_tick - 3;
+ if (is_recently_allocated && image.info.is_sparse) {
+ return false;
+ }
+
if (True(image.flags & ImageFlagBits::IsDecoding)) {
// This image is still being decoded, deleting it will invalidate the slot
// used by the async decoder thread.
return false;
}
- if (!aggressive_mode && True(image.flags & ImageFlagBits::CostlyLoad)) {
+
+ // Prioritize large sparse textures for cleanup
+ const bool is_large_sparse = lowmemorydevice &&
+ image.info.is_sparse &&
+ image.guest_size_bytes >= 256_MiB;
+
+ if (!aggressive_mode && !is_large_sparse &&
+ True(image.flags & ImageFlagBits::CostlyLoad)) {
return false;
}
+
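+        // On low-memory devices, large sparse textures are evicted without downloading
+        // their contents back to guest memory.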
const bool must_download =
image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap);
- if (!high_priority_mode && must_download) {
+ if (!high_priority_mode && !is_large_sparse && must_download) {
return false;
}
- if (must_download) {
+
+ if (must_download && !is_large_sparse) {
auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes);
const auto copies = FixSmallVectorADL(FullDownloadCopies(image.info));
image.DownloadMemory(map, copies);
@@ -116,11 +166,13 @@ void TextureCache<P>::RunGarbageCollector() {
SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span,
swizzle_data_buffer);
}
+
if (True(image.flags & ImageFlagBits::Tracked)) {
UntrackImage(image, image_id);
}
UnregisterImage(image_id);
DeleteImage(image_id, image.scale_tick > frame_tick + 5);
+
if (total_used_memory < critical_memory) {
if (aggressive_mode) {
// Sink the aggressiveness.
@@ -136,7 +188,24 @@ void TextureCache<P>::RunGarbageCollector() {
return false;
};
- // Try to remove anything old enough and not high priority.
+ // Aggressively clear massive sparse textures
+ if (total_used_memory >= expected_memory) {
+ lru_cache.ForEachItemBelow(frame_tick, [&](ImageId image_id) {
+ auto& image = slot_images[image_id];
+ // Only target sparse textures that are old enough
+ if (lowmemorydevice &&
+ image.info.is_sparse &&
+ image.guest_size_bytes >= 256_MiB &&
+ image.allocation_tick < frame_tick - 3) {
+ LOG_DEBUG(HW_GPU, "GC targeting old sparse texture at 0x{:X} ({} MiB, age: {} frames)",
+ image.gpu_addr, image.guest_size_bytes / (1024 * 1024),
+ frame_tick - image.allocation_tick);
+ return Cleanup(image_id);
+ }
+ return false;
+ });
+ }
+
Configure(false);
lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, Cleanup);
@@ -160,6 +229,7 @@ void TextureCache<P>::TickFrame() {
sentenced_framebuffers.Tick();
sentenced_image_view.Tick();
TickAsyncDecode();
+ TickAsyncUnswizzle();
runtime.TickFrame();
++frame_tick;
@@ -627,7 +697,6 @@ void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size) {
UntrackImage(image, id);
}
}
-
if (True(image.flags & ImageFlagBits::Remapped)) {
continue;
}
@@ -1055,7 +1124,12 @@ void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) {
// Only upload modified images
return;
}
+
image.flags &= ~ImageFlagBits::CpuModified;
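+    // On low-memory devices, skip uploading very large BC1 images entirely.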
+    if (lowmemorydevice && image.info.format == PixelFormat::BC1_RGBA_UNORM &&
+        MapSizeBytes(image) >= 256_MiB) {
+        return;
+    }
+
TrackImage(image, image_id);
if (image.info.num_samples > 1 && !runtime.CanUploadMSAA()) {
@@ -1067,6 +1141,16 @@ void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) {
QueueAsyncDecode(image, image_id);
return;
}
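+    // Route large, GPU-unmodified single-level BCn 3D images through the asynchronous unswizzle path.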
+ if (IsPixelFormatBCn(image.info.format) &&
+ image.info.type == ImageType::e3D &&
+ image.info.resources.levels == 1 &&
+ image.info.resources.layers == 1 &&
+ MapSizeBytes(image) >= gpu_unswizzle_maxsize &&
+ False(image.flags & ImageFlagBits::GpuModified)) {
+
+ QueueAsyncUnswizzle(image, image_id);
+ return;
+ }
auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image));
UploadImageContents(image, staging);
runtime.InsertUploadMemoryBarrier();
@@ -1082,7 +1166,7 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
gpu_memory->ReadBlock(gpu_addr, mapped_span.data(), mapped_span.size_bytes(),
VideoCommon::CacheType::NoTextureCache);
const auto uploads = FullUploadSwizzles(image.info);
- runtime.AccelerateImageUpload(image, staging, FixSmallVectorADL(uploads));
+ runtime.AccelerateImageUpload(image, staging, FixSmallVectorADL(uploads), 0, 0);
return;
}
@@ -1311,6 +1395,20 @@ void TextureCache<P>::QueueAsyncDecode(Image& image, ImageId image_id) {
texture_decode_worker.QueueWork(std::move(func));
}
+template <class P>
+void TextureCache<P>::QueueAsyncUnswizzle(Image& image, ImageId image_id) {
+ if (True(image.flags & ImageFlagBits::IsDecoding)) {
+ return;
+ }
+
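+    // Reuse IsDecoding so the garbage collector will not delete the image while the unswizzle is pending.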
+ image.flags |= ImageFlagBits::IsDecoding;
+
+ unswizzle_queue.push_back({
+ .image_id = image_id,
+ .info = image.info
+ });
+}
+
template <class P>
void TextureCache<P>::TickAsyncDecode() {
bool has_uploads{};
@@ -1336,6 +1434,83 @@ void TextureCache<P>::TickAsyncDecode() {
}
}
+template <class P>
+void TextureCache<P>::TickAsyncUnswizzle() {
+ if (unswizzle_queue.empty()) {
+ return;
+ }
+
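+    // Spread the work across frames: skip this tick while the delay counter is still counting down.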
+    if (current_unswizzle_frame > 0) {
+ current_unswizzle_frame--;
+ return;
+ }
+
+ PendingUnswizzle& task = unswizzle_queue.front();
+ Image& image = slot_images[task.image_id];
+
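+    // First visit: allocate the staging buffer and precompute the per-slice copy size.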
+ if (!task.initialized) {
+ task.total_size = MapSizeBytes(image);
+ task.staging_buffer = runtime.UploadStagingBuffer(task.total_size, true);
+
+ const auto& info = image.info;
+ const u32 bytes_per_block = BytesPerBlock(info.format);
+ const u32 width_blocks = Common::DivCeil(info.size.width, 4u);
+ const u32 height_blocks = Common::DivCeil(info.size.height, 4u);
+
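+        // Stride of one row of 4x4 compressed blocks, in bytes.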
+ const u32 stride = width_blocks * bytes_per_block;
+ const u32 aligned_height = height_blocks;
+        task.bytes_per_slice = static_cast<u64>(stride) * aligned_height;
@@ ... @@ bool TextureCache<P>::ScaleUp(Image& image) {
const bool has_copy = image.HasScaled();
@@ -1374,6 +1549,39 @@ ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
}
}
ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr);
+
+ // For large sparse textures, aggressively clean up old allocations at same address
+ if (lowmemorydevice && info.is_sparse && CalculateGuestSizeInBytes(info) >= 256_MiB) {
+ const auto alloc_it = image_allocs_table.find(gpu_addr);
+ if (alloc_it != image_allocs_table.end()) {
+ const ImageAllocId alloc_id = alloc_it->second;
+ auto& alloc_images = slot_image_allocs[alloc_id].images;
+
+ // Collect old images at this address that were created more than 2 frames ago
+            boost::container::small_vector
@@ ... @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DAddr cpu_addr) {
ImageInfo new_info = info;
const size_t size_bytes = CalculateGuestSizeInBytes(new_info);
+
+ // Proactive cleanup for large sparse texture allocations
+ if (lowmemorydevice && new_info.is_sparse && size_bytes >= 256_MiB) {
+ const u64 estimated_alloc_size = size_bytes;
+
+ if (total_used_memory + estimated_alloc_size >= critical_memory) {
+ LOG_DEBUG(HW_GPU, "Large sparse texture allocation ({} MiB) - running aggressive GC. "
+ "Current memory: {} MiB, Critical: {} MiB",
+ size_bytes / (1024 * 1024),
+ total_used_memory / (1024 * 1024),
+ critical_memory / (1024 * 1024));
+ RunGarbageCollector();
+
+ // If still over threshold after GC, try one more aggressive pass
+ if (total_used_memory + estimated_alloc_size >= critical_memory) {
+ LOG_DEBUG(HW_GPU, "Still critically low on memory, running second GC pass");
+ RunGarbageCollector();
+ }
+ }
+ }
+
const bool broken_views = runtime.HasBrokenTextureViewFormats();
const bool native_bgr = runtime.HasNativeBgr();
join_overlap_ids.clear();
@@ -1485,6 +1714,8 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DAddr cpu_addr) {
const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
Image& new_image = slot_images[new_image_id];
+ new_image.allocation_tick = frame_tick;
+
if (!gpu_memory->IsContinuousRange(new_image.gpu_addr, new_image.guest_size_bytes) &&
new_info.is_sparse) {
new_image.flags |= ImageFlagBits::Sparse;
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 5146a8c291..42f1a158d9 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -129,6 +129,17 @@ class TextureCache : public VideoCommon::ChannelSetupCaches<VideoCommon::TextureCacheChannelInfo> {