[vk, spir-v] Refining the path for Array 1D emulation and R32 Uint handling consistency
This commit is contained in:
parent
21575a7cc2
commit
92e9b82f6e
|
|
@ -1,3 +1,6 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
|
|
@ -318,13 +321,23 @@ void AddOffsetToCoordinates(EmitContext& ctx, const IR::TextureInstInfo& info, I
|
|||
return;
|
||||
}
|
||||
|
||||
// Mobile GPUs: 1D textures emulated as 2D with height=1
|
||||
const bool emulate_1d = ctx.profile.needs_1d_texture_emulation;
|
||||
|
||||
Id result_type{};
|
||||
switch (info.type) {
|
||||
case TextureType::Buffer:
|
||||
case TextureType::Color1D: {
|
||||
result_type = ctx.U32[1];
|
||||
break;
|
||||
}
|
||||
case TextureType::Color1D:
|
||||
if (emulate_1d) {
|
||||
// Treat as 2D: offset needs Y component
|
||||
offset = ctx.OpCompositeConstruct(ctx.U32[2], offset, ctx.u32_zero_value);
|
||||
result_type = ctx.U32[2];
|
||||
} else {
|
||||
result_type = ctx.U32[1];
|
||||
}
|
||||
break;
|
||||
case TextureType::ColorArray1D:
|
||||
offset = ctx.OpCompositeConstruct(ctx.U32[2], offset, ctx.u32_zero_value);
|
||||
[[fallthrough]];
|
||||
|
|
@ -348,6 +361,29 @@ void AddOffsetToCoordinates(EmitContext& ctx, const IR::TextureInstInfo& info, I
|
|||
}
|
||||
coords = ctx.OpIAdd(result_type, coords, offset);
|
||||
}
|
||||
|
||||
// Helper: Convert 1D coordinates to 2D when emulating 1D textures on mobile GPUs
|
||||
/// Widens 1D texture coordinates to their 2D equivalent when 1D textures are emulated as 2D.
///
/// @param ctx    Emit context; ctx.profile.needs_1d_texture_emulation selects the emulation path.
/// @param info   Texture instruction info; only info.type is inspected.
/// @param coords Coordinate operand as produced by the IR.
/// @return coords unchanged when emulation is disabled or the type is neither Color1D nor
///         ColorArray1D; otherwise a new composite:
///           - Color1D:      x          -> vec2(x, 0.0)
///           - ColorArray1D: (x, layer) -> vec3(x, 0.0, layer)
///
/// NOTE(review): the widened value is always built from F32 types and ctx.f32_zero_value.
/// Callers that feed the result into integer operations (e.g. the fetch path, which adds a U32
/// offset with OpIAdd) presumably need an integer variant of this helper - confirm.
[[nodiscard]] Id AdjustCoordinatesForEmulation(EmitContext& ctx, const IR::TextureInstInfo& info,
                                               Id coords) {
    if (!ctx.profile.needs_1d_texture_emulation) {
        return coords;
    }

    switch (info.type) {
    case TextureType::Color1D:
        // Convert scalar -> vec2(x, 0.0)
        return ctx.OpCompositeConstruct(ctx.F32[2], coords, ctx.f32_zero_value);
    case TextureType::ColorArray1D: {
        // The braces scope the locals below: without them the `default:` label would jump
        // past their initialization, which is ill-formed C++.
        // Convert vec2(x, layer) -> vec3(x, 0.0, layer); the inserted Y component is zero.
        const Id x{ctx.OpCompositeExtract(ctx.F32[1], coords, 0)};
        const Id layer{ctx.OpCompositeExtract(ctx.F32[1], coords, 1)};
        return ctx.OpCompositeConstruct(ctx.F32[3], x, ctx.f32_zero_value, layer);
    }
    default:
        return coords;
    }
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
Id EmitBindlessImageSampleImplicitLod(EmitContext&) {
|
||||
|
|
@ -449,6 +485,7 @@ Id EmitBoundImageWrite(EmitContext&) {
|
|||
Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
||||
Id bias_lc, const IR::Value& offset) {
|
||||
const auto info{inst->Flags<IR::TextureInstInfo>()};
|
||||
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
|
||||
if (ctx.stage == Stage::Fragment) {
|
||||
const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0,
|
||||
bias_lc, offset);
|
||||
|
|
@ -470,6 +507,7 @@ Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value&
|
|||
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
||||
Id lod, const IR::Value& offset) {
|
||||
const auto info{inst->Flags<IR::TextureInstInfo>()};
|
||||
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
|
||||
const ImageOperands operands(ctx, false, true, false, lod, offset);
|
||||
return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
|
||||
&EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
|
||||
|
|
@ -479,6 +517,7 @@ Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value&
|
|||
Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
|
||||
Id coords, Id dref, Id bias_lc, const IR::Value& offset) {
|
||||
const auto info{inst->Flags<IR::TextureInstInfo>()};
|
||||
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
|
||||
if (ctx.stage == Stage::Fragment) {
|
||||
const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0,
|
||||
bias_lc, offset);
|
||||
|
|
@ -500,6 +539,7 @@ Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va
|
|||
Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
|
||||
Id coords, Id dref, Id lod, const IR::Value& offset) {
|
||||
const auto info{inst->Flags<IR::TextureInstInfo>()};
|
||||
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
|
||||
const ImageOperands operands(ctx, false, true, false, lod, offset);
|
||||
return Emit(&EmitContext::OpImageSparseSampleDrefExplicitLod,
|
||||
&EmitContext::OpImageSampleDrefExplicitLod, ctx, inst, ctx.F32[1],
|
||||
|
|
@ -509,6 +549,7 @@ Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va
|
|||
Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
||||
const IR::Value& offset, const IR::Value& offset2) {
|
||||
const auto info{inst->Flags<IR::TextureInstInfo>()};
|
||||
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
|
||||
const ImageOperands operands(ctx, offset, offset2);
|
||||
if (ctx.profile.need_gather_subpixel_offset) {
|
||||
coords = ImageGatherSubpixelOffset(ctx, info, TextureImage(ctx, info, index), coords);
|
||||
|
|
@ -521,6 +562,7 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id
|
|||
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
||||
const IR::Value& offset, const IR::Value& offset2, Id dref) {
|
||||
const auto info{inst->Flags<IR::TextureInstInfo>()};
|
||||
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
|
||||
const ImageOperands operands(ctx, offset, offset2);
|
||||
if (ctx.profile.need_gather_subpixel_offset) {
|
||||
coords = ImageGatherSubpixelOffset(ctx, info, TextureImage(ctx, info, index), coords);
|
||||
|
|
@ -533,6 +575,7 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
|
|||
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
|
||||
Id lod, Id ms) {
|
||||
const auto info{inst->Flags<IR::TextureInstInfo>()};
|
||||
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
|
||||
AddOffsetToCoordinates(ctx, info, coords, offset);
|
||||
if (info.type == TextureType::Buffer) {
|
||||
lod = Id{};
|
||||
|
|
@ -559,9 +602,20 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& i
|
|||
return uses_lod ? ctx.OpImageQuerySizeLod(type, image, lod)
|
||||
: ctx.OpImageQuerySize(type, image);
|
||||
}};
|
||||
|
||||
// Mobile GPUs: 1D textures emulated as 2D, query returns vec2 instead of scalar
|
||||
const bool emulate_1d = ctx.profile.needs_1d_texture_emulation;
|
||||
|
||||
switch (info.type) {
|
||||
case TextureType::Color1D:
|
||||
return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[1]), zero, zero, mips());
|
||||
if (emulate_1d) {
|
||||
// Query as 2D, extract only X component for 1D size
|
||||
const Id size_2d = query(ctx.U32[2]);
|
||||
const Id width = ctx.OpCompositeExtract(ctx.U32[1], size_2d, 0);
|
||||
return ctx.OpCompositeConstruct(ctx.U32[4], width, zero, zero, mips());
|
||||
} else {
|
||||
return ctx.OpCompositeConstruct(ctx.U32[4], query(ctx.U32[1]), zero, zero, mips());
|
||||
}
|
||||
case TextureType::ColorArray1D:
|
||||
case TextureType::Color2D:
|
||||
case TextureType::ColorCube:
|
||||
|
|
@ -579,6 +633,7 @@ Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& i
|
|||
|
||||
Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) {
|
||||
const auto info{inst->Flags<IR::TextureInstInfo>()};
|
||||
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
|
||||
const Id zero{ctx.f32_zero_value};
|
||||
const Id sampler{Texture(ctx, info, index)};
|
||||
return ctx.OpCompositeConstruct(ctx.F32[4], ctx.OpImageQueryLod(ctx.F32[2], sampler, coords),
|
||||
|
|
@ -588,6 +643,7 @@ Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I
|
|||
Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
||||
Id derivatives, const IR::Value& offset, Id lod_clamp) {
|
||||
const auto info{inst->Flags<IR::TextureInstInfo>()};
|
||||
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
|
||||
const auto operands = info.num_derivatives == 3
|
||||
? ImageOperands(ctx, info.has_lod_clamp != 0, derivatives,
|
||||
ctx.Def(offset), {}, lod_clamp)
|
||||
|
|
@ -600,6 +656,7 @@ Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, I
|
|||
|
||||
Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) {
|
||||
const auto info{inst->Flags<IR::TextureInstInfo>()};
|
||||
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
|
||||
if (info.image_format == ImageFormat::Typeless && !ctx.profile.support_typeless_image_loads) {
|
||||
LOG_WARNING(Shader_SPIRV, "Typeless image read not supported by host");
|
||||
return ctx.ConstantNull(ctx.U32[4]);
|
||||
|
|
@ -616,6 +673,7 @@ Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id co
|
|||
|
||||
void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id color) {
|
||||
const auto info{inst->Flags<IR::TextureInstInfo>()};
|
||||
coords = AdjustCoordinatesForEmulation(ctx, info, coords);
|
||||
const auto [image, is_integer] = Image(ctx, index, info);
|
||||
if (!is_integer) {
|
||||
color = ctx.OpBitcast(ctx.F32[4], color);
|
||||
|
|
|
|||
|
|
@ -37,6 +37,13 @@ Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) {
|
|||
// Mobile GPUs lack Sampled1D SPIR-V capability - emulate 1D as 2D with array layer
|
||||
const bool emulate_1d = ctx.profile.needs_1d_texture_emulation;
|
||||
|
||||
// Debug log for 1D emulation
|
||||
if (desc.type == TextureType::Color1D || desc.type == TextureType::ColorArray1D) {
|
||||
LOG_WARNING(Shader_SPIRV, "ImageType(texture): Creating {} texture, emulate_1d={}",
|
||||
desc.type == TextureType::Color1D ? "Color1D" : "ColorArray1D",
|
||||
emulate_1d);
|
||||
}
|
||||
|
||||
switch (desc.type) {
|
||||
case TextureType::Color1D:
|
||||
return emulate_1d ? ctx.TypeImage(type, spv::Dim::Dim2D, depth, false, false, 1, format)
|
||||
|
|
@ -87,6 +94,13 @@ Id ImageType(EmitContext& ctx, const ImageDescriptor& desc, Id sampled_type) {
|
|||
const spv::ImageFormat format{GetImageFormat(desc.format)};
|
||||
const bool emulate_1d = ctx.profile.needs_1d_texture_emulation;
|
||||
|
||||
// Debug log for 1D emulation
|
||||
if (desc.type == TextureType::Color1D || desc.type == TextureType::ColorArray1D) {
|
||||
LOG_WARNING(Shader_SPIRV, "ImageType: Creating {} image, emulate_1d={}",
|
||||
desc.type == TextureType::Color1D ? "Color1D" : "ColorArray1D",
|
||||
emulate_1d);
|
||||
}
|
||||
|
||||
switch (desc.type) {
|
||||
case TextureType::Color1D:
|
||||
return emulate_1d ? ctx.TypeImage(sampled_type, spv::Dim::Dim2D, false, false, false, 2, format)
|
||||
|
|
|
|||
|
|
@ -177,8 +177,6 @@ try
|
|||
|
||||
RendererVulkan::~RendererVulkan() {
|
||||
scheduler.RegisterOnSubmit([] {});
|
||||
// Acquire submit_mutex before WaitIdle to prevent simultaneous queue access
|
||||
std::scoped_lock lock{scheduler.submit_mutex};
|
||||
void(device.GetLogical().WaitIdle());
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -30,8 +30,7 @@ BlitScreen::~BlitScreen() = default;
|
|||
void BlitScreen::WaitIdle() {
|
||||
present_manager.WaitPresent();
|
||||
scheduler.Finish();
|
||||
// Note: scheduler.Finish() already waits for GPU and synchronizes submit_mutex
|
||||
// Calling device.WaitIdle() here causes threading errors (simultaneous queue access)
|
||||
device.GetLogical().WaitIdle();
|
||||
}
|
||||
|
||||
void BlitScreen::SetWindowAdaptPass() {
|
||||
|
|
|
|||
|
|
@ -2121,18 +2121,23 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
|
|||
}
|
||||
const auto format_info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format);
|
||||
|
||||
// Workaround: Nintendo Switch games incorrectly use R32_UINT textures with float samplers
|
||||
// This causes validation errors and undefined behavior (flickering, missing geometry)
|
||||
// Reinterpret R32_UINT as R16_SFLOAT for sampled images (R32_SFLOAT lacks LINEAR filter support on Adreno)
|
||||
// Workaround: Some Switch games incorrectly use R32_UINT textures with float samplers
|
||||
// causing flickering/missing geometry. However, glyph atlases and lookup tables
|
||||
// CORRECTLY use R32_UINT for integer data - reinterpreting breaks text rendering.
|
||||
// Conservative heuristic: Only reinterpret large textures (likely geometry/effects)
|
||||
VkFormat view_format = format_info.format;
|
||||
if (view_format == VK_FORMAT_R32_UINT &&
|
||||
!info.IsRenderTarget() &&
|
||||
(ImageUsageFlags(format_info, format) & VK_IMAGE_USAGE_SAMPLED_BIT)) {
|
||||
// Only reinterpret if NOT used as storage image (storage requires matching types)
|
||||
// Skip small textures (likely atlases, lookup tables, or integer data)
|
||||
const bool is_likely_atlas = info.size.width <= 1024 || info.size.height <= 1024;
|
||||
const bool is_storage = (ImageUsageFlags(format_info, format) & VK_IMAGE_USAGE_STORAGE_BIT) != 0;
|
||||
if (!is_storage) {
|
||||
view_format = VK_FORMAT_R16_SFLOAT;
|
||||
LOG_DEBUG(Render_Vulkan, "Reinterpreting R32_UINT as R16_SFLOAT for sampled image compatibility (LINEAR filter support)");
|
||||
|
||||
// Only reinterpret large textures that are NOT storage and NOT likely atlases
|
||||
if (!is_storage && !is_likely_atlas) {
|
||||
view_format = VK_FORMAT_R32_SFLOAT;
|
||||
LOG_DEBUG(Render_Vulkan, "Reinterpreting R32_UINT→R32_SFLOAT for {}x{} texture",
|
||||
info.size.width, info.size.height);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue