From 7ac1e7e3791243ab580b37fd7e244982d14e66d9 Mon Sep 17 00:00:00 2001
From: xbzk
Date: Sat, 17 Jan 2026 19:20:10 -0300
Subject: [PATCH] WIP: [spirv] attempt to cover bindless textures

---
 .../backend/spirv/emit_spirv_image.cpp        | 174 +++++++++++--
 .../backend/spirv/emit_spirv_instructions.h   | 108 ++++++++-
 .../backend/spirv/spirv_emit_context.cpp      |  26 ++
 .../backend/spirv/spirv_emit_context.h        |   6 +
 src/shader_recompiler/ir_opt/texture_pass.cpp | 228 ++++++++++++------
 5 files changed, 429 insertions(+), 113 deletions(-)

diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
index 945cdb42bc..d5e59fd239 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
 // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
@@ -350,52 +353,175 @@ void AddOffsetToCoordinates(EmitContext& ctx, const IR::TextureInstInfo& info, I
 }
 } // Anonymous namespace
 
-Id EmitBindlessImageSampleImplicitLod(EmitContext&) {
-    throw LogicError("Unreachable instruction");
+// Bindless sample with implicit LOD: loads the sampler at index `handle` of the descriptor's texture array; non-fragment stages sample at explicit LOD 0.
+Id EmitBindlessImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, Id coords, const IR::Value& handle, const IR::Value& bias_lc, const IR::Value& offset) {
+    // NOTE(review): confirm against the opcode's operand order that the Id operand really is the coordinates and the IR::Value the handle.
+    const auto info = inst->Flags<IR::TextureInstInfo>();
+    const TextureDefinition& def = ctx.textures.at(info.descriptor_index);
+    const Id pointer = ctx.OpAccessChain(def.pointer_type, def.id, ctx.Def(handle));
+    const Id sampler = ctx.OpLoad(def.sampled_type, pointer);
+    if (ctx.stage == Stage::Fragment) {
+        // ImageOperands(EmitContext&, bool has_bias, bool has_lod, bool has_lod_clamp, Id lod, const IR::Value& offset)
+        const Id bias = ctx.Def(bias_lc);
+        const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0, bias, offset);
+        return Emit(&EmitContext::OpImageSparseSampleImplicitLod,
+                    &EmitContext::OpImageSampleImplicitLod, ctx, inst, ctx.F32[4],
+                    sampler, coords, operands.MaskOptional(), operands.Span());
+    } else {
+        const Id lod = ctx.Const(0.0f);
+        const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod, offset);
+        return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
+                    &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
+                    sampler, coords, operands.Mask(), operands.Span());
+    }
 }
 
-Id EmitBindlessImageSampleExplicitLod(EmitContext&) {
-    throw LogicError("Unreachable instruction");
+// Bindless sample with an explicit LOD taken from `lod`; the sampler is loaded at index `handle` of the descriptor's texture array.
+Id EmitBindlessImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, Id coords, const IR::Value& handle, const IR::Value& lod, const IR::Value& offset) {
+    // NOTE(review): confirm against the opcode's operand order that the Id operand really is the coordinates and the IR::Value the handle.
+    const auto info = inst->Flags<IR::TextureInstInfo>();
+    const TextureDefinition& def = ctx.textures.at(info.descriptor_index);
+    const Id pointer = ctx.OpAccessChain(def.pointer_type, def.id, ctx.Def(handle));
+    const Id sampler = ctx.OpLoad(def.sampled_type, pointer);
+    // ImageOperands(EmitContext&, bool has_bias, bool has_lod, bool has_lod_clamp, Id lod, const IR::Value& offset)
+    const Id lod_id = ctx.Def(lod);
+    const ImageOperands operands(ctx, false, true, false, lod_id, offset);
+    return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
+                &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
+                sampler, coords, operands.Mask(), operands.Span());
 }
 
-Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&) {
-    throw LogicError("Unreachable instruction");
+// Bindless depth-compare sample with implicit LOD against `dref`; non-fragment stages sample at explicit LOD 0.
+Id EmitBindlessImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, Id coords, const IR::Value& handle, Id dref, const IR::Value& bias_lc, const IR::Value& offset) {
+    const auto info = inst->Flags<IR::TextureInstInfo>();
+    const TextureDefinition& def = ctx.textures.at(info.descriptor_index);
+    const Id pointer = ctx.OpAccessChain(def.pointer_type, def.id, ctx.Def(handle));
+    const Id sampler = ctx.OpLoad(def.sampled_type, pointer);
+    if (ctx.stage == Stage::Fragment) {
+        const Id bias = ctx.Def(bias_lc);
+        const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0, bias, offset);
+        return Emit(&EmitContext::OpImageSparseSampleDrefImplicitLod,
+                    &EmitContext::OpImageSampleDrefImplicitLod, ctx, inst, ctx.F32[1],
+                    sampler, coords, dref, operands.MaskOptional(), operands.Span());
+    } else {
+        const Id lod = ctx.Const(0.0f);
+        const ImageOperands operands(ctx, false, true, false, lod, offset);
+        return Emit(&EmitContext::OpImageSparseSampleDrefExplicitLod,
+                    &EmitContext::OpImageSampleDrefExplicitLod, ctx, inst, ctx.F32[1],
+                    sampler, coords, dref, operands.Mask(), operands.Span());
+    }
 }
 
-Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&) {
-    throw LogicError("Unreachable instruction");
+// Bindless depth-compare sample with an explicit LOD taken from `lod`, using `handle` as the index into the texture array.
+Id EmitBindlessImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, Id coords, const IR::Value& handle, Id dref, const IR::Value& lod, const IR::Value& offset) {
+    const auto info = inst->Flags<IR::TextureInstInfo>();
+    const TextureDefinition& def = ctx.textures.at(info.descriptor_index);
+    const Id pointer = ctx.OpAccessChain(def.pointer_type, def.id, ctx.Def(handle));
+    const Id sampler = ctx.OpLoad(def.sampled_type, pointer);
+    const Id lod_id = ctx.Def(lod);
+    const ImageOperands operands(ctx, false, true, false, lod_id, offset);
+    return Emit(&EmitContext::OpImageSparseSampleDrefExplicitLod,
+                &EmitContext::OpImageSampleDrefExplicitLod, ctx, inst, ctx.F32[1],
+                sampler, coords, dref, operands.Mask(), operands.Span());
 }
 
-Id EmitBindlessImageGather(EmitContext&) {
-    throw LogicError("Unreachable instruction");
+// Bindless gather of component `info.gather_component`; coordinates are nudged first when the profile needs the sub-pixel offset.
+Id EmitBindlessImageGather(EmitContext& ctx, IR::Inst* inst, Id coords, const IR::Value& handle, const IR::Value& offset, const IR::Value& offset2) {
+    const auto info = inst->Flags<IR::TextureInstInfo>();
+    const TextureDefinition& def = ctx.textures.at(info.descriptor_index);
+    const Id pointer = ctx.OpAccessChain(def.pointer_type, def.id, ctx.Def(handle));
+    const Id sampler = ctx.OpLoad(def.sampled_type, pointer);
+    const ImageOperands operands(ctx, offset, offset2);
+    Id gather_coords = coords;
+    if (ctx.profile.need_gather_subpixel_offset) {
+        gather_coords = ImageGatherSubpixelOffset(ctx, info, sampler, coords);
+    }
+    return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst,
+                ctx.F32[4], sampler, gather_coords, ctx.Const(info.gather_component),
+                operands.MaskOptional(), operands.Span());
 }
 
-Id EmitBindlessImageGatherDref(EmitContext&) {
-    throw LogicError("Unreachable instruction");
+// Bindless depth-compare gather against `dref`; coordinates are nudged first when the profile needs the sub-pixel offset.
+Id EmitBindlessImageGatherDref(EmitContext& ctx, IR::Inst* inst, Id coords, const IR::Value& handle, const IR::Value& offset, const IR::Value& offset2, Id dref) {
+    const auto info = inst->Flags<IR::TextureInstInfo>();
+    const TextureDefinition& def = ctx.textures.at(info.descriptor_index);
+    const Id pointer = ctx.OpAccessChain(def.pointer_type, def.id, ctx.Def(handle));
+    const Id sampler = ctx.OpLoad(def.sampled_type, pointer);
+    const ImageOperands operands(ctx, offset, offset2);
+    Id gather_coords = coords;
+    if (ctx.profile.need_gather_subpixel_offset) {
+        gather_coords = ImageGatherSubpixelOffset(ctx, info, sampler, coords);
+    }
+    return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst,
+                ctx.F32[4], sampler, gather_coords, dref, operands.MaskOptional(),
+                operands.Span());
 }
 
-Id EmitBindlessImageFetch(EmitContext&) {
-    throw LogicError("Unreachable instruction");
+// Bindless texel fetch; trailing operands are (offset, lod, ms). NOTE(review): `offset` is never applied to `coords` here - confirm that is intended.
+Id EmitBindlessImageFetch(EmitContext& ctx, IR::Inst* inst, Id coords, const IR::Value& handle, const IR::Value& offset, Id lod, const IR::Value& ms) {
+    const auto info = inst->Flags<IR::TextureInstInfo>();
+    const TextureDefinition& def = ctx.textures.at(info.descriptor_index);
+    const Id pointer = ctx.OpAccessChain(def.pointer_type, def.id, ctx.Def(handle));
+    const Id sampler = ctx.OpLoad(def.sampled_type, pointer);
+    const Id ms_id = ctx.Def(ms);
+    const ImageOperands operands(lod, ms_id);
+    const Id result_type = ctx.F32[4];
+    Id color = Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, result_type, sampler, coords, operands.MaskOptional(), operands.Span());
+    return color;
 }
 
-Id EmitBindlessImageQueryDimensions(EmitContext&) {
-    throw LogicError("Unreachable instruction");
+// Bindless size query; operands are (handle, lod, skip_mips). NOTE(review): always yields a 2-component size, whatever the texture type - confirm.
+Id EmitBindlessImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, Id handle, Id lod, Id skip_mips) {
+    const auto info = inst->Flags<IR::TextureInstInfo>();
+    const TextureDefinition& def = ctx.textures.at(info.descriptor_index);
+    const Id pointer = ctx.OpAccessChain(def.pointer_type, def.id, handle);
+    const Id sampler = ctx.OpLoad(def.sampled_type, pointer);
+    // skip_mips is not used in this implementation
+    return ctx.OpImageQuerySizeLod(ctx.U32[2], sampler, lod);
 }
 
-Id EmitBindlessImageQueryLod(EmitContext&) {
-    throw LogicError("Unreachable instruction");
+// Bindless LOD query. OpImageQueryLod must produce a 2-component float vector; it is padded with zeros to vec4 (presumably what the IR expects - confirm).
+Id EmitBindlessImageQueryLod(EmitContext& ctx, IR::Inst* inst, Id coords, const IR::Value& handle) {
+    const auto info = inst->Flags<IR::TextureInstInfo>();
+    const TextureDefinition& def = ctx.textures.at(info.descriptor_index);
+    const Id pointer = ctx.OpAccessChain(def.pointer_type, def.id, ctx.Def(handle));
+    const Id sampler = ctx.OpLoad(def.sampled_type, pointer);
+    return ctx.OpCompositeConstruct(ctx.F32[4], ctx.OpImageQueryLod(ctx.F32[2], sampler, coords), ctx.Const(0.0f), ctx.Const(0.0f));
 }
 
-Id EmitBindlessImageGradient(EmitContext&) {
-    throw LogicError("Unreachable instruction");
+// Bindless sample with explicit gradients; trailing operands are (derivatives, offset, lod_clamp). NOTE(review): derivative count is fixed at 2 - confirm.
+Id EmitBindlessImageGradient(EmitContext& ctx, IR::Inst* inst, Id coords, const IR::Value& handle, const IR::Value& derivatives, const IR::Value& offset, const IR::Value& lod_clamp) {
+    const auto info = inst->Flags<IR::TextureInstInfo>();
+    const TextureDefinition& def = ctx.textures.at(info.descriptor_index);
+    const Id pointer = ctx.OpAccessChain(def.pointer_type, def.id, ctx.Def(handle));
+    const Id sampler = ctx.OpLoad(def.sampled_type, pointer);
+    const Id deriv_id = ctx.Def(derivatives);
+    const Id lod_clamp_id = ctx.Def(lod_clamp);
+    const ImageOperands operands(ctx, info.has_lod_clamp != 0, deriv_id, 2, offset, lod_clamp_id);
+    // The derivatives are already handed to ImageOperands above, so they must not be passed a second
+    // time as separate dPdx/dPdy operands. Emit the explicit-LOD sample through Emit() exactly like
+    // the sibling functions, which also covers the sparse variant of the instruction.
+    return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
+                &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
+                sampler, coords, operands.Mask(), operands.Span());
 }
 
-Id EmitBindlessImageRead(EmitContext&) {
-    throw LogicError("Unreachable instruction");
+// Bindless image read. NOTE(review): this reads through ctx.textures and the loaded sampled_type value - confirm OpImageRead accepts that operand.
+Id EmitBindlessImageRead(EmitContext& ctx, IR::Inst* inst, Id coords, const IR::Value& handle) {
+    const auto info = inst->Flags<IR::TextureInstInfo>();
+    const TextureDefinition& def = ctx.textures.at(info.descriptor_index);
+    const Id pointer = ctx.OpAccessChain(def.pointer_type, def.id, ctx.Def(handle));
+    const Id sampler = ctx.OpLoad(def.sampled_type, pointer);
+    return ctx.OpImageRead(ctx.U32[4], sampler, coords);
 }
 
-Id EmitBindlessImageWrite(EmitContext&) {
-    throw LogicError("Unreachable instruction");
+// Bindless image write of `color`. NOTE(review): this writes through ctx.textures and the loaded sampled_type value - confirm OpImageWrite accepts that operand.
+void EmitBindlessImageWrite(EmitContext& ctx, IR::Inst* inst, Id coords, const IR::Value& handle, Id color) {
+    const auto info = inst->Flags<IR::TextureInstInfo>();
+    const TextureDefinition& def = ctx.textures.at(info.descriptor_index);
+    const Id pointer = ctx.OpAccessChain(def.pointer_type, def.id, ctx.Def(handle));
+    const Id sampler = ctx.OpLoad(def.sampled_type, pointer);
+    ctx.OpImageWrite(sampler, coords, color);
 }
 
 Id EmitBoundImageSampleImplicitLod(EmitContext&) {
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
index 171977c85c..a75013c941 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@@ -513,18 +513,102 @@ Id EmitConvertU8U32(EmitContext& ctx, Id value);
 Id EmitConvertU32U8(EmitContext& ctx, Id value);
 Id EmitConvertS32S8(EmitContext& ctx, Id value);
 Id EmitConvertS32S16(EmitContext& ctx, Id value);
-Id EmitBindlessImageSampleImplicitLod(EmitContext&);
-Id EmitBindlessImageSampleExplicitLod(EmitContext&);
-Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&);
-Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&);
-Id EmitBindlessImageGather(EmitContext&);
-Id EmitBindlessImageGatherDref(EmitContext&);
-Id EmitBindlessImageFetch(EmitContext&);
-Id EmitBindlessImageQueryDimensions(EmitContext&);
-Id EmitBindlessImageQueryLod(EmitContext&);
-Id EmitBindlessImageGradient(EmitContext&);
-Id EmitBindlessImageRead(EmitContext&);
-Id EmitBindlessImageWrite(EmitContext&);
+// Bindless image emitters (full signatures replacing the former single-argument stubs). NOTE(review): confirm which U32 operand is the handle.
+Id EmitBindlessImageSampleImplicitLod(
+    EmitContext& ctx, IR::Inst* inst,
+    Id coords, // U32
+    const IR::Value& handle, // Opaque
+    const IR::Value& bias_lc, // Opaque
+    const IR::Value& offset // Opaque
+);
+// Real implementation for BindlessImageSampleExplicitLod
+Id EmitBindlessImageSampleExplicitLod(
+    EmitContext& ctx, IR::Inst* inst,
+    Id coords, // U32
+    const IR::Value& handle, // Opaque
+    const IR::Value& lod, // Opaque
+    const IR::Value& offset // Opaque
+);
+// Real implementation for BindlessImageSampleDrefImplicitLod
+Id EmitBindlessImageSampleDrefImplicitLod(
+    EmitContext& ctx, IR::Inst* inst,
+    Id coords, // U32
+    const IR::Value& handle, // Opaque
+    Id dref, // F32
+    const IR::Value& bias_lc, // Opaque
+    const IR::Value& offset // Opaque
+);
+// Real implementation for BindlessImageSampleDrefExplicitLod
+Id EmitBindlessImageSampleDrefExplicitLod(
+    EmitContext& ctx, IR::Inst* inst,
+    Id coords, // U32
+    const IR::Value& handle, // Opaque
+    Id dref, // F32
+    const IR::Value& lod, // Opaque
+    const IR::Value& offset // Opaque
+);
+// Real implementation for BindlessImageGather
+Id EmitBindlessImageGather(
+    EmitContext& ctx, IR::Inst* inst,
+    Id coords, // U32
+    const IR::Value& handle, // Opaque
+    const IR::Value& offset, // Opaque
+    const IR::Value& offset2 // Opaque
+);
+// Real implementation for BindlessImageGatherDref
+Id EmitBindlessImageGatherDref(
+    EmitContext& ctx, IR::Inst* inst,
+    Id coords, // U32
+    const IR::Value& handle, // Opaque
+    const IR::Value& offset, // Opaque
+    const IR::Value& offset2, // Opaque
+    Id dref // F32
+);
+// Real implementation for BindlessImageFetch
+Id EmitBindlessImageFetch(
+    EmitContext& ctx, IR::Inst* inst,
+    Id coords, // U32
+    const IR::Value& handle, // Opaque
+    const IR::Value& offset, // Opaque
+    Id lod, // U32
+    const IR::Value& ms // Opaque
+);
+// Real implementation for BindlessImageQueryDimensions
+Id EmitBindlessImageQueryDimensions(
+    EmitContext& ctx, IR::Inst* inst,
+    Id handle, // U32
+    Id lod, // U32
+    Id skip_mips // U1
+);
+// Real implementation for BindlessImageQueryLod
+Id EmitBindlessImageQueryLod(
+    EmitContext& ctx, IR::Inst* inst,
+    Id coords, // U32
+    const IR::Value& handle // Opaque
+);
+// Real implementation for BindlessImageGradient
+Id EmitBindlessImageGradient(
+    EmitContext& ctx, IR::Inst* inst,
+    Id coords, // U32
+    const IR::Value& handle, // Opaque
+    const IR::Value& derivatives, // Opaque
+    const IR::Value& offset, // Opaque
+    const IR::Value& lod_clamp // Opaque
+);
+// Real implementation for BindlessImageRead
+Id EmitBindlessImageRead(
+    EmitContext& ctx, IR::Inst* inst,
+    Id coords, // U32
+    const IR::Value& handle // Opaque
+);
+// Real implementation for BindlessImageWrite
+void EmitBindlessImageWrite(
+    EmitContext& ctx, IR::Inst* inst,
+    Id coords, // U32
+    const IR::Value& handle, // Opaque
+    Id color // U32x4
+);
+
 Id EmitBoundImageSampleImplicitLod(EmitContext&);
 Id EmitBoundImageSampleExplicitLod(EmitContext&);
 Id EmitBoundImageSampleDrefImplicitLod(EmitContext&);
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index dd4a9e2d03..97759f8cd3 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -1696,4 +1696,30 @@ void EmitContext::DefineOutputs(const IR::Program& program) {
     }
 }
 
+Id EmitContext::OpImageSampleGrad(Id result_type, Id sampled_image, Id coordinate, Id dPdx, Id dPdy, std::optional<spv::ImageOperandsMask> mask, std::span<const Id> operands) {
+    // Gradient sample via OpImageSampleExplicitLod + Grad. NOTE(review): `mask`/`operands` must not already contain Grad or any operand ordered before it.
+    std::vector<Id> args = {dPdx, dPdy};
+    if (operands.size() > 0) {
+        args.insert(args.end(), operands.begin(), operands.end());
+    }
+    if (mask.has_value()) {
+        return this->OpImageSampleExplicitLod(result_type, sampled_image, coordinate, static_cast<spv::ImageOperandsMask>(spv::ImageOperandsMask::Grad | *mask), args);
+    } else {
+        return this->OpImageSampleExplicitLod(result_type, sampled_image, coordinate, spv::ImageOperandsMask::Grad, args);
+    }
+}
+
+Id EmitContext::OpImageSparseSampleGrad(Id result_type, Id sampled_image, Id coordinate, Id dPdx, Id dPdy, std::optional<spv::ImageOperandsMask> mask, std::span<const Id> operands) {
+    // Sparse counterpart of OpImageSampleGrad, built on OpImageSparseSampleExplicitLod + Grad; the same NOTE(review) about `mask`/`operands` applies.
+    std::vector<Id> args = {dPdx, dPdy};
+    if (operands.size() > 0) {
+        args.insert(args.end(), operands.begin(), operands.end());
+    }
+    if (mask.has_value()) {
+        return this->OpImageSparseSampleExplicitLod(result_type, sampled_image, coordinate, static_cast<spv::ImageOperandsMask>(spv::ImageOperandsMask::Grad | *mask), args);
+    } else {
+        return this->OpImageSparseSampleExplicitLod(result_type, sampled_image, coordinate, spv::ImageOperandsMask::Grad, args);
+    }
+}
+
 } // namespace Shader::Backend::SPIRV
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
index 66cdb1d3db..3cd4b565d1 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
 // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
@@ -361,6 +364,9 @@ public:
     Id load_const_func_u32x2{};
     Id load_const_func_u32x4{};
 
+    Id OpImageSampleGrad(Id result_type, Id sampled_image, Id coordinate, Id dPdx, Id dPdy, std::optional<spv::ImageOperandsMask> mask, std::span<const Id> operands);
+    Id OpImageSparseSampleGrad(Id result_type, Id sampled_image, Id coordinate, Id dPdx, Id dPdy, std::optional<spv::ImageOperandsMask> mask, std::span<const Id> operands);
+
 private:
     void DefineCommonTypes(const Info& info);
     void DefineCommonConstants();
diff --git a/src/shader_recompiler/ir_opt/texture_pass.cpp b/src/shader_recompiler/ir_opt/texture_pass.cpp
index 5c62cad939..1ee6b7db97 100644
--- a/src/shader_recompiler/ir_opt/texture_pass.cpp
+++ b/src/shader_recompiler/ir_opt/texture_pass.cpp
@@ -43,6 +43,24 @@ using TextureInstVector = boost::container::small_vector<TextureInst, 24>;
 constexpr u32 DESCRIPTOR_SIZE = 8;
 constexpr u32 DESCRIPTOR_SIZE_SHIFT = static_cast<u32>(std::countr_zero(DESCRIPTOR_SIZE));
 
+// Builds a ConstBufferAddr. `count` defaults to 8 (the value used for dynamically indexed accesses); callers with a static offset pass 1 explicitly.
+static ConstBufferAddr MakeConstBufferAddr(u32 index_val, u32 base_offset, IR::U32 dynamic_offset,
+                                           u32 count = 8, bool has_secondary = false,
+                                           u32 shift_left = 0, u32 secondary_index = 0,
+                                           u32 secondary_offset = 0, u32 secondary_shift_left = 0) {
+    return ConstBufferAddr{
+        .index = index_val,
+        .offset = base_offset,
+        .shift_left = shift_left,
+        .secondary_index = secondary_index,
+        .secondary_offset = secondary_offset,
+        .secondary_shift_left = secondary_shift_left,
+        .dynamic_offset = dynamic_offset,
+        .count = count,
+        .has_secondary = has_secondary,
+    };
+}
+
 IR::Opcode IndexedInstruction(const IR::Inst& inst) {
     switch (inst.GetOpcode()) {
     case IR::Opcode::BindlessImageSampleImplicitLod:
@@ -178,11 +196,11 @@ bool IsTextureInstruction(const IR::Inst& inst) {
     return IndexedInstruction(inst) != IR::Opcode::Void;
 }
 
-std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environment& env);
+std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environment& env, const IR::Program* program);
 
-std::optional<ConstBufferAddr> Track(const IR::Value& value, Environment& env) {
+std::optional<ConstBufferAddr> Track(const IR::Value& value, Environment& env, const IR::Program* program) {
     return IR::BreadthFirstSearch(
-        value, [&env](const IR::Inst* inst) { return TryGetConstBuffer(inst, env); });
+        value, [&env, program](const IR::Inst* inst) { return TryGetConstBuffer(inst, env, program); });
 }
 
 std::optional<u32> TryGetConstant(IR::Value& value, Environment& env) {
@@ -205,14 +223,67 @@ std::optional<u32> TryGetConstant(IR::Value& value, Environment& env) {
     const auto offset_number = offset.U32();
     return env.ReadCbufValue(index_number, offset_number);
 }
+// Helper: returns the value stored by the first WriteLocal (in post_order_blocks order) to the given immediate local index, or nullopt.
+// NOTE(review): a local with several writers, or a writer that reads the same local, is not detected - confirm this cannot pick the wrong source.
+static std::optional<IR::Value> FindLocalWriterSource(const IR::Value& local_idx, const IR::Program* program) {
+    if (!local_idx.IsImmediate() || !program) {
+        return std::nullopt;
+    }
+    const u32 local_word = local_idx.U32();
+    for (IR::Block* const block : program->post_order_blocks) {
+        for (IR::Inst& writer : block->Instructions()) {
+            if (writer.GetOpcode() != IR::Opcode::WriteLocal) {
+                continue;
+            }
+            const IR::Value writer_idx{writer.Arg(0)};
+            if (!writer_idx.IsImmediate() || writer_idx.U32() != local_word) {
+                continue;
+            }
+            return IR::Value{writer.Arg(1)};
+        }
+    }
+    return std::nullopt;
+}
+
+// Helper: if exactly one operand of the supplied IAdd32 tracks to a ConstBufferAddr,
+// return that address with the other operand stored as its dynamic offset (`count` is left as tracked).
+// NOTE(review): the addend is applied to the loaded value, not to the cbuf offset - confirm the two are interchangeable.
+static std::optional<ConstBufferAddr> TryGetConstBufferFromIAdd(const IR::Inst* inst, Environment& env,
+                                                                const IR::Program* program) {
+    IR::Value a{inst->Arg(0)};
+    IR::Value b{inst->Arg(1)};
+    std::optional<ConstBufferAddr> lhs{Track(a, env, program)};
+    std::optional<ConstBufferAddr> rhs{Track(b, env, program)};
+    if (lhs && !rhs) {
+        lhs->dynamic_offset = IR::U32{b};
+        return lhs;
+    }
+    if (rhs && !lhs) {
+        rhs->dynamic_offset = IR::U32{a};
+        return rhs;
+    }
+    return std::nullopt;
+}
+
+std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environment& env, const IR::Program* program) {
+    // Handle LoadLocal uniformly by finding the writer and tracking its source.
+    if (inst->GetOpcode() == IR::Opcode::LoadLocal) {
+        const IR::Value local_idx{inst->Arg(0)};
+        if (const auto src = FindLocalWriterSource(local_idx, program)) {
+            return Track(*src, env, program);
+        }
+        return std::nullopt;
+    }
 
-std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environment& env) {
     switch (inst->GetOpcode()) {
     default:
         return std::nullopt;
+    case IR::Opcode::IAdd32: {
+        return TryGetConstBufferFromIAdd(inst, env, program);
+    }
     case IR::Opcode::BitwiseOr32: {
-        std::optional<ConstBufferAddr> lhs{Track(inst->Arg(0), env)};
-        std::optional<ConstBufferAddr> rhs{Track(inst->Arg(1), env)};
+        std::optional<ConstBufferAddr> lhs{Track(inst->Arg(0), env, program)};
+        std::optional<ConstBufferAddr> rhs{Track(inst->Arg(1), env, program)};
         if (!lhs || !rhs) {
             return std::nullopt;
         }
@@ -225,29 +296,19 @@ std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environme
         if (lhs->shift_left > 0 || lhs->index > rhs->index || lhs->offset > rhs->offset) {
             std::swap(lhs, rhs);
         }
-        return ConstBufferAddr{
-            .index = lhs->index,
-            .offset = lhs->offset,
-            .shift_left = lhs->shift_left,
-            .secondary_index = rhs->index,
-            .secondary_offset = rhs->offset,
-            .secondary_shift_left = rhs->shift_left,
-            .dynamic_offset = {},
-            .count = 1,
-            .has_secondary = true,
-        };
+        return MakeConstBufferAddr(lhs->index, lhs->offset, IR::U32{}, 1, true, lhs->shift_left,
+                                   rhs->index, rhs->offset, rhs->shift_left);
     }
     case IR::Opcode::ShiftLeftLogical32: {
         const IR::Value shift{inst->Arg(1)};
         if (!shift.IsImmediate()) {
             return std::nullopt;
         }
-        std::optional<ConstBufferAddr> lhs{Track(inst->Arg(0), env)};
+        std::optional<ConstBufferAddr> lhs{Track(inst->Arg(0), env, program)};
         if (lhs) {
             lhs->shift_left = shift.U32();
         }
         return lhs;
-        break;
     }
     case IR::Opcode::BitwiseAnd32: {
         IR::Value op1{inst->Arg(0)};
@@ -256,32 +317,31 @@ std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environme
             std::swap(op1, op2);
         }
         if (!op2.IsImmediate() && !op1.IsImmediate()) {
-            do {
-                auto try_index = TryGetConstant(op1, env);
-                if (try_index) {
-                    op1 = op2;
-                    op2 = IR::Value{*try_index};
-                    break;
-                }
+            auto try_index = TryGetConstant(op1, env);
+            if (try_index) {
+                op1 = op2;
+                op2 = IR::Value{*try_index};
+            } else {
                 auto try_index_2 = TryGetConstant(op2, env);
                 if (try_index_2) {
                     op2 = IR::Value{*try_index_2};
-                    break;
+                } else {
+                    return std::nullopt;
                 }
-                return std::nullopt;
-            } while (false);
+            }
         }
-        std::optional<ConstBufferAddr> lhs{Track(op1, env)};
+        std::optional<ConstBufferAddr> lhs{Track(op1, env, program)};
         if (lhs) {
             lhs->shift_left = static_cast<u32>(std::countr_zero(op2.U32()));
         }
         return lhs;
-        break;
     }
     case IR::Opcode::GetCbufU32x2:
     case IR::Opcode::GetCbufU32:
         break;
     }
+
+    // Handle GetCbufU32 / GetCbufU32x2 indexed accesses.
     const IR::Value index{inst->Arg(0)};
     const IR::Value offset{inst->Arg(1)};
     if (!index.IsImmediate()) {
@@ -290,68 +350,82 @@ std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environme
         return std::nullopt;
     }
     if (offset.IsImmediate()) {
-        return ConstBufferAddr{
-            .index = index.U32(),
-            .offset = offset.U32(),
-            .shift_left = 0,
-            .secondary_index = 0,
-            .secondary_offset = 0,
-            .secondary_shift_left = 0,
-            .dynamic_offset = {},
-            .count = 1,
-            .has_secondary = false,
-        };
+        return MakeConstBufferAddr(index.U32(), offset.U32(), IR::U32{}, 1, false);
     }
+
     IR::Inst* const offset_inst{offset.InstRecursive()};
+    // If the offset is loaded from a local, try to find the writer and analyze it.
+    if (offset_inst->GetOpcode() == IR::Opcode::LoadLocal) {
+        const IR::Value local_idx{offset_inst->Arg(0)};
+        if (const auto writer_src = FindLocalWriterSource(local_idx, program)) {
+            const IR::Value src{*writer_src};
+            if (src.IsImmediate()) {
+                return MakeConstBufferAddr(index.U32(), src.U32(), IR::U32{}, 1, false);
+            }
+            IR::Inst* const src_inst{src.InstRecursive()};
+            if (src_inst->GetOpcode() == IR::Opcode::IAdd32) {
+                // Extract immediate base + dynamic offset from the IAdd32 writer.
+                if (src_inst->Arg(0).IsImmediate()) {
+                    return MakeConstBufferAddr(index.U32(), src_inst->Arg(0).U32(), IR::U32{src_inst->Arg(1)});
+                } else if (src_inst->Arg(1).IsImmediate()) {
+                    return MakeConstBufferAddr(index.U32(), src_inst->Arg(1).U32(), IR::U32{src_inst->Arg(0)});
+                }
+            }
+        }
+        return std::nullopt;
+    }
+
+    // Otherwise the offset must be an IAdd32 (base + dynamic) to be supported.
     if (offset_inst->GetOpcode() != IR::Opcode::IAdd32) {
         return std::nullopt;
     }
 
-    u32 base_offset{};
-    IR::U32 dynamic_offset;
     if (offset_inst->Arg(0).IsImmediate()) {
-        base_offset = offset_inst->Arg(0).U32();
-        dynamic_offset = IR::U32{offset_inst->Arg(1)};
-    } else if (offset_inst->Arg(1).IsImmediate()) {
-        base_offset = offset_inst->Arg(1).U32();
-        dynamic_offset = IR::U32{offset_inst->Arg(0)};
-    } else {
-        return std::nullopt;
+        return MakeConstBufferAddr(index.U32(), offset_inst->Arg(0).U32(), IR::U32{offset_inst->Arg(1)});
     }
-    return ConstBufferAddr{
-        .index = index.U32(),
-        .offset = base_offset,
-        .shift_left = 0,
-        .secondary_index = 0,
-        .secondary_offset = 0,
-        .secondary_shift_left = 0,
-        .dynamic_offset = dynamic_offset,
-        .count = 8,
-        .has_secondary = false,
-    };
+    if (offset_inst->Arg(1).IsImmediate()) {
+        return MakeConstBufferAddr(index.U32(), offset_inst->Arg(1).U32(), IR::U32{offset_inst->Arg(0)});
+    }
+    return std::nullopt;
 }
 
-TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) {
+TextureInst MakeInst(Environment& env, IR::Program& program, IR::Block* block, IR::Inst& inst) {
     ConstBufferAddr addr;
     if (IsBindless(inst)) {
-        const std::optional<ConstBufferAddr> track_addr{Track(inst.Arg(0), env)};
+        const std::optional<ConstBufferAddr> track_addr{Track(inst.Arg(0), env, &program)};
         if (!track_addr) {
-            throw NotImplementedException("Failed to track bindless texture constant buffer");
+            std::string arg0_str;
+            const IR::Inst* arg0_producer = nullptr;
+            if (inst.Arg(0).IsImmediate()) {
+                arg0_str = std::to_string(inst.Arg(0).U32());
+            } else {
+                arg0_producer = inst.Arg(0).InstRecursive();
+                arg0_str = "ptr=" + std::to_string(reinterpret_cast<std::uintptr_t>(arg0_producer)) + ", op=" + std::to_string(static_cast<int>(arg0_producer->GetOpcode()));
+            }
+            std::string arg1_str;
+            const IR::Inst* arg1_producer = nullptr;
+            if (inst.NumArgs() > 1) {
+                if (inst.Arg(1).IsImmediate()) {
+                    arg1_str = std::to_string(inst.Arg(1).U32());
+                } else {
+                    arg1_producer = inst.Arg(1).InstRecursive();
+                    arg1_str = "ptr=" + std::to_string(reinterpret_cast<std::uintptr_t>(arg1_producer)) + ", op=" + std::to_string(static_cast<int>(arg1_producer->GetOpcode()));
+                }
+            }
+            LOG_ERROR(HW_GPU, "MakeInst: Failed to track bindless texture constant buffer: opcode={}, Arg(0)={}, Arg(1)={}",
+                      inst.GetOpcode(), arg0_str.c_str(), arg1_str.c_str());
+            if (arg0_producer) {
+                LOG_ERROR(HW_GPU, "MakeInst: Arg(0) producer opcode={} @ {}", arg0_producer->GetOpcode(), reinterpret_cast<std::uintptr_t>(arg0_producer));
+            }
+            if (arg1_producer) {
+                LOG_ERROR(HW_GPU, "MakeInst: Arg(1) producer opcode={} @ {}", arg1_producer->GetOpcode(), reinterpret_cast<std::uintptr_t>(arg1_producer));
+            }
+            throw NotImplementedException("MakeInst: Failed to track bindless texture constant buffer");
         } else {
             addr = *track_addr;
         }
     } else {
-        addr = ConstBufferAddr{
-            .index = env.TextureBoundBuffer(),
-            .offset = inst.Arg(0).U32(),
-            .shift_left = 0,
-            .secondary_index = 0,
-            .secondary_offset = 0,
-            .secondary_shift_left = 0,
-            .dynamic_offset = {},
-            .count = 1,
-            .has_secondary = false,
-        };
+        addr = MakeConstBufferAddr(env.TextureBoundBuffer(), inst.Arg(0).U32(), IR::U32{}, 1, false);
     }
     return TextureInst{
         .cbuf = addr,
@@ -534,7 +608,7 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
             if (!IsTextureInstruction(inst)) {
                 continue;
             }
-            to_replace.push_back(MakeInst(env, block, inst));
+            to_replace.push_back(MakeInst(env, program, block, inst));
         }
     }
     // Sort instructions to visit textures by constant buffer index, then by offset