WIP: [spirv] attempt to cover bindless textures

This commit is contained in:
xbzk 2026-01-17 19:20:10 -03:00 committed by crueter
parent b9da45cb1f
commit 7ac1e7e379
5 changed files with 429 additions and 113 deletions

View File

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@ -350,52 +353,175 @@ void AddOffsetToCoordinates(EmitContext& ctx, const IR::TextureInstInfo& info, I
}
} // Anonymous namespace
Id EmitBindlessImageSampleImplicitLod(EmitContext&) {
throw LogicError("Unreachable instruction");
// Samples a bindless texture with an implicit level of detail.
// The sampled image is loaded through an access chain into the texture
// definition selected by info.descriptor_index, indexed by the handle value.
// Fragment shaders emit the implicit-LOD sample (with optional bias / LOD
// clamp); every other stage samples with an explicit LOD of 0.0.
Id EmitBindlessImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, Id coords, const IR::Value& handle, const IR::Value& bias_lc, const IR::Value& offset) {
    const auto info = inst->Flags<IR::TextureInstInfo>();
    const TextureDefinition& def = ctx.textures.at(info.descriptor_index);
    const Id texture_ptr = ctx.OpAccessChain(def.pointer_type, def.id, ctx.Def(handle));
    const Id texture = ctx.OpLoad(def.sampled_type, texture_ptr);
    const bool clamp_lod = info.has_lod_clamp != 0;

    if (ctx.stage != Stage::Fragment) {
        // Non-fragment stages: explicit LOD fixed at zero.
        const Id zero_lod = ctx.Const(0.0f);
        const ImageOperands explicit_ops(ctx, false, true, clamp_lod, zero_lod, offset);
        return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
                    &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
                    texture, coords, explicit_ops.Mask(), explicit_ops.Span());
    }

    // Fragment stage: implicit LOD, bias taken from bias_lc when the instruction carries one.
    const Id bias_id = ctx.Def(bias_lc);
    const ImageOperands implicit_ops(ctx, info.has_bias != 0, false, clamp_lod, bias_id, offset);
    return Emit(&EmitContext::OpImageSparseSampleImplicitLod,
                &EmitContext::OpImageSampleImplicitLod, ctx, inst, ctx.F32[4],
                texture, coords, implicit_ops.MaskOptional(), implicit_ops.Span());
}
Id EmitBindlessImageSampleExplicitLod(EmitContext&) {
throw LogicError("Unreachable instruction");
// Samples a bindless texture at an explicitly supplied level of detail.
// The sampled image is loaded through an access chain indexed by the handle
// value; the LOD operand comes from `lod`, and no bias or LOD clamp is applied.
Id EmitBindlessImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, Id coords, const IR::Value& handle, const IR::Value& lod, const IR::Value& offset) {
    const auto info = inst->Flags<IR::TextureInstInfo>();
    const TextureDefinition& def = ctx.textures.at(info.descriptor_index);
    const Id texture_ptr = ctx.OpAccessChain(def.pointer_type, def.id, ctx.Def(handle));
    const Id texture = ctx.OpLoad(def.sampled_type, texture_ptr);

    // Arguments: has_bias = false, has_lod = true, has_lod_clamp = false.
    const Id explicit_lod = ctx.Def(lod);
    const ImageOperands ops(ctx, false, true, false, explicit_lod, offset);
    return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
                &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
                texture, coords, ops.Mask(), ops.Span());
}
Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&) {
throw LogicError("Unreachable instruction");
// Depth-comparison sample of a bindless texture with an implicit level of detail.
// Fragment shaders emit the implicit-LOD dref sample (optional bias / LOD
// clamp); other stages emit the explicit-LOD dref sample with LOD 0.0 and no
// clamp. The result type is the scalar float type ctx.F32[1].
Id EmitBindlessImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, Id coords, const IR::Value& handle, Id dref, const IR::Value& bias_lc, const IR::Value& offset) {
    const auto info = inst->Flags<IR::TextureInstInfo>();
    const TextureDefinition& def = ctx.textures.at(info.descriptor_index);
    const Id texture_ptr = ctx.OpAccessChain(def.pointer_type, def.id, ctx.Def(handle));
    const Id texture = ctx.OpLoad(def.sampled_type, texture_ptr);

    if (ctx.stage != Stage::Fragment) {
        const Id zero_lod = ctx.Const(0.0f);
        const ImageOperands explicit_ops(ctx, false, true, false, zero_lod, offset);
        return Emit(&EmitContext::OpImageSparseSampleDrefExplicitLod,
                    &EmitContext::OpImageSampleDrefExplicitLod, ctx, inst, ctx.F32[1],
                    texture, coords, dref, explicit_ops.Mask(), explicit_ops.Span());
    }

    const Id bias_id = ctx.Def(bias_lc);
    const ImageOperands implicit_ops(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0,
                                     bias_id, offset);
    return Emit(&EmitContext::OpImageSparseSampleDrefImplicitLod,
                &EmitContext::OpImageSampleDrefImplicitLod, ctx, inst, ctx.F32[1],
                texture, coords, dref, implicit_ops.MaskOptional(), implicit_ops.Span());
}
Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&) {
throw LogicError("Unreachable instruction");
// Depth-comparison sample of a bindless texture at an explicit level of detail.
// The sampled image is loaded through an access chain indexed by the handle
// value; `dref` is the comparison reference and `lod` supplies the LOD operand.
Id EmitBindlessImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, Id coords, const IR::Value& handle, Id dref, const IR::Value& lod, const IR::Value& offset) {
    const auto info = inst->Flags<IR::TextureInstInfo>();
    const TextureDefinition& def = ctx.textures.at(info.descriptor_index);
    const Id texture_ptr = ctx.OpAccessChain(def.pointer_type, def.id, ctx.Def(handle));
    const Id texture = ctx.OpLoad(def.sampled_type, texture_ptr);

    // Arguments: has_bias = false, has_lod = true, has_lod_clamp = false.
    const Id explicit_lod = ctx.Def(lod);
    const ImageOperands ops(ctx, false, true, false, explicit_lod, offset);
    return Emit(&EmitContext::OpImageSparseSampleDrefExplicitLod,
                &EmitContext::OpImageSampleDrefExplicitLod, ctx, inst, ctx.F32[1],
                texture, coords, dref, ops.Mask(), ops.Span());
}
Id EmitBindlessImageGather(EmitContext&) {
throw LogicError("Unreachable instruction");
// Gathers one component (info.gather_component) from the texels of a bindless
// texture. When the profile requests it, the coordinates are first adjusted by
// ImageGatherSubpixelOffset.
Id EmitBindlessImageGather(EmitContext& ctx, IR::Inst* inst, Id coords, const IR::Value& handle, const IR::Value& offset, const IR::Value& offset2) {
    const auto info = inst->Flags<IR::TextureInstInfo>();
    const TextureDefinition& def = ctx.textures.at(info.descriptor_index);
    const Id texture_ptr = ctx.OpAccessChain(def.pointer_type, def.id, ctx.Def(handle));
    const Id texture = ctx.OpLoad(def.sampled_type, texture_ptr);
    const ImageOperands ops(ctx, offset, offset2);

    // `coords` is a by-value parameter, so it can be adjusted in place.
    if (ctx.profile.need_gather_subpixel_offset) {
        coords = ImageGatherSubpixelOffset(ctx, info, texture, coords);
    }
    return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst,
                ctx.F32[4], texture, coords, ctx.Const(info.gather_component),
                ops.MaskOptional(), ops.Span());
}
Id EmitBindlessImageGatherDref(EmitContext&) {
throw LogicError("Unreachable instruction");
// Depth-comparison gather from a bindless texture, using `dref` as the
// comparison reference. When the profile requests it, the coordinates are
// first adjusted by ImageGatherSubpixelOffset.
Id EmitBindlessImageGatherDref(EmitContext& ctx, IR::Inst* inst, Id coords, const IR::Value& handle, const IR::Value& offset, const IR::Value& offset2, Id dref) {
    const auto info = inst->Flags<IR::TextureInstInfo>();
    const TextureDefinition& def = ctx.textures.at(info.descriptor_index);
    const Id texture_ptr = ctx.OpAccessChain(def.pointer_type, def.id, ctx.Def(handle));
    const Id texture = ctx.OpLoad(def.sampled_type, texture_ptr);
    const ImageOperands ops(ctx, offset, offset2);

    // `coords` is a by-value parameter, so it can be adjusted in place.
    if (ctx.profile.need_gather_subpixel_offset) {
        coords = ImageGatherSubpixelOffset(ctx, info, texture, coords);
    }
    return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst,
                ctx.F32[4], texture, coords, dref, ops.MaskOptional(), ops.Span());
}
Id EmitBindlessImageFetch(EmitContext&) {
throw LogicError("Unreachable instruction");
// Fetches a texel from a bindless texture.
// Parameter order is (coords, handle, offset, lod, ms): `lod` arrives as an
// already-defined Id, while `offset` and `ms` arrive as IR values.
// The texel offset is folded into the coordinates before the fetch; it was
// previously accepted but never applied.
Id EmitBindlessImageFetch(EmitContext& ctx, IR::Inst* inst, Id coords, const IR::Value& handle, const IR::Value& offset, Id lod, const IR::Value& ms) {
    const auto info = inst->Flags<IR::TextureInstInfo>();
    // NOTE(review): assumes AddOffsetToCoordinates(ctx, info, Id& coords, Id offset)
    // leaves `coords` untouched when the offset Id is invalid (absent offset) — confirm.
    AddOffsetToCoordinates(ctx, info, coords, ctx.Def(offset));

    const TextureDefinition& def = ctx.textures.at(info.descriptor_index);
    const Id pointer = ctx.OpAccessChain(def.pointer_type, def.id, ctx.Def(handle));
    const Id sampler = ctx.OpLoad(def.sampled_type, pointer);

    const Id ms_id = ctx.Def(ms);
    const ImageOperands operands(lod, ms_id);
    return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst,
                ctx.F32[4], sampler, coords, operands.MaskOptional(), operands.Span());
}
Id EmitBindlessImageQueryDimensions(EmitContext&) {
throw LogicError("Unreachable instruction");
// Queries the size of a bindless texture at mip level `lod`.
// Signature: (EmitContext&, IR::Inst*, Id handle, Id lod, Id skip_mips).
// `handle` is already an Id and is used directly as the access-chain index.
// `skip_mips` is accepted but currently ignored; the result is always the
// two-component unsigned vector returned by OpImageQuerySizeLod.
Id EmitBindlessImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, Id handle, Id lod, Id skip_mips) {
    const auto info = inst->Flags<IR::TextureInstInfo>();
    const TextureDefinition& def = ctx.textures.at(info.descriptor_index);
    const Id texture_ptr = ctx.OpAccessChain(def.pointer_type, def.id, handle);
    const Id texture = ctx.OpLoad(def.sampled_type, texture_ptr);
    const Id size = ctx.OpImageQuerySizeLod(ctx.U32[2], texture, lod);
    return size;
}
Id EmitBindlessImageQueryLod(EmitContext&) {
throw LogicError("Unreachable instruction");
// Queries the level of detail that would be used to sample a bindless texture
// at `coords`.
// Signature: (EmitContext&, IR::Inst*, Id coords, const IR::Value& handle).
// OpImageQueryLod must produce a two-component floating-point vector, so the
// query uses ctx.F32[2] (the previous ctx.U32[2] result type is not valid
// SPIR-V). The pair is then padded with zeros to four components.
// NOTE(review): the four-component result shape is assumed to match what the
// IR expects from the non-bindless ImageQueryLod emitter — confirm.
Id EmitBindlessImageQueryLod(EmitContext& ctx, IR::Inst* inst, Id coords, const IR::Value& handle) {
    const auto info = inst->Flags<IR::TextureInstInfo>();
    const TextureDefinition& def = ctx.textures.at(info.descriptor_index);
    const Id pointer = ctx.OpAccessChain(def.pointer_type, def.id, ctx.Def(handle));
    const Id sampler = ctx.OpLoad(def.sampled_type, pointer);
    const Id zero = ctx.Const(0.0f);
    const Id lod_pair = ctx.OpImageQueryLod(ctx.F32[2], sampler, coords);
    return ctx.OpCompositeConstruct(ctx.F32[4], lod_pair, zero, zero);
}
Id EmitBindlessImageGradient(EmitContext&) {
throw LogicError("Unreachable instruction");
// Samples a bindless texture using explicit derivatives.
// Signature: (EmitContext&, IR::Inst*, Id coords, const IR::Value& handle,
//             const IR::Value& derivatives, const IR::Value& offset,
//             const IR::Value& lod_clamp).
// The derivatives, offset and LOD clamp are all carried by ImageOperands, and
// the sample goes through the shared Emit helper like the other bindless
// emitters, so sparse and non-sparse variants are handled the same way.
// Previously the packed derivative Id was additionally passed as both dPdx and
// dPdy, and the sparse variant bypassed Emit.
// NOTE(review): assumes the derivative-taking ImageOperands constructor sets
// the Grad image operand itself — confirm against its definition.
// NOTE(review): the derivative count is still hard-coded to 2 — confirm whether
// the instruction info should select it.
Id EmitBindlessImageGradient(EmitContext& ctx, IR::Inst* inst, Id coords, const IR::Value& handle, const IR::Value& derivatives, const IR::Value& offset, const IR::Value& lod_clamp) {
    const auto info = inst->Flags<IR::TextureInstInfo>();
    const TextureDefinition& def = ctx.textures.at(info.descriptor_index);
    const Id pointer = ctx.OpAccessChain(def.pointer_type, def.id, ctx.Def(handle));
    const Id sampler = ctx.OpLoad(def.sampled_type, pointer);
    const Id deriv_id = ctx.Def(derivatives);
    const Id lod_clamp_id = ctx.Def(lod_clamp);
    // Honour the instruction's LOD-clamp flag instead of always disabling the clamp.
    const ImageOperands operands(ctx, info.has_lod_clamp != 0, deriv_id, 2, offset, lod_clamp_id);
    return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
                &EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4],
                sampler, coords, operands.Mask(), operands.Span());
}
Id EmitBindlessImageRead(EmitContext&) {
throw LogicError("Unreachable instruction");
// Reads a texel from a bindless image at `coords`.
// Signature: (EmitContext&, IR::Inst*, Id coords, const IR::Value& handle).
// The image is loaded through an access chain indexed by the handle value and
// read with OpImageRead, producing a four-component unsigned vector.
Id EmitBindlessImageRead(EmitContext& ctx, IR::Inst* inst, Id coords, const IR::Value& handle) {
    const auto info = inst->Flags<IR::TextureInstInfo>();
    const TextureDefinition& def = ctx.textures.at(info.descriptor_index);
    const Id image_ptr = ctx.OpAccessChain(def.pointer_type, def.id, ctx.Def(handle));
    const Id image = ctx.OpLoad(def.sampled_type, image_ptr);
    const Id texel = ctx.OpImageRead(ctx.U32[4], image, coords);
    return texel;
}
Id EmitBindlessImageWrite(EmitContext&) {
throw LogicError("Unreachable instruction");
// Writes `color` to a bindless image at `coords`.
// Signature: (EmitContext&, IR::Inst*, Id coords, const IR::Value& handle, Id color).
// The image is loaded through an access chain indexed by the handle value and
// written with OpImageWrite; nothing is returned.
void EmitBindlessImageWrite(EmitContext& ctx, IR::Inst* inst, Id coords, const IR::Value& handle, Id color) {
    const auto info = inst->Flags<IR::TextureInstInfo>();
    const TextureDefinition& def = ctx.textures.at(info.descriptor_index);
    const Id image_ptr = ctx.OpAccessChain(def.pointer_type, def.id, ctx.Def(handle));
    const Id image = ctx.OpLoad(def.sampled_type, image_ptr);
    ctx.OpImageWrite(image, coords, color);
}
Id EmitBoundImageSampleImplicitLod(EmitContext&) {

View File

@ -513,18 +513,102 @@ Id EmitConvertU8U32(EmitContext& ctx, Id value);
Id EmitConvertU32U8(EmitContext& ctx, Id value);
Id EmitConvertS32S8(EmitContext& ctx, Id value);
Id EmitConvertS32S16(EmitContext& ctx, Id value);
Id EmitBindlessImageSampleImplicitLod(EmitContext&);
Id EmitBindlessImageSampleExplicitLod(EmitContext&);
Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&);
Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&);
Id EmitBindlessImageGather(EmitContext&);
Id EmitBindlessImageGatherDref(EmitContext&);
Id EmitBindlessImageFetch(EmitContext&);
Id EmitBindlessImageQueryDimensions(EmitContext&);
Id EmitBindlessImageQueryLod(EmitContext&);
Id EmitBindlessImageGradient(EmitContext&);
Id EmitBindlessImageRead(EmitContext&);
Id EmitBindlessImageWrite(EmitContext&);
// Bindless image emitters (multi-argument signatures, for macro compatibility)
Id EmitBindlessImageSampleImplicitLod(
EmitContext& ctx, IR::Inst* inst,
Id coords, // U32
const IR::Value& handle, // Opaque
const IR::Value& bias_lc, // Opaque
const IR::Value& offset // Opaque
);
// Real implementation for BindlessImageSampleExplicitLod
Id EmitBindlessImageSampleExplicitLod(
EmitContext& ctx, IR::Inst* inst,
Id coords, // U32
const IR::Value& handle, // Opaque
const IR::Value& lod, // Opaque
const IR::Value& offset // Opaque
);
// Real implementation for BindlessImageSampleDrefImplicitLod
Id EmitBindlessImageSampleDrefImplicitLod(
EmitContext& ctx, IR::Inst* inst,
Id coords, // U32
const IR::Value& handle, // Opaque
Id dref, // F32
const IR::Value& bias_lc, // Opaque
const IR::Value& offset // Opaque
);
// Real implementation for BindlessImageSampleDrefExplicitLod
Id EmitBindlessImageSampleDrefExplicitLod(
EmitContext& ctx, IR::Inst* inst,
Id coords, // U32
const IR::Value& handle, // Opaque
Id dref, // F32
const IR::Value& lod, // Opaque
const IR::Value& offset // Opaque
);
// Real implementation for BindlessImageGather
Id EmitBindlessImageGather(
EmitContext& ctx, IR::Inst* inst,
Id coords, // U32
const IR::Value& handle, // Opaque
const IR::Value& offset, // Opaque
const IR::Value& offset2 // Opaque
);
// Real implementation for BindlessImageGatherDref
Id EmitBindlessImageGatherDref(
EmitContext& ctx, IR::Inst* inst,
Id coords, // U32
const IR::Value& handle, // Opaque
const IR::Value& offset, // Opaque
const IR::Value& offset2, // Opaque
Id dref // F32
);
// Real implementation for BindlessImageFetch
Id EmitBindlessImageFetch(
EmitContext& ctx, IR::Inst* inst,
Id coords, // U32
const IR::Value& handle, // Opaque
const IR::Value& offset, // Opaque
Id lod, // U32
const IR::Value& ms // Opaque
);
// Real implementation for BindlessImageQueryDimensions
Id EmitBindlessImageQueryDimensions(
EmitContext& ctx, IR::Inst* inst,
Id handle, // U32
Id lod, // U32
Id skip_mips // U1
);
// Real implementation for BindlessImageQueryLod
Id EmitBindlessImageQueryLod(
EmitContext& ctx, IR::Inst* inst,
Id coords, // U32
const IR::Value& handle // Opaque
);
// Real implementation for BindlessImageGradient
Id EmitBindlessImageGradient(
EmitContext& ctx, IR::Inst* inst,
Id coords, // U32
const IR::Value& handle, // Opaque
const IR::Value& derivatives, // Opaque
const IR::Value& offset, // Opaque
const IR::Value& lod_clamp // Opaque
);
// Real implementation for BindlessImageRead
Id EmitBindlessImageRead(
EmitContext& ctx, IR::Inst* inst,
Id coords, // U32
const IR::Value& handle // Opaque
);
// Real implementation for BindlessImageWrite
void EmitBindlessImageWrite(
EmitContext& ctx, IR::Inst* inst,
Id coords, // U32
const IR::Value& handle, // Opaque
Id color // U32x4
);
Id EmitBoundImageSampleImplicitLod(EmitContext&);
Id EmitBoundImageSampleExplicitLod(EmitContext&);
Id EmitBoundImageSampleDrefImplicitLod(EmitContext&);

View File

@ -1696,4 +1696,30 @@ void EmitContext::DefineOutputs(const IR::Program& program) {
}
}
// Gradient sample expressed as OpImageSampleExplicitLod with the Grad image
// operand. dPdx and dPdy are placed first in the operand list, followed by any
// caller-supplied operands; the caller's mask, when present, is OR-ed with Grad.
Id EmitContext::OpImageSampleGrad(Id result_type, Id sampled_image, Id coordinate, Id dPdx, Id dPdy, std::optional<spv::ImageOperandsMask> mask, std::span<const Id> operands) {
    std::vector<Id> grad_operands;
    grad_operands.reserve(operands.size() + 2);
    grad_operands.push_back(dPdx);
    grad_operands.push_back(dPdy);
    // Appending an empty range is a no-op, so no size check is needed.
    grad_operands.insert(grad_operands.end(), operands.begin(), operands.end());

    const spv::ImageOperandsMask grad_mask =
        mask ? static_cast<spv::ImageOperandsMask>(spv::ImageOperandsMask::Grad | *mask)
             : spv::ImageOperandsMask::Grad;
    return this->OpImageSampleExplicitLod(result_type, sampled_image, coordinate, grad_mask,
                                          grad_operands);
}
// Sparse gradient sample expressed as OpImageSparseSampleExplicitLod with the
// Grad image operand. dPdx and dPdy are placed first in the operand list,
// followed by any caller-supplied operands; the caller's mask, when present,
// is OR-ed with Grad.
Id EmitContext::OpImageSparseSampleGrad(Id result_type, Id sampled_image, Id coordinate, Id dPdx, Id dPdy, std::optional<spv::ImageOperandsMask> mask, std::span<const Id> operands) {
    std::vector<Id> grad_operands;
    grad_operands.reserve(operands.size() + 2);
    grad_operands.push_back(dPdx);
    grad_operands.push_back(dPdy);
    // Appending an empty range is a no-op, so no size check is needed.
    grad_operands.insert(grad_operands.end(), operands.begin(), operands.end());

    const spv::ImageOperandsMask grad_mask =
        mask ? static_cast<spv::ImageOperandsMask>(spv::ImageOperandsMask::Grad | *mask)
             : spv::ImageOperandsMask::Grad;
    return this->OpImageSparseSampleExplicitLod(result_type, sampled_image, coordinate,
                                                grad_mask, grad_operands);
}
} // namespace Shader::Backend::SPIRV

View File

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@ -361,6 +364,9 @@ public:
Id load_const_func_u32x2{};
Id load_const_func_u32x4{};
Id OpImageSampleGrad(Id result_type, Id sampled_image, Id coordinate, Id dPdx, Id dPdy, std::optional<spv::ImageOperandsMask> mask, std::span<const Id> operands);
Id OpImageSparseSampleGrad(Id result_type, Id sampled_image, Id coordinate, Id dPdx, Id dPdy, std::optional<spv::ImageOperandsMask> mask, std::span<const Id> operands);
private:
void DefineCommonTypes(const Info& info);
void DefineCommonConstants();

View File

@ -43,6 +43,24 @@ using TextureInstVector = boost::container::small_vector<TextureInst, 24>;
constexpr u32 DESCRIPTOR_SIZE = 8;
constexpr u32 DESCRIPTOR_SIZE_SHIFT = static_cast<u32>(std::countr_zero(DESCRIPTOR_SIZE));
// Builds a ConstBufferAddr from its individual fields so call sites do not
// have to spell out every member.
// Only the primary index, base offset and dynamic offset are mandatory. The
// remaining fields default to: count = 8, no secondary buffer, and zero for
// all shifts and secondary fields.
static ConstBufferAddr MakeConstBufferAddr(u32 index_val, u32 base_offset, IR::U32 dynamic_offset,
                                           u32 count = 8, bool has_secondary = false,
                                           u32 shift_left = 0, u32 secondary_index = 0,
                                           u32 secondary_offset = 0, u32 secondary_shift_left = 0) {
    ConstBufferAddr addr{};
    // Primary constant buffer location.
    addr.index = index_val;
    addr.offset = base_offset;
    addr.shift_left = shift_left;
    // Secondary constant buffer location (meaningful when has_secondary is set).
    addr.secondary_index = secondary_index;
    addr.secondary_offset = secondary_offset;
    addr.secondary_shift_left = secondary_shift_left;
    // Dynamic indexing information.
    addr.dynamic_offset = dynamic_offset;
    addr.count = count;
    addr.has_secondary = has_secondary;
    return addr;
}
IR::Opcode IndexedInstruction(const IR::Inst& inst) {
switch (inst.GetOpcode()) {
case IR::Opcode::BindlessImageSampleImplicitLod:
@ -178,11 +196,11 @@ bool IsTextureInstruction(const IR::Inst& inst) {
return IndexedInstruction(inst) != IR::Opcode::Void;
}
std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environment& env);
std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environment& env, const IR::Program* program);
std::optional<ConstBufferAddr> Track(const IR::Value& value, Environment& env) {
std::optional<ConstBufferAddr> Track(const IR::Value& value, Environment& env, const IR::Program* program) {
return IR::BreadthFirstSearch(
value, [&env](const IR::Inst* inst) { return TryGetConstBuffer(inst, env); });
value, [&env, program](const IR::Inst* inst) { return TryGetConstBuffer(inst, env, program); });
}
std::optional<u32> TryGetConstant(IR::Value& value, Environment& env) {
@ -205,14 +223,67 @@ std::optional<u32> TryGetConstant(IR::Value& value, Environment& env) {
const auto offset_number = offset.U32();
return env.ReadCbufValue(index_number, offset_number);
}
// Looks up the value stored into a local by a WriteLocal instruction.
// `local_idx` must be an immediate local index and `program` must be non-null;
// otherwise nullopt is returned. Blocks are scanned in post_order_blocks order
// and the value operand (Arg(1)) of the first WriteLocal whose immediate index
// matches is returned. Returns nullopt when no such writer exists.
static std::optional<IR::Value> FindLocalWriterSource(const IR::Value& local_idx, const IR::Program* program) {
    if (program == nullptr || !local_idx.IsImmediate()) {
        return std::nullopt;
    }
    const u32 wanted_slot = local_idx.U32();
    for (IR::Block* const block : program->post_order_blocks) {
        for (IR::Inst& candidate : block->Instructions()) {
            const bool is_local_write = candidate.GetOpcode() == IR::Opcode::WriteLocal;
            if (!is_local_write) {
                continue;
            }
            // Only writers addressing the local through an immediate index can match.
            const IR::Value slot{candidate.Arg(0)};
            if (slot.IsImmediate() && slot.U32() == wanted_slot) {
                return IR::Value{candidate.Arg(1)};
            }
        }
    }
    return std::nullopt;
}
// Resolves an IAdd32 whose operands are "constant buffer address + dynamic
// offset". Both operands are tracked; when exactly one of them resolves to a
// ConstBufferAddr, that address is returned with the other operand stored as
// its dynamic offset. When both or neither operand resolve, nullopt is returned.
static std::optional<ConstBufferAddr> TryGetConstBufferFromIAdd(const IR::Inst* inst, Environment& env,
                                                                const IR::Program* program) {
    const IR::Value first{inst->Arg(0)};
    const IR::Value second{inst->Arg(1)};
    std::optional<ConstBufferAddr> from_first{Track(first, env, program)};
    std::optional<ConstBufferAddr> from_second{Track(second, env, program)};

    // Ambiguous (both tracked) or unresolvable (neither tracked).
    if (from_first.has_value() == from_second.has_value()) {
        return std::nullopt;
    }
    if (from_first) {
        from_first->dynamic_offset = IR::U32{second};
        return from_first;
    }
    from_second->dynamic_offset = IR::U32{first};
    return from_second;
}
std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environment& env, const IR::Program* program) {
// Handle LoadLocal uniformly by finding the writer and tracking its source.
if (inst->GetOpcode() == IR::Opcode::LoadLocal) {
const IR::Value local_idx{inst->Arg(0)};
if (const auto src = FindLocalWriterSource(local_idx, program)) {
return Track(*src, env, program);
}
return std::nullopt;
}
std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environment& env) {
switch (inst->GetOpcode()) {
default:
return std::nullopt;
case IR::Opcode::IAdd32: {
return TryGetConstBufferFromIAdd(inst, env, program);
}
case IR::Opcode::BitwiseOr32: {
std::optional lhs{Track(inst->Arg(0), env)};
std::optional rhs{Track(inst->Arg(1), env)};
std::optional lhs{Track(inst->Arg(0), env, program)};
std::optional rhs{Track(inst->Arg(1), env, program)};
if (!lhs || !rhs) {
return std::nullopt;
}
@ -225,29 +296,19 @@ std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environme
if (lhs->shift_left > 0 || lhs->index > rhs->index || lhs->offset > rhs->offset) {
std::swap(lhs, rhs);
}
return ConstBufferAddr{
.index = lhs->index,
.offset = lhs->offset,
.shift_left = lhs->shift_left,
.secondary_index = rhs->index,
.secondary_offset = rhs->offset,
.secondary_shift_left = rhs->shift_left,
.dynamic_offset = {},
.count = 1,
.has_secondary = true,
};
return MakeConstBufferAddr(lhs->index, lhs->offset, IR::U32{}, 1, true, lhs->shift_left,
rhs->index, rhs->offset, rhs->shift_left);
}
case IR::Opcode::ShiftLeftLogical32: {
const IR::Value shift{inst->Arg(1)};
if (!shift.IsImmediate()) {
return std::nullopt;
}
std::optional lhs{Track(inst->Arg(0), env)};
std::optional lhs{Track(inst->Arg(0), env, program)};
if (lhs) {
lhs->shift_left = shift.U32();
}
return lhs;
break;
}
case IR::Opcode::BitwiseAnd32: {
IR::Value op1{inst->Arg(0)};
@ -256,32 +317,31 @@ std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environme
std::swap(op1, op2);
}
if (!op2.IsImmediate() && !op1.IsImmediate()) {
do {
auto try_index = TryGetConstant(op1, env);
if (try_index) {
op1 = op2;
op2 = IR::Value{*try_index};
break;
}
auto try_index = TryGetConstant(op1, env);
if (try_index) {
op1 = op2;
op2 = IR::Value{*try_index};
} else {
auto try_index_2 = TryGetConstant(op2, env);
if (try_index_2) {
op2 = IR::Value{*try_index_2};
break;
} else {
return std::nullopt;
}
return std::nullopt;
} while (false);
}
}
std::optional lhs{Track(op1, env)};
std::optional lhs{Track(op1, env, program)};
if (lhs) {
lhs->shift_left = static_cast<u32>(std::countr_zero(op2.U32()));
}
return lhs;
break;
}
case IR::Opcode::GetCbufU32x2:
case IR::Opcode::GetCbufU32:
break;
}
// Handle GetCbufU32 / GetCbufU32x2 indexed accesses.
const IR::Value index{inst->Arg(0)};
const IR::Value offset{inst->Arg(1)};
if (!index.IsImmediate()) {
@ -290,68 +350,82 @@ std::optional<ConstBufferAddr> TryGetConstBuffer(const IR::Inst* inst, Environme
return std::nullopt;
}
if (offset.IsImmediate()) {
return ConstBufferAddr{
.index = index.U32(),
.offset = offset.U32(),
.shift_left = 0,
.secondary_index = 0,
.secondary_offset = 0,
.secondary_shift_left = 0,
.dynamic_offset = {},
.count = 1,
.has_secondary = false,
};
return MakeConstBufferAddr(index.U32(), offset.U32(), IR::U32{}, 1, false);
}
IR::Inst* const offset_inst{offset.InstRecursive()};
// If the offset is loaded from a local, try to find the writer and analyze it.
if (offset_inst->GetOpcode() == IR::Opcode::LoadLocal) {
const IR::Value local_idx{offset_inst->Arg(0)};
if (const auto writer_src = FindLocalWriterSource(local_idx, program)) {
const IR::Value src{*writer_src};
if (src.IsImmediate()) {
return MakeConstBufferAddr(index.U32(), src.U32(), IR::U32{});
}
IR::Inst* const src_inst{src.InstRecursive()};
if (src_inst->GetOpcode() == IR::Opcode::IAdd32) {
// Extract immediate base + dynamic offset from the IAdd32 writer.
if (src_inst->Arg(0).IsImmediate()) {
return MakeConstBufferAddr(index.U32(), src_inst->Arg(0).U32(), IR::U32{src_inst->Arg(1)});
} else if (src_inst->Arg(1).IsImmediate()) {
return MakeConstBufferAddr(index.U32(), src_inst->Arg(1).U32(), IR::U32{src_inst->Arg(0)});
}
}
}
return std::nullopt;
}
// Otherwise the offset must be an IAdd32 (base + dynamic) to be supported.
if (offset_inst->GetOpcode() != IR::Opcode::IAdd32) {
return std::nullopt;
}
u32 base_offset{};
IR::U32 dynamic_offset;
if (offset_inst->Arg(0).IsImmediate()) {
base_offset = offset_inst->Arg(0).U32();
dynamic_offset = IR::U32{offset_inst->Arg(1)};
} else if (offset_inst->Arg(1).IsImmediate()) {
base_offset = offset_inst->Arg(1).U32();
dynamic_offset = IR::U32{offset_inst->Arg(0)};
} else {
return std::nullopt;
return MakeConstBufferAddr(index.U32(), offset_inst->Arg(0).U32(), IR::U32{offset_inst->Arg(1)});
}
return ConstBufferAddr{
.index = index.U32(),
.offset = base_offset,
.shift_left = 0,
.secondary_index = 0,
.secondary_offset = 0,
.secondary_shift_left = 0,
.dynamic_offset = dynamic_offset,
.count = 8,
.has_secondary = false,
};
if (offset_inst->Arg(1).IsImmediate()) {
return MakeConstBufferAddr(index.U32(), offset_inst->Arg(1).U32(), IR::U32{offset_inst->Arg(0)});
}
return std::nullopt;
}
TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) {
TextureInst MakeInst(Environment& env, IR::Program& program, IR::Block* block, IR::Inst& inst) {
ConstBufferAddr addr;
if (IsBindless(inst)) {
const std::optional<ConstBufferAddr> track_addr{Track(inst.Arg(0), env)};
const std::optional<ConstBufferAddr> track_addr{Track(inst.Arg(0), env, &program)};
if (!track_addr) {
throw NotImplementedException("Failed to track bindless texture constant buffer");
std::string arg0_str;
const IR::Inst* arg0_producer = nullptr;
if (inst.Arg(0).IsImmediate()) {
arg0_str = std::to_string(inst.Arg(0).U32());
} else {
arg0_producer = inst.Arg(0).InstRecursive();
arg0_str = "ptr=" + std::to_string(reinterpret_cast<uintptr_t>(arg0_producer)) + ", op=" + std::to_string(static_cast<int>(arg0_producer->GetOpcode()));
}
std::string arg1_str;
const IR::Inst* arg1_producer = nullptr;
if (inst.NumArgs() > 1) {
if (inst.Arg(1).IsImmediate()) {
arg1_str = std::to_string(inst.Arg(1).U32());
} else {
arg1_producer = inst.Arg(1).InstRecursive();
arg1_str = "ptr=" + std::to_string(reinterpret_cast<uintptr_t>(arg1_producer)) + ", op=" + std::to_string(static_cast<int>(arg1_producer->GetOpcode()));
}
}
LOG_ERROR(HW_GPU, "MakeInst: Failed to track bindless texture constant buffer: opcode={}, Arg(0)={}, Arg(1)={}",
inst.GetOpcode(), arg0_str.c_str(), arg1_str.c_str());
if (arg0_producer) {
LOG_ERROR(HW_GPU, "MakeInst: Arg(0) producer opcode={} @ {}", arg0_producer->GetOpcode(), reinterpret_cast<uintptr_t>(arg0_producer));
}
if (arg1_producer) {
LOG_ERROR(HW_GPU, "MakeInst: Arg(1) producer opcode={} @ {}", arg1_producer->GetOpcode(), reinterpret_cast<uintptr_t>(arg1_producer));
}
throw NotImplementedException("MakeInst: Failed to track bindless texture constant buffer");
} else {
addr = *track_addr;
}
} else {
addr = ConstBufferAddr{
.index = env.TextureBoundBuffer(),
.offset = inst.Arg(0).U32(),
.shift_left = 0,
.secondary_index = 0,
.secondary_offset = 0,
.secondary_shift_left = 0,
.dynamic_offset = {},
.count = 1,
.has_secondary = false,
};
addr = MakeConstBufferAddr(env.TextureBoundBuffer(), inst.Arg(0).U32(), IR::U32{}, 1, false);
}
return TextureInst{
.cbuf = addr,
@ -534,7 +608,7 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
if (!IsTextureInstruction(inst)) {
continue;
}
to_replace.push_back(MakeInst(env, block, inst));
to_replace.push_back(MakeInst(env, program, block, inst));
}
}
// Sort instructions to visit textures by constant buffer index, then by offset