diff --git a/src/dynarmic/docs/ReturnStackBufferOptimization.md b/src/dynarmic/docs/ReturnStackBufferOptimization.md
index 0e72c3bce8..03bc5acfbd 100644
--- a/src/dynarmic/docs/ReturnStackBufferOptimization.md
+++ b/src/dynarmic/docs/ReturnStackBufferOptimization.md
@@ -77,9 +77,9 @@ void EmitX64::EmitPushRSB(IR::Block&, IR::Inst* inst) {
     ASSERT(inst->GetArg(0).IsImmediate());
     u64 imm64 = inst->GetArg(0).GetU64();
-    Xbyak::Reg64 code_ptr_reg = reg_alloc.ScratchGpr({HostLoc::RCX});
-    Xbyak::Reg64 loc_desc_reg = reg_alloc.ScratchGpr();
-    Xbyak::Reg32 index_reg = reg_alloc.ScratchGpr().cvt32();
+    Xbyak::Reg64 code_ptr_reg = reg_alloc.ScratchGpr(code, {HostLoc::RCX});
+    Xbyak::Reg64 loc_desc_reg = reg_alloc.ScratchGpr(code);
+    Xbyak::Reg32 index_reg = reg_alloc.ScratchGpr(code).cvt32();
 
     u64 code_ptr = unique_hash_to_code_ptr.find(imm64) != unique_hash_to_code_ptr.end()
                        ? u64(unique_hash_to_code_ptr[imm64])
                        : u64(code->GetReturnFromRunCodeAddress());
diff --git a/src/dynarmic/src/dynarmic/CMakeLists.txt b/src/dynarmic/src/dynarmic/CMakeLists.txt
index 5d52637ec3..9575eaab6f 100644
--- a/src/dynarmic/src/dynarmic/CMakeLists.txt
+++ b/src/dynarmic/src/dynarmic/CMakeLists.txt
@@ -175,7 +175,6 @@ if ("x86_64" IN_LIST ARCHITECTURE)
         backend/x64/exclusive_monitor.cpp
         backend/x64/exclusive_monitor_friend.h
         backend/x64/host_feature.h
-        backend/x64/hostloc.cpp
         backend/x64/hostloc.h
         backend/x64/jitstate_info.h
         backend/x64/oparg.h
diff --git a/src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp b/src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp
index 974bb99ac3..ef21fd45bd 100644
--- a/src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp
+++ b/src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_data_processing.cpp
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
 /* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage * SPDX-License-Identifier: 0BSD @@ -60,7 +63,7 @@ void EmitIR(oaknut::CodeGenerator& code, EmitContext template<> void EmitIR(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - bool const args_in_gpr[] = { args[0].IsInGpr(), args[1].IsInGpr() }; + bool const args_in_gpr[] = { args[0].IsInGpr(ctx.reg_alloc), args[1].IsInGpr(ctx.reg_alloc) }; if (args_in_gpr[0] && args_in_gpr[1]) { auto Xlo = ctx.reg_alloc.ReadX(args[0]); auto Xhi = ctx.reg_alloc.ReadX(args[1]); diff --git a/src/dynarmic/src/dynarmic/backend/arm64/reg_alloc.cpp b/src/dynarmic/src/dynarmic/backend/arm64/reg_alloc.cpp index df30f8640d..5e0cb79de2 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/reg_alloc.cpp +++ b/src/dynarmic/src/dynarmic/backend/arm64/reg_alloc.cpp @@ -84,7 +84,7 @@ IR::AccType Argument::GetImmediateAccType() const { return value.GetAccType(); } -HostLoc::Kind Argument::CurrentLocationKind() const { +HostLoc::Kind Argument::CurrentLocationKind(RegAlloc& reg_alloc) const { return reg_alloc.ValueLocation(value.GetInst())->kind; } @@ -131,7 +131,7 @@ void HostLocInfo::UpdateUses() { } RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(IR::Inst* inst) { - ArgumentInfo ret = {Argument{*this}, Argument{*this}, Argument{*this}, Argument{*this}}; + ArgumentInfo ret = {Argument{}, Argument{}, Argument{}, Argument{}}; for (size_t i = 0; i < inst->NumArgs(); i++) { const IR::Value arg = inst->GetArg(i); ret[i].value = arg; diff --git a/src/dynarmic/src/dynarmic/backend/arm64/reg_alloc.h b/src/dynarmic/src/dynarmic/backend/arm64/reg_alloc.h index 13cdbaa842..099be4ec10 100644 --- a/src/dynarmic/src/dynarmic/backend/arm64/reg_alloc.h +++ b/src/dynarmic/src/dynarmic/backend/arm64/reg_alloc.h @@ -64,18 +64,18 @@ public: IR::AccType GetImmediateAccType() const; // Only valid if not immediate - HostLoc::Kind CurrentLocationKind() const; - bool IsInGpr() const { return !IsImmediate() && CurrentLocationKind() == HostLoc::Kind::Gpr; } - bool IsInFpr() const { return !IsImmediate() && CurrentLocationKind() == HostLoc::Kind::Fpr; } + HostLoc::Kind CurrentLocationKind(RegAlloc& reg_alloc) const; + bool IsInGpr(RegAlloc& reg_alloc) const { + return !IsImmediate() && CurrentLocationKind(reg_alloc) == HostLoc::Kind::Gpr; + } + bool IsInFpr(RegAlloc& reg_alloc) const { + return !IsImmediate() && CurrentLocationKind(reg_alloc) == HostLoc::Kind::Fpr; + } private: friend class RegAlloc; - explicit Argument(RegAlloc& reg_alloc) - : reg_alloc{reg_alloc} {} - - bool allocated = false; - RegAlloc& reg_alloc; IR::Value value; + bool allocated = false; }; struct FlagsTag final { diff --git a/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp index 6c8a479ec3..90d9b73e08 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a32_emit_x64.cpp @@ -117,7 +117,7 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) { return gprs; }(); - new (&this->reg_alloc) RegAlloc(&code, gpr_order, any_xmm); + new (&this->reg_alloc) RegAlloc(gpr_order, any_xmm); A32EmitContext ctx{conf, reg_alloc, block}; // Start emitting. 
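Reviewer note (editorial, not part of the patch): the reg_alloc.h and reg_alloc.cpp hunks above remove Argument's stored RegAlloc& reference and instead pass the allocator into CurrentLocationKind()/IsInGpr()/IsInFpr(), which is what allows GetArgumentInfo() to build its array from default-constructed Argument{} values instead of Argument{*this}. The following is a minimal self-contained sketch of that pattern; every type and name in it is an invented stand-in, not dynarmic's real API.

// Sketch only: placeholder types modelling "pass the allocator per query".
#include <array>

namespace sketch {

class RegAlloc;

// After the change, Argument is plain data and default-constructible;
// the allocator is supplied explicitly whenever a location query is made.
struct Argument {
    int value = 0;           // stands in for IR::Value
    bool allocated = false;
    bool IsInGpr(const RegAlloc& reg_alloc) const;
};

class RegAlloc {
public:
    std::array<Argument, 4> GetArgumentInfo() const {
        // No back-reference needed: Argument{} instead of Argument{*this}.
        return {Argument{}, Argument{}, Argument{}, Argument{}};
    }
    bool ValueIsInGpr(int /*value*/) const { return true; }  // stands in for ValueLocation()->kind
};

inline bool Argument::IsInGpr(const RegAlloc& reg_alloc) const {
    return reg_alloc.ValueIsInGpr(value);
}

}  // namespace sketch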
@@ -283,47 +283,47 @@ void A32EmitX64::GenTerminalHandlers() { void A32EmitX64::EmitA32SetCheckBit(A32EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt8(); + const Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(code, args[0]).cvt8(); code.mov(code.byte[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, check_bit)], to_store); } void A32EmitX64::EmitA32GetRegister(A32EmitContext& ctx, IR::Inst* inst) { const A32::Reg reg = inst->GetArg(0).GetA32RegRef(); - const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32(); code.mov(result, MJitStateReg(reg)); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void A32EmitX64::EmitA32GetExtendedRegister32(A32EmitContext& ctx, IR::Inst* inst) { const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); ASSERT(A32::IsSingleExtReg(reg)); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); code.movss(result, MJitStateExtReg(reg)); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void A32EmitX64::EmitA32GetExtendedRegister64(A32EmitContext& ctx, IR::Inst* inst) { const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); ASSERT(A32::IsDoubleExtReg(reg)); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); code.movsd(result, MJitStateExtReg(reg)); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void A32EmitX64::EmitA32GetVector(A32EmitContext& ctx, IR::Inst* inst) { const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); ASSERT(A32::IsDoubleExtReg(reg) || A32::IsQuadExtReg(reg)); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); if (A32::IsDoubleExtReg(reg)) { code.movsd(result, MJitStateExtReg(reg)); } else { code.movaps(result, MJitStateExtReg(reg)); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void A32EmitX64::EmitA32SetRegister(A32EmitContext& ctx, IR::Inst* inst) { @@ -332,11 +332,11 @@ void A32EmitX64::EmitA32SetRegister(A32EmitContext& ctx, IR::Inst* inst) { if (args[1].IsImmediate()) { code.mov(MJitStateReg(reg), args[1].GetImmediateU32()); - } else if (args[1].IsInXmm()) { - const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]); + } else if (args[1].IsInXmm(ctx.reg_alloc)) { + const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[1]); code.movd(MJitStateReg(reg), to_store); } else { - const Xbyak::Reg32 to_store = ctx.reg_alloc.UseGpr(args[1]).cvt32(); + const Xbyak::Reg32 to_store = ctx.reg_alloc.UseGpr(code, args[1]).cvt32(); code.mov(MJitStateReg(reg), to_store); } } @@ -346,11 +346,11 @@ void A32EmitX64::EmitA32SetExtendedRegister32(A32EmitContext& ctx, IR::Inst* ins const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); ASSERT(A32::IsSingleExtReg(reg)); - if (args[1].IsInXmm()) { - Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]); + if (args[1].IsInXmm(ctx.reg_alloc)) { + Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[1]); code.movss(MJitStateExtReg(reg), to_store); } else { - Xbyak::Reg32 to_store = ctx.reg_alloc.UseGpr(args[1]).cvt32(); + Xbyak::Reg32 to_store = ctx.reg_alloc.UseGpr(code, args[1]).cvt32(); code.mov(MJitStateExtReg(reg), to_store); } } @@ -360,11 +360,11 @@ void 
A32EmitX64::EmitA32SetExtendedRegister64(A32EmitContext& ctx, IR::Inst* ins const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); ASSERT(A32::IsDoubleExtReg(reg)); - if (args[1].IsInXmm()) { - const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]); + if (args[1].IsInXmm(ctx.reg_alloc)) { + const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[1]); code.movsd(MJitStateExtReg(reg), to_store); } else { - const Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(args[1]); + const Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(code, args[1]); code.mov(MJitStateExtReg(reg), to_store); } } @@ -374,7 +374,7 @@ void A32EmitX64::EmitA32SetVector(A32EmitContext& ctx, IR::Inst* inst) { const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef(); ASSERT(A32::IsDoubleExtReg(reg) || A32::IsQuadExtReg(reg)); - const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[1]); if (A32::IsDoubleExtReg(reg)) { code.movsd(MJitStateExtReg(reg), to_store); } else { @@ -383,9 +383,9 @@ void A32EmitX64::EmitA32SetVector(A32EmitContext& ctx, IR::Inst* inst) { } void A32EmitX64::EmitA32GetCpsr(A32EmitContext& ctx, IR::Inst* inst) { - const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); - const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32(); - const Xbyak::Reg32 tmp2 = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32(); + const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32(); + const Xbyak::Reg32 tmp2 = ctx.reg_alloc.ScratchGpr(code).cvt32(); if (code.HasHostFeature(HostFeature::FastBMI2)) { // Here we observe that cpsr_et and cpsr_ge are right next to each other in memory, @@ -428,15 +428,15 @@ void A32EmitX64::EmitA32GetCpsr(A32EmitContext& ctx, IR::Inst* inst) { code.or_(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_jaifm)]); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg32 cpsr = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); - const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32(); - const Xbyak::Reg32 tmp2 = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 cpsr = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32(); + const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32(); + const Xbyak::Reg32 tmp2 = ctx.reg_alloc.ScratchGpr(code).cvt32(); if (conf.always_little_endian) { code.and_(cpsr, 0xFFFFFDFF); @@ -501,7 +501,7 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) { void A32EmitX64::EmitA32SetCpsrNZCV(A32EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); + const Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32(); code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], to_store); } @@ -512,15 +512,15 @@ void A32EmitX64::EmitA32SetCpsrNZCVRaw(A32EmitContext& ctx, IR::Inst* inst) { code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], NZCV::ToX64(imm)); } else if (code.HasHostFeature(HostFeature::FastBMI2)) { - const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); - const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32(); + const Xbyak::Reg32 b = 
ctx.reg_alloc.ScratchGpr(code).cvt32(); code.shr(a, 28); code.mov(b, NZCV::x64_mask); code.pdep(a, a, b); code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], a); } else { - const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); + const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32(); code.shr(a, 28); code.imul(a, a, NZCV::to_x64_multiplier); @@ -537,8 +537,8 @@ void A32EmitX64::EmitA32SetCpsrNZCVQ(A32EmitContext& ctx, IR::Inst* inst) { code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], NZCV::ToX64(imm)); code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)], u8((imm & 0x08000000) != 0 ? 1 : 0)); } else if (code.HasHostFeature(HostFeature::FastBMI2)) { - const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); - const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32(); + const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr(code).cvt32(); code.shr(a, 28); code.setc(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)]); @@ -546,7 +546,7 @@ void A32EmitX64::EmitA32SetCpsrNZCVQ(A32EmitContext& ctx, IR::Inst* inst) { code.pdep(a, a, b); code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], a); } else { - const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); + const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32(); code.shr(a, 28); code.setc(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)]); @@ -559,8 +559,8 @@ void A32EmitX64::EmitA32SetCpsrNZCVQ(A32EmitContext& ctx, IR::Inst* inst) { void A32EmitX64::EmitA32SetCpsrNZ(A32EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg32 nz = ctx.reg_alloc.UseGpr(args[0]).cvt32(); - const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 nz = ctx.reg_alloc.UseGpr(code, args[0]).cvt32(); + const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32(); code.movzx(tmp, code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1]); code.and_(tmp, 1); @@ -577,12 +577,12 @@ void A32EmitX64::EmitA32SetCpsrNZC(A32EmitContext& ctx, IR::Inst* inst) { code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], c); } else { - const Xbyak::Reg8 c = ctx.reg_alloc.UseGpr(args[1]).cvt8(); + const Xbyak::Reg8 c = ctx.reg_alloc.UseGpr(code, args[1]).cvt8(); code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], c); } } else { - const Xbyak::Reg32 nz = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); + const Xbyak::Reg32 nz = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32(); if (args[1].IsImmediate()) { const bool c = args[1].GetImmediateU1(); @@ -590,7 +590,7 @@ void A32EmitX64::EmitA32SetCpsrNZC(A32EmitContext& ctx, IR::Inst* inst) { code.or_(nz, c); code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], nz.cvt8()); } else { - const Xbyak::Reg32 c = ctx.reg_alloc.UseGpr(args[1]).cvt32(); + const Xbyak::Reg32 c = ctx.reg_alloc.UseGpr(code, args[1]).cvt32(); code.or_(nz, c); code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], nz.cvt8()); @@ -599,13 +599,13 @@ void A32EmitX64::EmitA32SetCpsrNZC(A32EmitContext& ctx, IR::Inst* inst) { } static void EmitGetFlag(BlockOfCode& code, A32EmitContext& ctx, IR::Inst* inst, size_t flag_bit) { - const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32(); 
code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)]); if (flag_bit != 0) { code.shr(result, static_cast(flag_bit)); } code.and_(result, 1); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void A32EmitX64::EmitA32GetCFlag(A32EmitContext& ctx, IR::Inst* inst) { @@ -619,27 +619,27 @@ void A32EmitX64::EmitA32OrQFlag(A32EmitContext& ctx, IR::Inst* inst) { code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)], 1); } } else { - const Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt8(); + const Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(code, args[0]).cvt8(); code.or_(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)], to_store); } } void A32EmitX64::EmitA32GetGEFlags(A32EmitContext& ctx, IR::Inst* inst) { - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); code.movd(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)]); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void A32EmitX64::EmitA32SetGEFlags(A32EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); ASSERT(!args[0].IsImmediate()); - if (args[0].IsInXmm()) { - const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[0]); + if (args[0].IsInXmm(ctx.reg_alloc)) { + const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[0]); code.movd(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], to_store); } else { - const Xbyak::Reg32 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt32(); + const Xbyak::Reg32 to_store = ctx.reg_alloc.UseGpr(code, args[0]).cvt32(); code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], to_store); } } @@ -656,8 +656,8 @@ void A32EmitX64::EmitA32SetGEFlagsCompressed(A32EmitContext& ctx, IR::Inst* inst code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], ge); } else if (code.HasHostFeature(HostFeature::FastBMI2)) { - const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); - const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32(); + const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr(code).cvt32(); code.mov(b, 0x01010101); code.shr(a, 16); @@ -665,7 +665,7 @@ void A32EmitX64::EmitA32SetGEFlagsCompressed(A32EmitContext& ctx, IR::Inst* inst code.imul(a, a, 0xFF); code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], a); } else { - const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); + const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32(); code.shr(a, 16); code.and_(a, 0xF); @@ -690,7 +690,7 @@ void A32EmitX64::EmitA32InstructionSynchronizationBarrier(A32EmitContext& ctx, I return; } - ctx.reg_alloc.HostCall(nullptr); + ctx.reg_alloc.HostCall(code, nullptr); Devirtualize<&A32::UserCallbacks::InstructionSynchronizationBarrierRaised>(conf.callbacks).EmitCall(code); } @@ -718,9 +718,9 @@ void A32EmitX64::EmitA32BXWritePC(A32EmitContext& ctx, IR::Inst* inst) { code.mov(MJitStateReg(A32::Reg::PC), new_pc & mask); code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], new_upper); } else { - const Xbyak::Reg32 new_pc = ctx.reg_alloc.UseScratchGpr(arg).cvt32(); - const Xbyak::Reg32 mask = ctx.reg_alloc.ScratchGpr().cvt32(); - const Xbyak::Reg32 new_upper = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 new_pc = ctx.reg_alloc.UseScratchGpr(code, arg).cvt32(); + const Xbyak::Reg32 mask = 
ctx.reg_alloc.ScratchGpr(code).cvt32(); + const Xbyak::Reg32 new_upper = ctx.reg_alloc.ScratchGpr(code).cvt32(); code.mov(mask, new_pc); code.and_(mask, 1); @@ -745,7 +745,7 @@ void A32EmitX64::EmitA32CallSupervisor(A32EmitContext& ctx, IR::Inst* inst) { code.SwitchMxcsrOnExit(); if (conf.enable_cycle_counting) { - ctx.reg_alloc.HostCall(nullptr); + ctx.reg_alloc.HostCall(code, nullptr); code.mov(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)]); code.sub(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)]); Devirtualize<&A32::UserCallbacks::AddTicks>(conf.callbacks).EmitCall(code); @@ -753,7 +753,7 @@ void A32EmitX64::EmitA32CallSupervisor(A32EmitContext& ctx, IR::Inst* inst) { } auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ctx.reg_alloc.HostCall(nullptr, {}, args[0]); + ctx.reg_alloc.HostCall(code, nullptr, {}, args[0]); Devirtualize<&A32::UserCallbacks::CallSVC>(conf.callbacks).EmitCall(code); if (conf.enable_cycle_counting) { @@ -767,7 +767,7 @@ void A32EmitX64::EmitA32CallSupervisor(A32EmitContext& ctx, IR::Inst* inst) { void A32EmitX64::EmitA32ExceptionRaised(A32EmitContext& ctx, IR::Inst* inst) { code.SwitchMxcsrOnExit(); - ctx.reg_alloc.HostCall(nullptr); + ctx.reg_alloc.HostCall(code, nullptr); if (conf.enable_cycle_counting) { code.mov(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)]); code.sub(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)]); @@ -797,7 +797,7 @@ static u32 GetFpscrImpl(A32JitState* jit_state) { } void A32EmitX64::EmitA32GetFpscr(A32EmitContext& ctx, IR::Inst* inst) { - ctx.reg_alloc.HostCall(inst); + ctx.reg_alloc.HostCall(code, inst); code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR); code.stmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A32JitState, guest_MXCSR)]); @@ -810,7 +810,7 @@ static void SetFpscrImpl(u32 value, A32JitState* jit_state) { void A32EmitX64::EmitA32SetFpscr(A32EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ctx.reg_alloc.HostCall(nullptr, args[0]); + ctx.reg_alloc.HostCall(code, nullptr, args[0]); code.mov(code.ABI_PARAM2, code.ABI_JIT_PTR); code.CallFunction(&SetFpscrImpl); @@ -818,17 +818,17 @@ void A32EmitX64::EmitA32SetFpscr(A32EmitContext& ctx, IR::Inst* inst) { } void A32EmitX64::EmitA32GetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) { - const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32(); code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, fpsr_nzcv)]); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void A32EmitX64::EmitA32SetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::FastBMI2)) { - const Xbyak::Reg32 value = ctx.reg_alloc.UseGpr(args[0]).cvt32(); - const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 value = ctx.reg_alloc.UseGpr(code, args[0]).cvt32(); + const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32(); code.mov(tmp, NZCV::x64_mask); code.pext(tmp, value, tmp); @@ -838,7 +838,7 @@ void A32EmitX64::EmitA32SetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) { return; } - const Xbyak::Reg32 value = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); + const Xbyak::Reg32 value = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32(); code.and_(value, NZCV::x64_mask); code.imul(value, 
value, NZCV::from_x64_multiplier); @@ -851,7 +851,7 @@ static void EmitCoprocessorException() { } static void CallCoprocCallback(BlockOfCode& code, RegAlloc& reg_alloc, A32::Coprocessor::Callback callback, IR::Inst* inst = nullptr, std::optional arg0 = {}, std::optional arg1 = {}) { - reg_alloc.HostCall(inst, {}, arg0, arg1); + reg_alloc.HostCall(code, inst, {}, arg0, arg1); if (callback.user_arg) { code.mov(code.ABI_PARAM1, reinterpret_cast(*callback.user_arg)); @@ -914,8 +914,8 @@ void A32EmitX64::EmitA32CoprocSendOneWord(A32EmitContext& ctx, IR::Inst* inst) { } if (const auto destination_ptr = std::get_if(&action)) { - const Xbyak::Reg32 reg_word = ctx.reg_alloc.UseGpr(args[1]).cvt32(); - const Xbyak::Reg64 reg_destination_addr = ctx.reg_alloc.ScratchGpr(); + const Xbyak::Reg32 reg_word = ctx.reg_alloc.UseGpr(code, args[1]).cvt32(); + const Xbyak::Reg64 reg_destination_addr = ctx.reg_alloc.ScratchGpr(code); code.mov(reg_destination_addr, reinterpret_cast(*destination_ptr)); code.mov(code.dword[reg_destination_addr], reg_word); @@ -954,9 +954,9 @@ void A32EmitX64::EmitA32CoprocSendTwoWords(A32EmitContext& ctx, IR::Inst* inst) } if (const auto destination_ptrs = std::get_if>(&action)) { - const Xbyak::Reg32 reg_word1 = ctx.reg_alloc.UseGpr(args[1]).cvt32(); - const Xbyak::Reg32 reg_word2 = ctx.reg_alloc.UseGpr(args[2]).cvt32(); - const Xbyak::Reg64 reg_destination_addr = ctx.reg_alloc.ScratchGpr(); + const Xbyak::Reg32 reg_word1 = ctx.reg_alloc.UseGpr(code, args[1]).cvt32(); + const Xbyak::Reg32 reg_word2 = ctx.reg_alloc.UseGpr(code, args[2]).cvt32(); + const Xbyak::Reg64 reg_destination_addr = ctx.reg_alloc.ScratchGpr(code); code.mov(reg_destination_addr, reinterpret_cast((*destination_ptrs)[0])); code.mov(code.dword[reg_destination_addr], reg_word1); @@ -998,13 +998,13 @@ void A32EmitX64::EmitA32CoprocGetOneWord(A32EmitContext& ctx, IR::Inst* inst) { } if (const auto source_ptr = std::get_if(&action)) { - const Xbyak::Reg32 reg_word = ctx.reg_alloc.ScratchGpr().cvt32(); - const Xbyak::Reg64 reg_source_addr = ctx.reg_alloc.ScratchGpr(); + const Xbyak::Reg32 reg_word = ctx.reg_alloc.ScratchGpr(code).cvt32(); + const Xbyak::Reg64 reg_source_addr = ctx.reg_alloc.ScratchGpr(code); code.mov(reg_source_addr, reinterpret_cast(*source_ptr)); code.mov(reg_word, code.dword[reg_source_addr]); - ctx.reg_alloc.DefineValue(inst, reg_word); + ctx.reg_alloc.DefineValue(code, inst, reg_word); return; } @@ -1038,9 +1038,9 @@ void A32EmitX64::EmitA32CoprocGetTwoWords(A32EmitContext& ctx, IR::Inst* inst) { } if (const auto source_ptrs = std::get_if>(&action)) { - const Xbyak::Reg64 reg_result = ctx.reg_alloc.ScratchGpr(); - const Xbyak::Reg64 reg_destination_addr = ctx.reg_alloc.ScratchGpr(); - const Xbyak::Reg64 reg_tmp = ctx.reg_alloc.ScratchGpr(); + const Xbyak::Reg64 reg_result = ctx.reg_alloc.ScratchGpr(code); + const Xbyak::Reg64 reg_destination_addr = ctx.reg_alloc.ScratchGpr(code); + const Xbyak::Reg64 reg_tmp = ctx.reg_alloc.ScratchGpr(code); code.mov(reg_destination_addr, reinterpret_cast((*source_ptrs)[1])); code.mov(reg_result.cvt32(), code.dword[reg_destination_addr]); @@ -1049,7 +1049,7 @@ void A32EmitX64::EmitA32CoprocGetTwoWords(A32EmitContext& ctx, IR::Inst* inst) { code.mov(reg_tmp.cvt32(), code.dword[reg_destination_addr]); code.or_(reg_result, reg_tmp); - ctx.reg_alloc.DefineValue(inst, reg_result); + ctx.reg_alloc.DefineValue(code, inst, reg_result); return; } diff --git a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp 
index e92aec04cf..9090285a7d 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/a64_emit_x64.cpp @@ -91,7 +91,7 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) noexcept { return gprs; }(); - new (&this->reg_alloc) RegAlloc{&code, gpr_order, any_xmm}; + new (&this->reg_alloc) RegAlloc{gpr_order, any_xmm}; A64EmitContext ctx{conf, reg_alloc, block}; // Start emitting. @@ -159,7 +159,7 @@ finish_this_inst: } code.int3(); - const size_t size = static_cast(code.getCurr() - entrypoint); + const size_t size = size_t(code.getCurr() - entrypoint); const A64::LocationDescriptor descriptor{block.Location()}; const A64::LocationDescriptor end_location{block.EndLocation()}; @@ -266,25 +266,25 @@ void A64EmitX64::EmitPushRSB(EmitContext& ctx, IR::Inst* inst) { void A64EmitX64::EmitA64SetCheckBit(A64EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt8(); + const Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(code, args[0]).cvt8(); code.mov(code.byte[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, check_bit)], to_store); } void A64EmitX64::EmitA64GetCFlag(A64EmitContext& ctx, IR::Inst* inst) { - const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32(); code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A64JitState, cpsr_nzcv)]); code.shr(result, NZCV::x64_c_flag_bit); code.and_(result, 1); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void A64EmitX64::EmitA64GetNZCVRaw(A64EmitContext& ctx, IR::Inst* inst) { - const Xbyak::Reg32 nzcv_raw = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 nzcv_raw = ctx.reg_alloc.ScratchGpr(code).cvt32(); code.mov(nzcv_raw, dword[code.ABI_JIT_PTR + offsetof(A64JitState, cpsr_nzcv)]); if (code.HasHostFeature(HostFeature::FastBMI2)) { - const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32(); code.mov(tmp, NZCV::x64_mask); code.pext(nzcv_raw, nzcv_raw, tmp); code.shl(nzcv_raw, 28); @@ -294,16 +294,16 @@ void A64EmitX64::EmitA64GetNZCVRaw(A64EmitContext& ctx, IR::Inst* inst) { code.and_(nzcv_raw, NZCV::arm_mask); } - ctx.reg_alloc.DefineValue(inst, nzcv_raw); + ctx.reg_alloc.DefineValue(code, inst, nzcv_raw); } void A64EmitX64::EmitA64SetNZCVRaw(A64EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg32 nzcv_raw = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); + const Xbyak::Reg32 nzcv_raw = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32(); code.shr(nzcv_raw, 28); if (code.HasHostFeature(HostFeature::FastBMI2)) { - const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32(); code.mov(tmp, NZCV::x64_mask); code.pdep(nzcv_raw, nzcv_raw, tmp); } else { @@ -315,63 +315,63 @@ void A64EmitX64::EmitA64SetNZCVRaw(A64EmitContext& ctx, IR::Inst* inst) { void A64EmitX64::EmitA64SetNZCV(A64EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); + const Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32(); code.mov(dword[code.ABI_JIT_PTR + offsetof(A64JitState, cpsr_nzcv)], to_store); } void A64EmitX64::EmitA64GetW(A64EmitContext& ctx, IR::Inst* inst) { const A64::Reg reg = 
inst->GetArg(0).GetA64RegRef(); - const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32(); code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A64JitState, reg) + sizeof(u64) * static_cast(reg)]); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void A64EmitX64::EmitA64GetX(A64EmitContext& ctx, IR::Inst* inst) { const A64::Reg reg = inst->GetArg(0).GetA64RegRef(); - const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(); + const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(code); code.mov(result, qword[code.ABI_JIT_PTR + offsetof(A64JitState, reg) + sizeof(u64) * static_cast(reg)]); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void A64EmitX64::EmitA64GetS(A64EmitContext& ctx, IR::Inst* inst) { const A64::Vec vec = inst->GetArg(0).GetA64VecRef(); const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast(vec)]; - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); code.movd(result, addr); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void A64EmitX64::EmitA64GetD(A64EmitContext& ctx, IR::Inst* inst) { const A64::Vec vec = inst->GetArg(0).GetA64VecRef(); const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast(vec)]; - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); code.movq(result, addr); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void A64EmitX64::EmitA64GetQ(A64EmitContext& ctx, IR::Inst* inst) { const A64::Vec vec = inst->GetArg(0).GetA64VecRef(); const auto addr = xword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast(vec)]; - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); code.movaps(result, addr); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void A64EmitX64::EmitA64GetSP(A64EmitContext& ctx, IR::Inst* inst) { - const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(); + const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(code); code.mov(result, qword[code.ABI_JIT_PTR + offsetof(A64JitState, sp)]); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void A64EmitX64::EmitA64GetFPCR(A64EmitContext& ctx, IR::Inst* inst) { - const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32(); code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A64JitState, fpcr)]); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } static u32 GetFPSRImpl(A64JitState* jit_state) { @@ -379,7 +379,7 @@ static u32 GetFPSRImpl(A64JitState* jit_state) { } void A64EmitX64::EmitA64GetFPSR(A64EmitContext& ctx, IR::Inst* inst) { - ctx.reg_alloc.HostCall(inst); + ctx.reg_alloc.HostCall(code, inst); code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR); code.stmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A64JitState, guest_MXCSR)]); code.CallFunction(GetFPSRImpl); @@ -393,7 +393,7 @@ void A64EmitX64::EmitA64SetW(A64EmitContext& ctx, IR::Inst* inst) { code.mov(addr, args[1].GetImmediateS32()); } else { // TODO: zext tracking, xmm variant - const Xbyak::Reg64 
to_store = ctx.reg_alloc.UseScratchGpr(args[1]); + const Xbyak::Reg64 to_store = ctx.reg_alloc.UseScratchGpr(code, args[1]); code.mov(to_store.cvt32(), to_store.cvt32()); code.mov(addr, to_store); } @@ -405,11 +405,11 @@ void A64EmitX64::EmitA64SetX(A64EmitContext& ctx, IR::Inst* inst) { const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, reg) + sizeof(u64) * static_cast(reg)]; if (args[1].FitsInImmediateS32()) { code.mov(addr, args[1].GetImmediateS32()); - } else if (args[1].IsInXmm()) { - const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]); + } else if (args[1].IsInXmm(ctx.reg_alloc)) { + const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[1]); code.movq(addr, to_store); } else { - const Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(args[1]); + const Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(code, args[1]); code.mov(addr, to_store); } } @@ -419,8 +419,8 @@ void A64EmitX64::EmitA64SetS(A64EmitContext& ctx, IR::Inst* inst) { const A64::Vec vec = inst->GetArg(0).GetA64VecRef(); const auto addr = xword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast(vec)]; - const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[1]); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); // TODO: Optimize code.pxor(tmp, tmp); code.movss(tmp, to_store); @@ -432,7 +432,7 @@ void A64EmitX64::EmitA64SetD(A64EmitContext& ctx, IR::Inst* inst) { const A64::Vec vec = inst->GetArg(0).GetA64VecRef(); const auto addr = xword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast(vec)]; - const Xbyak::Xmm to_store = ctx.reg_alloc.UseScratchXmm(args[1]); + const Xbyak::Xmm to_store = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.movq(to_store, to_store); // TODO: Remove when able code.movaps(addr, to_store); } @@ -442,7 +442,7 @@ void A64EmitX64::EmitA64SetQ(A64EmitContext& ctx, IR::Inst* inst) { const A64::Vec vec = inst->GetArg(0).GetA64VecRef(); const auto addr = xword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast(vec)]; - const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[1]); code.movaps(addr, to_store); } @@ -451,11 +451,11 @@ void A64EmitX64::EmitA64SetSP(A64EmitContext& ctx, IR::Inst* inst) { const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, sp)]; if (args[0].FitsInImmediateS32()) { code.mov(addr, args[0].GetImmediateS32()); - } else if (args[0].IsInXmm()) { - const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[0]); + } else if (args[0].IsInXmm(ctx.reg_alloc)) { + const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[0]); code.movq(addr, to_store); } else { - const Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(args[0]); + const Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(code, args[0]); code.mov(addr, to_store); } } @@ -466,7 +466,7 @@ static void SetFPCRImpl(A64JitState* jit_state, u32 value) { void A64EmitX64::EmitA64SetFPCR(A64EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ctx.reg_alloc.HostCall(nullptr, {}, args[0]); + ctx.reg_alloc.HostCall(code, nullptr, {}, args[0]); code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR); code.CallFunction(SetFPCRImpl); code.ldmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A64JitState, guest_MXCSR)]); @@ -478,7 +478,7 @@ static void SetFPSRImpl(A64JitState* jit_state, u32 value) { void 
A64EmitX64::EmitA64SetFPSR(A64EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ctx.reg_alloc.HostCall(nullptr, {}, args[0]); + ctx.reg_alloc.HostCall(code, nullptr, {}, args[0]); code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR); code.CallFunction(SetFPSRImpl); code.ldmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A64JitState, guest_MXCSR)]); @@ -489,17 +489,17 @@ void A64EmitX64::EmitA64SetPC(A64EmitContext& ctx, IR::Inst* inst) { const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)]; if (args[0].FitsInImmediateS32()) { code.mov(addr, args[0].GetImmediateS32()); - } else if (args[0].IsInXmm()) { - const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[0]); + } else if (args[0].IsInXmm(ctx.reg_alloc)) { + const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[0]); code.movq(addr, to_store); } else { - const Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(args[0]); + const Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(code, args[0]); code.mov(addr, to_store); } } void A64EmitX64::EmitA64CallSupervisor(A64EmitContext& ctx, IR::Inst* inst) { - ctx.reg_alloc.HostCall(nullptr); + ctx.reg_alloc.HostCall(code, nullptr); auto args = ctx.reg_alloc.GetArgumentInfo(inst); ASSERT(args[0].IsImmediate()); const u32 imm = args[0].GetImmediateU32(); @@ -511,7 +511,7 @@ void A64EmitX64::EmitA64CallSupervisor(A64EmitContext& ctx, IR::Inst* inst) { } void A64EmitX64::EmitA64ExceptionRaised(A64EmitContext& ctx, IR::Inst* inst) { - ctx.reg_alloc.HostCall(nullptr); + ctx.reg_alloc.HostCall(code, nullptr); auto args = ctx.reg_alloc.GetArgumentInfo(inst); ASSERT(args[0].IsImmediate() && args[1].IsImmediate()); const u64 pc = args[0].GetImmediateU64(); @@ -524,13 +524,13 @@ void A64EmitX64::EmitA64ExceptionRaised(A64EmitContext& ctx, IR::Inst* inst) { void A64EmitX64::EmitA64DataCacheOperationRaised(A64EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ctx.reg_alloc.HostCall(nullptr, {}, args[1], args[2]); + ctx.reg_alloc.HostCall(code, nullptr, {}, args[1], args[2]); Devirtualize<&A64::UserCallbacks::DataCacheOperationRaised>(conf.callbacks).EmitCall(code); } void A64EmitX64::EmitA64InstructionCacheOperationRaised(A64EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ctx.reg_alloc.HostCall(nullptr, {}, args[0], args[1]); + ctx.reg_alloc.HostCall(code, nullptr, {}, args[0], args[1]); Devirtualize<&A64::UserCallbacks::InstructionCacheOperationRaised>(conf.callbacks).EmitCall(code); } @@ -548,18 +548,18 @@ void A64EmitX64::EmitA64InstructionSynchronizationBarrier(A64EmitContext& ctx, I return; } - ctx.reg_alloc.HostCall(nullptr); + ctx.reg_alloc.HostCall(code, nullptr); Devirtualize<&A64::UserCallbacks::InstructionSynchronizationBarrierRaised>(conf.callbacks).EmitCall(code); } void A64EmitX64::EmitA64GetCNTFRQ(A64EmitContext& ctx, IR::Inst* inst) { - const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32(); code.mov(result, conf.cntfrq_el0); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void A64EmitX64::EmitA64GetCNTPCT(A64EmitContext& ctx, IR::Inst* inst) { - ctx.reg_alloc.HostCall(inst); + ctx.reg_alloc.HostCall(code, inst); if (!conf.wall_clock_cntpct) { code.UpdateTicks(); } @@ -567,43 +567,43 @@ void A64EmitX64::EmitA64GetCNTPCT(A64EmitContext& ctx, IR::Inst* inst) { } void A64EmitX64::EmitA64GetCTR(A64EmitContext& ctx, IR::Inst* inst) { - const Xbyak::Reg32 result = 
ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32(); code.mov(result, conf.ctr_el0); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void A64EmitX64::EmitA64GetDCZID(A64EmitContext& ctx, IR::Inst* inst) { - const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32(); code.mov(result, conf.dczid_el0); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void A64EmitX64::EmitA64GetTPIDR(A64EmitContext& ctx, IR::Inst* inst) { - const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(); + const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(code); if (conf.tpidr_el0) { code.mov(result, u64(conf.tpidr_el0)); code.mov(result, qword[result]); } else { code.xor_(result.cvt32(), result.cvt32()); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void A64EmitX64::EmitA64GetTPIDRRO(A64EmitContext& ctx, IR::Inst* inst) { - const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(); + const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(code); if (conf.tpidrro_el0) { code.mov(result, u64(conf.tpidrro_el0)); code.mov(result, qword[result]); } else { code.xor_(result.cvt32(), result.cvt32()); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void A64EmitX64::EmitA64SetTPIDR(A64EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[0]); - const Xbyak::Reg64 addr = ctx.reg_alloc.ScratchGpr(); + const Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(code, args[0]); + const Xbyak::Reg64 addr = ctx.reg_alloc.ScratchGpr(code); if (conf.tpidr_el0) { code.mov(addr, u64(conf.tpidr_el0)); code.mov(qword[addr], value); diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp index 0ef14128ae..81cdc66780 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64.cpp @@ -68,7 +68,7 @@ void EmitX64::EmitVoid(EmitContext&, IR::Inst*) { void EmitX64::EmitIdentity(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (!args[0].IsImmediate()) { - ctx.reg_alloc.DefineValue(inst, args[0]); + ctx.reg_alloc.DefineValue(code, inst, args[0]); } } @@ -78,7 +78,7 @@ void EmitX64::EmitBreakpoint(EmitContext&, IR::Inst*) { void EmitX64::EmitCallHostFunction(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ctx.reg_alloc.HostCall(nullptr, args[1], args[2], args[3]); + ctx.reg_alloc.HostCall(code, nullptr, args[1], args[2], args[3]); code.mov(rax, args[0].GetImmediateU64()); code.call(rax); } @@ -120,7 +120,7 @@ void EmitX64::EmitVerboseDebuggingOutput(RegAlloc& reg_alloc) { code.lea(rax, ptr[rsp + sizeof(RegisterData) + offsetof(StackLayout, spill)]); code.mov(qword[rsp + offsetof(RegisterData, spill)], rax); - reg_alloc.EmitVerboseDebuggingOutput(); + reg_alloc.EmitVerboseDebuggingOutput(code); for (int i = 0; i < 16; i++) { if (rsp.getIdx() == i) { @@ -140,9 +140,9 @@ void EmitX64::EmitPushRSB(EmitContext& ctx, IR::Inst* inst) { ASSERT(args[0].IsImmediate()); const u64 unique_hash_of_target = args[0].GetImmediateU64(); - ctx.reg_alloc.ScratchGpr(HostLoc::RCX); - const Xbyak::Reg64 loc_desc_reg = ctx.reg_alloc.ScratchGpr(); - const Xbyak::Reg64 index_reg = 
ctx.reg_alloc.ScratchGpr(); + ctx.reg_alloc.ScratchGpr(code, HostLoc::RCX); + const Xbyak::Reg64 loc_desc_reg = ctx.reg_alloc.ScratchGpr(code); + const Xbyak::Reg64 index_reg = ctx.reg_alloc.ScratchGpr(code); PushRSBHelper(loc_desc_reg, index_reg, IR::LocationDescriptor{unique_hash_of_target}); } @@ -190,12 +190,12 @@ void EmitX64::EmitGetNZFromOp(EmitContext& ctx, IR::Inst* inst) { } }(); - const Xbyak::Reg64 nz = ctx.reg_alloc.ScratchGpr(HostLoc::RAX); - const Xbyak::Reg value = ctx.reg_alloc.UseGpr(args[0]).changeBit(bitsize); + const Xbyak::Reg64 nz = ctx.reg_alloc.ScratchGpr(code, HostLoc::RAX); + const Xbyak::Reg value = ctx.reg_alloc.UseGpr(code, args[0]).changeBit(bitsize); code.test(value, value); code.lahf(); code.movzx(eax, ah); - ctx.reg_alloc.DefineValue(inst, nz); + ctx.reg_alloc.DefineValue(code, inst, nz); } void EmitX64::EmitGetNZCVFromOp(EmitContext& ctx, IR::Inst* inst) { @@ -221,27 +221,27 @@ void EmitX64::EmitGetNZCVFromOp(EmitContext& ctx, IR::Inst* inst) { } }(); - const Xbyak::Reg64 nzcv = ctx.reg_alloc.ScratchGpr(HostLoc::RAX); - const Xbyak::Reg value = ctx.reg_alloc.UseGpr(args[0]).changeBit(bitsize); + const Xbyak::Reg64 nzcv = ctx.reg_alloc.ScratchGpr(code, HostLoc::RAX); + const Xbyak::Reg value = ctx.reg_alloc.UseGpr(code, args[0]).changeBit(bitsize); code.test(value, value); code.lahf(); code.xor_(al, al); - ctx.reg_alloc.DefineValue(inst, nzcv); + ctx.reg_alloc.DefineValue(code, inst, nzcv); } void EmitX64::EmitGetCFlagFromNZCV(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (args[0].IsImmediate()) { - const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32(); const u32 value = (args[0].GetImmediateU32() >> 8) & 1; code.mov(result, value); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } else { - const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); + const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32(); code.shr(result, 8); code.and_(result, 1); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } } @@ -249,30 +249,30 @@ void EmitX64::EmitNZCVFromPackedFlags(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (args[0].IsImmediate()) { - const Xbyak::Reg32 nzcv = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 nzcv = ctx.reg_alloc.ScratchGpr(code).cvt32(); u32 value = 0; value |= mcl::bit::get_bit<31>(args[0].GetImmediateU32()) ? (1 << 15) : 0; value |= mcl::bit::get_bit<30>(args[0].GetImmediateU32()) ? (1 << 14) : 0; value |= mcl::bit::get_bit<29>(args[0].GetImmediateU32()) ? (1 << 8) : 0; value |= mcl::bit::get_bit<28>(args[0].GetImmediateU32()) ? 
(1 << 0) : 0; code.mov(nzcv, value); - ctx.reg_alloc.DefineValue(inst, nzcv); + ctx.reg_alloc.DefineValue(code, inst, nzcv); } else if (code.HasHostFeature(HostFeature::FastBMI2)) { - const Xbyak::Reg32 nzcv = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); - const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 nzcv = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32(); + const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32(); code.shr(nzcv, 28); code.mov(tmp, NZCV::x64_mask); code.pdep(nzcv, nzcv, tmp); - ctx.reg_alloc.DefineValue(inst, nzcv); + ctx.reg_alloc.DefineValue(code, inst, nzcv); } else { - const Xbyak::Reg32 nzcv = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); + const Xbyak::Reg32 nzcv = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32(); code.shr(nzcv, 28); code.imul(nzcv, nzcv, NZCV::to_x64_multiplier); code.and_(nzcv, NZCV::x64_mask); - ctx.reg_alloc.DefineValue(inst, nzcv); + ctx.reg_alloc.DefineValue(code, inst, nzcv); } } diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_aes.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_aes.cpp index 2ec9bce068..1ab5a12de1 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_aes.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_aes.cpp @@ -23,13 +23,13 @@ using AESFn = void(AES::State&, const AES::State&); static void EmitAESFunction(RegAlloc::ArgumentInfo args, EmitContext& ctx, BlockOfCode& code, IR::Inst* inst, AESFn fn) { constexpr u32 stack_space = static_cast(sizeof(AES::State)) * 2; - const Xbyak::Xmm input = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm input = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); ctx.reg_alloc.EndOfAllocScope(); - ctx.reg_alloc.HostCall(nullptr); + ctx.reg_alloc.HostCall(code, nullptr); - ctx.reg_alloc.AllocStackSpace(stack_space + ABI_SHADOW_SPACE); + ctx.reg_alloc.AllocStackSpace(code, stack_space + ABI_SHADOW_SPACE); code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE]); code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + sizeof(AES::State)]); @@ -37,22 +37,22 @@ static void EmitAESFunction(RegAlloc::ArgumentInfo args, EmitContext& ctx, Block code.CallFunction(fn); code.movaps(result, xword[rsp + ABI_SHADOW_SPACE]); - ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE); + ctx.reg_alloc.ReleaseStackSpace(code, stack_space + ABI_SHADOW_SPACE); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitAESDecryptSingleRound(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::AES)) { - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm(code); code.pxor(zero, zero); code.aesdeclast(data, zero); - ctx.reg_alloc.DefineValue(inst, data); + ctx.reg_alloc.DefineValue(code, inst, data); return; } @@ -63,13 +63,13 @@ void EmitX64::EmitAESEncryptSingleRound(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::AES)) { - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm zero = 
ctx.reg_alloc.ScratchXmm(code); code.pxor(zero, zero); code.aesenclast(data, zero); - ctx.reg_alloc.DefineValue(inst, data); + ctx.reg_alloc.DefineValue(code, inst, data); return; } @@ -80,11 +80,11 @@ void EmitX64::EmitAESInverseMixColumns(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::AES)) { - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.aesimc(data, data); - ctx.reg_alloc.DefineValue(inst, data); + ctx.reg_alloc.DefineValue(code, inst, data); return; } @@ -95,14 +95,14 @@ void EmitX64::EmitAESMixColumns(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::AES)) { - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm(code); code.pxor(zero, zero); code.aesdeclast(data, zero); code.aesenc(data, zero); - ctx.reg_alloc.DefineValue(inst, data); + ctx.reg_alloc.DefineValue(code, inst, data); return; } diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_crc32.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_crc32.cpp index 9d7c57cb57..48983fa513 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_crc32.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_crc32.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2018 MerryMage * SPDX-License-Identifier: 0BSD @@ -19,16 +22,16 @@ namespace CRC32 = Common::Crypto::CRC32; static void EmitCRC32Castagnoli(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, const int data_size) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSE42)) { - const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); - const Xbyak::Reg value = ctx.reg_alloc.UseGpr(args[1]).changeBit(data_size); + const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32(); + const Xbyak::Reg value = ctx.reg_alloc.UseGpr(code, args[1]).changeBit(data_size); if (data_size != 64) { code.crc32(crc, value); } else { code.crc32(crc.cvt64(), value); } - ctx.reg_alloc.DefineValue(inst, crc); + ctx.reg_alloc.DefineValue(code, inst, crc); } else { - ctx.reg_alloc.HostCall(inst, args[0], args[1], {}); + ctx.reg_alloc.HostCall(code, inst, args[0], args[1], {}); code.mov(code.ABI_PARAM3.cvt32(), data_size / CHAR_BIT); //zext code.CallFunction(&CRC32::ComputeCRC32Castagnoli); } @@ -38,11 +41,11 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size < 32) { - const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); - const Xbyak::Reg64 value = ctx.reg_alloc.UseScratchGpr(args[1]); - const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm xmm_const = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm xmm_tmp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32(); + const Xbyak::Reg64 value = ctx.reg_alloc.UseScratchGpr(code, args[1]); + const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm xmm_const = 
ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm xmm_tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(xmm_const, code.Const(xword, 0xb4e5b025'f7011641, 0x00000001'DB710641)); @@ -64,12 +67,12 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co code.pextrd(crc, xmm_value, 2); - ctx.reg_alloc.DefineValue(inst, crc); + ctx.reg_alloc.DefineValue(code, inst, crc); } else if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 32) { - const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); - const Xbyak::Reg32 value = ctx.reg_alloc.UseGpr(args[1]).cvt32(); - const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm xmm_const = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32(); + const Xbyak::Reg32 value = ctx.reg_alloc.UseGpr(code, args[1]).cvt32(); + const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm xmm_const = ctx.reg_alloc.ScratchXmm(code); code.movdqa(xmm_const, code.Const(xword, 0xb4e5b025'f7011641, 0x00000001'DB710641)); @@ -82,12 +85,12 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co code.pextrd(crc, xmm_value, 2); - ctx.reg_alloc.DefineValue(inst, crc); + ctx.reg_alloc.DefineValue(code, inst, crc); } else if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 64) { - const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); - const Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[1]); - const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm xmm_const = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32(); + const Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(code, args[1]); + const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm xmm_const = ctx.reg_alloc.ScratchXmm(code); code.movdqa(xmm_const, code.Const(xword, 0xb4e5b025'f7011641, 0x00000001'DB710641)); @@ -100,9 +103,9 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co code.pextrd(crc, xmm_value, 2); - ctx.reg_alloc.DefineValue(inst, crc); + ctx.reg_alloc.DefineValue(code, inst, crc); } else { - ctx.reg_alloc.HostCall(inst, args[0], args[1], {}); + ctx.reg_alloc.HostCall(code, inst, args[0], args[1], {}); code.mov(code.ABI_PARAM3, data_size / CHAR_BIT); code.CallFunction(&CRC32::ComputeCRC32ISO); } diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_data_processing.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_data_processing.cpp index dfc09da173..e7fdef2696 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_data_processing.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_data_processing.cpp @@ -24,124 +24,124 @@ using namespace Xbyak::util; void EmitX64::EmitPack2x32To1x64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg64 lo = ctx.reg_alloc.UseScratchGpr(args[0]); - const Xbyak::Reg64 hi = ctx.reg_alloc.UseScratchGpr(args[1]); + const Xbyak::Reg64 lo = ctx.reg_alloc.UseScratchGpr(code, args[0]); + const Xbyak::Reg64 hi = ctx.reg_alloc.UseScratchGpr(code, args[1]); code.shl(hi, 32); code.mov(lo.cvt32(), lo.cvt32()); // Zero extend to 64-bits code.or_(lo, hi); - ctx.reg_alloc.DefineValue(inst, lo); + ctx.reg_alloc.DefineValue(code, inst, lo); } void EmitX64::EmitPack2x64To1x128(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg64 lo = 
ctx.reg_alloc.UseGpr(args[0]); - const Xbyak::Reg64 hi = ctx.reg_alloc.UseGpr(args[1]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Reg64 lo = ctx.reg_alloc.UseGpr(code, args[0]); + const Xbyak::Reg64 hi = ctx.reg_alloc.UseGpr(code, args[1]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); if (code.HasHostFeature(HostFeature::SSE41)) { code.movq(result, lo); code.pinsrq(result, hi, 1); } else { - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); code.movq(result, lo); code.movq(tmp, hi); code.punpcklqdq(result, tmp); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitLeastSignificantWord(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); // TODO: DefineValue directly on Argument - const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(); - const Xbyak::Reg64 source = ctx.reg_alloc.UseGpr(args[0]); + const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(code); + const Xbyak::Reg64 source = ctx.reg_alloc.UseGpr(code, args[0]); code.mov(result.cvt32(), source.cvt32()); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitMostSignificantWord(EmitContext& ctx, IR::Inst* inst) { const auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp); auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); + const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(code, args[0]); code.shr(result, 32); if (carry_inst) { - const Xbyak::Reg64 carry = ctx.reg_alloc.ScratchGpr(); + const Xbyak::Reg64 carry = ctx.reg_alloc.ScratchGpr(code); code.setc(carry.cvt8()); - ctx.reg_alloc.DefineValue(carry_inst, carry); + ctx.reg_alloc.DefineValue(code, carry_inst, carry); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitLeastSignificantHalf(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); // TODO: DefineValue directly on Argument - const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(); - const Xbyak::Reg64 source = ctx.reg_alloc.UseGpr(args[0]); + const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(code); + const Xbyak::Reg64 source = ctx.reg_alloc.UseGpr(code, args[0]); code.movzx(result.cvt32(), source.cvt16()); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitLeastSignificantByte(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); // TODO: DefineValue directly on Argument - const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(); - const Xbyak::Reg64 source = ctx.reg_alloc.UseGpr(args[0]); + const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(code); + const Xbyak::Reg64 source = ctx.reg_alloc.UseGpr(code, args[0]); code.movzx(result.cvt32(), source.cvt8()); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitMostSignificantBit(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); + const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32(); // TODO: Flag optimization code.shr(result, 31); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void 
EmitX64::EmitIsZero32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); + const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32(); // TODO: Flag optimization code.test(result, result); code.sete(result.cvt8()); code.movzx(result, result.cvt8()); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitIsZero64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); + const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(code, args[0]); // TODO: Flag optimization code.test(result, result); code.sete(result.cvt8()); code.movzx(result, result.cvt8()); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitTestBit(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); + const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(code, args[0]); ASSERT(args[1].IsImmediate()); // TODO: Flag optimization code.bt(result, args[1].GetImmediateU8()); code.setc(result.cvt8()); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } static void EmitConditionalSelect(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int bitsize) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg32 nzcv = ctx.reg_alloc.ScratchGpr(HostLoc::RAX).cvt32(); - const Xbyak::Reg then_ = ctx.reg_alloc.UseGpr(args[1]).changeBit(bitsize); - const Xbyak::Reg else_ = ctx.reg_alloc.UseScratchGpr(args[2]).changeBit(bitsize); + const Xbyak::Reg32 nzcv = ctx.reg_alloc.ScratchGpr(code, HostLoc::RAX).cvt32(); + const Xbyak::Reg then_ = ctx.reg_alloc.UseGpr(code, args[1]).changeBit(bitsize); + const Xbyak::Reg else_ = ctx.reg_alloc.UseScratchGpr(code, args[2]).changeBit(bitsize); code.mov(nzcv, dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_cpsr_nzcv]); @@ -198,7 +198,7 @@ static void EmitConditionalSelect(BlockOfCode& code, EmitContext& ctx, IR::Inst* UNREACHABLE(); } - ctx.reg_alloc.DefineValue(inst, else_); + ctx.reg_alloc.DefineValue(code, inst, else_); } void EmitX64::EmitConditionalSelect32(EmitContext& ctx, IR::Inst* inst) { @@ -216,13 +216,13 @@ void EmitX64::EmitConditionalSelectNZCV(EmitContext& ctx, IR::Inst* inst) { static void EmitExtractRegister(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int bit_size) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg result = ctx.reg_alloc.UseScratchGpr(args[0]).changeBit(bit_size); - const Xbyak::Reg operand = ctx.reg_alloc.UseGpr(args[1]).changeBit(bit_size); + const Xbyak::Reg result = ctx.reg_alloc.UseScratchGpr(code, args[0]).changeBit(bit_size); + const Xbyak::Reg operand = ctx.reg_alloc.UseGpr(code, args[1]).changeBit(bit_size); const u8 lsb = args[2].GetImmediateU8(); code.shrd(result, operand, lsb); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitExtractRegister32(Dynarmic::Backend::X64::EmitContext& ctx, IR::Inst* inst) { @@ -239,22 +239,22 @@ static void EmitReplicateBit(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst const u8 bit = args[1].GetImmediateU8(); if (bit == bit_size - 1) { - const Xbyak::Reg result = ctx.reg_alloc.UseScratchGpr(args[0]).changeBit(bit_size); + const Xbyak::Reg 
result = ctx.reg_alloc.UseScratchGpr(code, args[0]).changeBit(bit_size); code.sar(result, bit_size - 1); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } - const Xbyak::Reg value = ctx.reg_alloc.UseGpr(args[0]).changeBit(bit_size); - const Xbyak::Reg result = ctx.reg_alloc.ScratchGpr().changeBit(bit_size); + const Xbyak::Reg value = ctx.reg_alloc.UseGpr(code, args[0]).changeBit(bit_size); + const Xbyak::Reg result = ctx.reg_alloc.ScratchGpr(code).changeBit(bit_size); code.xor_(result, result); code.bt(value, bit); code.sbb(result, result); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitReplicateBit32(Dynarmic::Backend::X64::EmitContext& ctx, IR::Inst* inst) { @@ -275,7 +275,7 @@ void EmitX64::EmitLogicalShiftLeft32(EmitContext& ctx, IR::Inst* inst) { if (!carry_inst) { if (shift_arg.IsImmediate()) { - const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32(); + const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(code, operand_arg).cvt32(); const u8 shift = shift_arg.GetImmediateU8(); if (shift <= 31) { @@ -284,23 +284,23 @@ void EmitX64::EmitLogicalShiftLeft32(EmitContext& ctx, IR::Inst* inst) { code.xor_(result, result); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } else if (code.HasHostFeature(HostFeature::BMI2)) { - const Xbyak::Reg32 shift = ctx.reg_alloc.UseGpr(shift_arg).cvt32(); - const Xbyak::Reg32 operand = ctx.reg_alloc.UseGpr(operand_arg).cvt32(); - const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); - const Xbyak::Reg32 zero = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 shift = ctx.reg_alloc.UseGpr(code, shift_arg).cvt32(); + const Xbyak::Reg32 operand = ctx.reg_alloc.UseGpr(code, operand_arg).cvt32(); + const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32(); + const Xbyak::Reg32 zero = ctx.reg_alloc.ScratchGpr(code).cvt32(); code.shlx(result, operand, shift); code.xor_(zero, zero); code.cmp(shift.cvt8(), 32); code.cmovnb(result, zero); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } else { - ctx.reg_alloc.Use(shift_arg, HostLoc::RCX); - const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32(); - const Xbyak::Reg32 zero = ctx.reg_alloc.ScratchGpr().cvt32(); + ctx.reg_alloc.Use(code, shift_arg, HostLoc::RCX); + const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(code, operand_arg).cvt32(); + const Xbyak::Reg32 zero = ctx.reg_alloc.ScratchGpr(code).cvt32(); // The 32-bit x64 SHL instruction masks the shift count by 0x1F before performing the shift. // ARM differs from the behaviour: It does not mask the count, so shifts above 31 result in zeros. @@ -310,13 +310,13 @@ void EmitX64::EmitLogicalShiftLeft32(EmitContext& ctx, IR::Inst* inst) { code.cmp(code.cl, 32); code.cmovnb(result, zero); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } } else { if (shift_arg.IsImmediate()) { const u8 shift = shift_arg.GetImmediateU8(); - const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32(); - const Xbyak::Reg32 carry = ctx.reg_alloc.UseScratchGpr(carry_arg).cvt32(); + const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(code, operand_arg).cvt32(); + const Xbyak::Reg32 carry = ctx.reg_alloc.UseScratchGpr(code, carry_arg).cvt32(); if (shift == 0) { // There is nothing more to do. 
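// A minimal reference for the carry-producing LSL32 path handled above, written as plain
// C++ rather than emitted x64 (the helper name and the use of std::pair are illustrative
// assumptions, not dynarmic APIs): shifts of 1..31 carry out bit (32 - shift) of the
// operand, a shift of exactly 32 clears the result and carries out bit 0, and larger
// shifts clear both.
#include <cstdint>
#include <utility>
static inline std::pair<std::uint32_t, bool> LSL32_WithCarry(std::uint32_t value, std::uint8_t shift, bool carry_in) {
    if (shift == 0)  return {value, carry_in};                                     // nothing to do, as in the emitted code
    if (shift < 32)  return {value << shift, ((value >> (32 - shift)) & 1) != 0};  // carry is the last bit shifted out
    if (shift == 32) return {0, (value & 1) != 0};
    return {0, false};                                                             // shifts above 32 clear result and carry
}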
@@ -333,13 +333,13 @@ void EmitX64::EmitLogicalShiftLeft32(EmitContext& ctx, IR::Inst* inst) { code.and_(carry, 1); } - ctx.reg_alloc.DefineValue(carry_inst, carry); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, carry_inst, carry); + ctx.reg_alloc.DefineValue(code, inst, result); } else { - ctx.reg_alloc.UseScratch(shift_arg, HostLoc::RCX); - const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32(); - const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32(); - const Xbyak::Reg32 carry = ctx.reg_alloc.UseScratchGpr(carry_arg).cvt32(); + ctx.reg_alloc.UseScratch(code, shift_arg, HostLoc::RCX); + const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(code, operand_arg).cvt32(); + const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32(); + const Xbyak::Reg32 carry = ctx.reg_alloc.UseScratchGpr(code, carry_arg).cvt32(); code.mov(tmp, 63); code.cmp(code.cl, 63); @@ -350,8 +350,8 @@ void EmitX64::EmitLogicalShiftLeft32(EmitContext& ctx, IR::Inst* inst) { code.setc(carry.cvt8()); code.shr(result.cvt64(), 32); - ctx.reg_alloc.DefineValue(carry_inst, carry); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, carry_inst, carry); + ctx.reg_alloc.DefineValue(code, inst, result); } } } @@ -362,7 +362,7 @@ void EmitX64::EmitLogicalShiftLeft64(EmitContext& ctx, IR::Inst* inst) { auto& shift_arg = args[1]; if (shift_arg.IsImmediate()) { - const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(operand_arg); + const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(code, operand_arg); const u8 shift = shift_arg.GetImmediateU8(); if (shift < 64) { @@ -371,23 +371,23 @@ void EmitX64::EmitLogicalShiftLeft64(EmitContext& ctx, IR::Inst* inst) { code.xor_(result.cvt32(), result.cvt32()); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } else if (code.HasHostFeature(HostFeature::BMI2)) { - const Xbyak::Reg64 shift = ctx.reg_alloc.UseGpr(shift_arg); - const Xbyak::Reg64 operand = ctx.reg_alloc.UseGpr(operand_arg); - const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(); - const Xbyak::Reg64 zero = ctx.reg_alloc.ScratchGpr(); + const Xbyak::Reg64 shift = ctx.reg_alloc.UseGpr(code, shift_arg); + const Xbyak::Reg64 operand = ctx.reg_alloc.UseGpr(code, operand_arg); + const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(code); + const Xbyak::Reg64 zero = ctx.reg_alloc.ScratchGpr(code); code.shlx(result, operand, shift); code.xor_(zero.cvt32(), zero.cvt32()); code.cmp(shift.cvt8(), 64); code.cmovnb(result, zero); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } else { - ctx.reg_alloc.Use(shift_arg, HostLoc::RCX); - const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(operand_arg); - const Xbyak::Reg64 zero = ctx.reg_alloc.ScratchGpr(); + ctx.reg_alloc.Use(code, shift_arg, HostLoc::RCX); + const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(code, operand_arg); + const Xbyak::Reg64 zero = ctx.reg_alloc.ScratchGpr(code); // The x64 SHL instruction masks the shift count by 0x1F before performing the shift. // ARM differs from the behaviour: It does not mask the count, so shifts above 63 result in zeros. 
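// Sketch of the 64-bit LSL semantics that the explicit compare against 64 in this path has
// to reproduce. Note that for a 64-bit operand x64 SHL masks the count by 0x3F (0x1F applies
// to the 32-bit form), so out-of-range counts still need to be zeroed by hand. The helper
// name is illustrative, not part of dynarmic.
#include <cstdint>
static inline std::uint64_t LogicalShiftLeft64_Reference(std::uint64_t value, std::uint8_t amount) {
    return amount < 64 ? value << amount : 0;  // ARM: counts of 64 or more yield zero
}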
@@ -397,7 +397,7 @@ void EmitX64::EmitLogicalShiftLeft64(EmitContext& ctx, IR::Inst* inst) { code.cmp(code.cl, 64); code.cmovnb(result, zero); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } } @@ -411,7 +411,7 @@ void EmitX64::EmitLogicalShiftRight32(EmitContext& ctx, IR::Inst* inst) { if (!carry_inst) { if (shift_arg.IsImmediate()) { - const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32(); + const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(code, operand_arg).cvt32(); const u8 shift = shift_arg.GetImmediateU8(); if (shift <= 31) { @@ -420,23 +420,23 @@ void EmitX64::EmitLogicalShiftRight32(EmitContext& ctx, IR::Inst* inst) { code.xor_(result, result); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } else if (code.HasHostFeature(HostFeature::BMI2)) { - const Xbyak::Reg32 shift = ctx.reg_alloc.UseGpr(shift_arg).cvt32(); - const Xbyak::Reg32 operand = ctx.reg_alloc.UseGpr(operand_arg).cvt32(); - const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); - const Xbyak::Reg32 zero = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 shift = ctx.reg_alloc.UseGpr(code, shift_arg).cvt32(); + const Xbyak::Reg32 operand = ctx.reg_alloc.UseGpr(code, operand_arg).cvt32(); + const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32(); + const Xbyak::Reg32 zero = ctx.reg_alloc.ScratchGpr(code).cvt32(); code.shrx(result, operand, shift); code.xor_(zero, zero); code.cmp(shift.cvt8(), 32); code.cmovnb(result, zero); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } else { - ctx.reg_alloc.Use(shift_arg, HostLoc::RCX); - const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32(); - const Xbyak::Reg32 zero = ctx.reg_alloc.ScratchGpr().cvt32(); + ctx.reg_alloc.Use(code, shift_arg, HostLoc::RCX); + const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(code, operand_arg).cvt32(); + const Xbyak::Reg32 zero = ctx.reg_alloc.ScratchGpr(code).cvt32(); // The 32-bit x64 SHR instruction masks the shift count by 0x1F before performing the shift. // ARM differs from the behaviour: It does not mask the count, so shifts above 31 result in zeros. @@ -446,13 +446,13 @@ void EmitX64::EmitLogicalShiftRight32(EmitContext& ctx, IR::Inst* inst) { code.cmp(code.cl, 32); code.cmovnb(result, zero); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } } else { if (shift_arg.IsImmediate()) { const u8 shift = shift_arg.GetImmediateU8(); - const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32(); - const Xbyak::Reg32 carry = ctx.reg_alloc.UseScratchGpr(carry_arg).cvt32(); + const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(code, operand_arg).cvt32(); + const Xbyak::Reg32 carry = ctx.reg_alloc.UseScratchGpr(code, carry_arg).cvt32(); if (shift == 0) { // There is nothing more to do. 
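// Companion reference for the LSR32 carry path handled above (illustrative helper, assuming
// the usual A32 LSR carry rules): shifts of 1..31 move bit (shift - 1) into the carry, a
// shift of exactly 32 clears the result and carries out bit 31, and anything larger clears both.
#include <cstdint>
#include <utility>
static inline std::pair<std::uint32_t, bool> LSR32_WithCarry(std::uint32_t value, std::uint8_t shift, bool carry_in) {
    if (shift == 0)  return {value, carry_in};
    if (shift < 32)  return {value >> shift, ((value >> (shift - 1)) & 1) != 0};
    if (shift == 32) return {0, (value >> 31) != 0};
    return {0, false};
}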
@@ -468,13 +468,13 @@ void EmitX64::EmitLogicalShiftRight32(EmitContext& ctx, IR::Inst* inst) { code.xor_(carry, carry); } - ctx.reg_alloc.DefineValue(carry_inst, carry); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, carry_inst, carry); + ctx.reg_alloc.DefineValue(code, inst, result); } else { - ctx.reg_alloc.UseScratch(shift_arg, HostLoc::RCX); - const Xbyak::Reg32 operand = ctx.reg_alloc.UseGpr(operand_arg).cvt32(); - const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); - const Xbyak::Reg32 carry = ctx.reg_alloc.UseScratchGpr(carry_arg).cvt32(); + ctx.reg_alloc.UseScratch(code, shift_arg, HostLoc::RCX); + const Xbyak::Reg32 operand = ctx.reg_alloc.UseGpr(code, operand_arg).cvt32(); + const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32(); + const Xbyak::Reg32 carry = ctx.reg_alloc.UseScratchGpr(code, carry_arg).cvt32(); code.mov(result, 63); code.cmp(code.cl, 63); @@ -484,8 +484,8 @@ void EmitX64::EmitLogicalShiftRight32(EmitContext& ctx, IR::Inst* inst) { code.shr(result.cvt64(), code.cl); code.setc(carry.cvt8()); - ctx.reg_alloc.DefineValue(carry_inst, carry); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, carry_inst, carry); + ctx.reg_alloc.DefineValue(code, inst, result); } } } @@ -496,7 +496,7 @@ void EmitX64::EmitLogicalShiftRight64(EmitContext& ctx, IR::Inst* inst) { auto& shift_arg = args[1]; if (shift_arg.IsImmediate()) { - const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(operand_arg); + const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(code, operand_arg); const u8 shift = shift_arg.GetImmediateU8(); if (shift < 64) { @@ -505,23 +505,23 @@ void EmitX64::EmitLogicalShiftRight64(EmitContext& ctx, IR::Inst* inst) { code.xor_(result.cvt32(), result.cvt32()); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } else if (code.HasHostFeature(HostFeature::BMI2)) { - const Xbyak::Reg64 shift = ctx.reg_alloc.UseGpr(shift_arg); - const Xbyak::Reg64 operand = ctx.reg_alloc.UseGpr(operand_arg); - const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(); - const Xbyak::Reg64 zero = ctx.reg_alloc.ScratchGpr(); + const Xbyak::Reg64 shift = ctx.reg_alloc.UseGpr(code, shift_arg); + const Xbyak::Reg64 operand = ctx.reg_alloc.UseGpr(code, operand_arg); + const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(code); + const Xbyak::Reg64 zero = ctx.reg_alloc.ScratchGpr(code); code.shrx(result, operand, shift); code.xor_(zero.cvt32(), zero.cvt32()); code.cmp(shift.cvt8(), 63); code.cmovnb(result, zero); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } else { - ctx.reg_alloc.Use(shift_arg, HostLoc::RCX); - const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(operand_arg); - const Xbyak::Reg64 zero = ctx.reg_alloc.ScratchGpr(); + ctx.reg_alloc.Use(code, shift_arg, HostLoc::RCX); + const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(code, operand_arg); + const Xbyak::Reg64 zero = ctx.reg_alloc.ScratchGpr(code); // The x64 SHR instruction masks the shift count by 0x1F before performing the shift. // ARM differs from the behaviour: It does not mask the count, so shifts above 63 result in zeros. 
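// As with the left shift, the 64-bit SHR form masks its count by 0x3F rather than 0x1F, so
// the explicit compare against 64 emitted for this path is what enforces the ARM rule
// sketched below (illustrative helper, not a dynarmic function):
#include <cstdint>
static inline std::uint64_t LogicalShiftRight64_Reference(std::uint64_t value, std::uint8_t amount) {
    return amount < 64 ? value >> amount : 0;  // ARM: counts of 64 or more yield zero
}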
@@ -531,7 +531,7 @@ void EmitX64::EmitLogicalShiftRight64(EmitContext& ctx, IR::Inst* inst) { code.cmp(code.cl, 64); code.cmovnb(result, zero); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } } @@ -546,16 +546,16 @@ void EmitX64::EmitArithmeticShiftRight32(EmitContext& ctx, IR::Inst* inst) { if (!carry_inst) { if (shift_arg.IsImmediate()) { const u8 shift = shift_arg.GetImmediateU8(); - const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32(); + const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(code, operand_arg).cvt32(); code.sar(result, u8(shift < 31 ? shift : 31)); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } else if (code.HasHostFeature(HostFeature::BMI2)) { - const Xbyak::Reg32 shift = ctx.reg_alloc.UseScratchGpr(shift_arg).cvt32(); - const Xbyak::Reg32 operand = ctx.reg_alloc.UseGpr(operand_arg).cvt32(); - const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); - const Xbyak::Reg32 const31 = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 shift = ctx.reg_alloc.UseScratchGpr(code, shift_arg).cvt32(); + const Xbyak::Reg32 operand = ctx.reg_alloc.UseGpr(code, operand_arg).cvt32(); + const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32(); + const Xbyak::Reg32 const31 = ctx.reg_alloc.ScratchGpr(code).cvt32(); // The 32-bit x64 SAR instruction masks the shift count by 0x1F before performing the shift. // ARM differs from the behaviour: It does not mask the count. @@ -566,11 +566,11 @@ void EmitX64::EmitArithmeticShiftRight32(EmitContext& ctx, IR::Inst* inst) { code.cmovnb(shift, const31); code.sarx(result, operand, shift); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } else { - ctx.reg_alloc.UseScratch(shift_arg, HostLoc::RCX); - const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32(); - const Xbyak::Reg32 const31 = ctx.reg_alloc.ScratchGpr().cvt32(); + ctx.reg_alloc.UseScratch(code, shift_arg, HostLoc::RCX); + const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(code, operand_arg).cvt32(); + const Xbyak::Reg32 const31 = ctx.reg_alloc.ScratchGpr(code).cvt32(); // The 32-bit x64 SAR instruction masks the shift count by 0x1F before performing the shift. // ARM differs from the behaviour: It does not mask the count. @@ -581,13 +581,13 @@ void EmitX64::EmitArithmeticShiftRight32(EmitContext& ctx, IR::Inst* inst) { code.cmova(code.ecx, const31); code.sar(result, code.cl); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } } else { if (shift_arg.IsImmediate()) { const u8 shift = shift_arg.GetImmediateU8(); - const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32(); - const Xbyak::Reg8 carry = ctx.reg_alloc.UseScratchGpr(carry_arg).cvt8(); + const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(code, operand_arg).cvt32(); + const Xbyak::Reg8 carry = ctx.reg_alloc.UseScratchGpr(code, carry_arg).cvt8(); if (shift == 0) { // There is nothing more to do. 
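// Reference for the ASR32 carry path handled above (illustrative helper; assumes C++20,
// where right-shifting a negative signed value is defined as arithmetic): shifts of 32 or
// more replicate the sign bit into every result bit and into the carry.
#include <cstdint>
#include <utility>
static inline std::pair<std::uint32_t, bool> ASR32_WithCarry(std::uint32_t value, std::uint8_t shift, bool carry_in) {
    if (shift == 0) return {value, carry_in};
    const std::int32_t s = static_cast<std::int32_t>(value);
    if (shift < 32) return {static_cast<std::uint32_t>(s >> shift), ((value >> (shift - 1)) & 1) != 0};
    return {static_cast<std::uint32_t>(s >> 31), (value >> 31) != 0};
}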
@@ -600,13 +600,13 @@ void EmitX64::EmitArithmeticShiftRight32(EmitContext& ctx, IR::Inst* inst) { code.setc(carry); } - ctx.reg_alloc.DefineValue(carry_inst, carry); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, carry_inst, carry); + ctx.reg_alloc.DefineValue(code, inst, result); } else { - ctx.reg_alloc.UseScratch(shift_arg, HostLoc::RCX); - const Xbyak::Reg32 operand = ctx.reg_alloc.UseGpr(operand_arg).cvt32(); - const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); - const Xbyak::Reg32 carry = ctx.reg_alloc.UseScratchGpr(carry_arg).cvt32(); + ctx.reg_alloc.UseScratch(code, shift_arg, HostLoc::RCX); + const Xbyak::Reg32 operand = ctx.reg_alloc.UseGpr(code, operand_arg).cvt32(); + const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32(); + const Xbyak::Reg32 carry = ctx.reg_alloc.UseScratchGpr(code, carry_arg).cvt32(); code.mov(result, 63); code.cmp(code.cl, 63); @@ -616,8 +616,8 @@ void EmitX64::EmitArithmeticShiftRight32(EmitContext& ctx, IR::Inst* inst) { code.sar(result.cvt64(), code.cl); code.setc(carry.cvt8()); - ctx.reg_alloc.DefineValue(carry_inst, carry); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, carry_inst, carry); + ctx.reg_alloc.DefineValue(code, inst, result); } } } @@ -629,27 +629,27 @@ void EmitX64::EmitArithmeticShiftRight64(EmitContext& ctx, IR::Inst* inst) { if (shift_arg.IsImmediate()) { const u8 shift = shift_arg.GetImmediateU8(); - const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(operand_arg); + const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(code, operand_arg); code.sar(result, u8(shift < 63 ? shift : 63)); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } else if (code.HasHostFeature(HostFeature::BMI2)) { - const Xbyak::Reg64 shift = ctx.reg_alloc.UseScratchGpr(shift_arg); - const Xbyak::Reg64 operand = ctx.reg_alloc.UseGpr(operand_arg); - const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(); - const Xbyak::Reg64 const63 = ctx.reg_alloc.ScratchGpr(); + const Xbyak::Reg64 shift = ctx.reg_alloc.UseScratchGpr(code, shift_arg); + const Xbyak::Reg64 operand = ctx.reg_alloc.UseGpr(code, operand_arg); + const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(code); + const Xbyak::Reg64 const63 = ctx.reg_alloc.ScratchGpr(code); code.mov(const63.cvt32(), 63); code.cmp(shift.cvt8(), 63); code.cmovnb(shift, const63); code.sarx(result, operand, shift); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } else { - ctx.reg_alloc.UseScratch(shift_arg, HostLoc::RCX); - const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(operand_arg); - const Xbyak::Reg64 const63 = ctx.reg_alloc.ScratchGpr(); + ctx.reg_alloc.UseScratch(code, shift_arg, HostLoc::RCX); + const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(code, operand_arg); + const Xbyak::Reg64 const63 = ctx.reg_alloc.ScratchGpr(code); // The 64-bit x64 SAR instruction masks the shift count by 0x3F before performing the shift. // ARM differs from the behaviour: It does not mask the count. 
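// The clamp-to-63 sequences in these ASR64 paths rely on the fact that an arithmetic shift
// by 63 already replicates the sign bit everywhere, so counts of 64 and up can be folded
// into it (illustrative helper, assuming C++20 signed-shift semantics):
#include <cstdint>
static inline std::uint64_t ArithmeticShiftRight64_Reference(std::uint64_t value, std::uint8_t amount) {
    const std::int64_t s = static_cast<std::int64_t>(value);
    return static_cast<std::uint64_t>(s >> (amount < 63 ? amount : 63));
}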
@@ -660,7 +660,7 @@ void EmitX64::EmitArithmeticShiftRight64(EmitContext& ctx, IR::Inst* inst) { code.cmovnb(code.ecx, const63); code.sar(result, code.cl); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } } @@ -675,33 +675,33 @@ void EmitX64::EmitRotateRight32(EmitContext& ctx, IR::Inst* inst) { if (!carry_inst) { if (shift_arg.IsImmediate() && code.HasHostFeature(HostFeature::BMI2)) { const u8 shift = shift_arg.GetImmediateU8(); - const Xbyak::Reg32 operand = ctx.reg_alloc.UseGpr(operand_arg).cvt32(); - const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 operand = ctx.reg_alloc.UseGpr(code, operand_arg).cvt32(); + const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32(); code.rorx(result, operand, shift); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } else if (shift_arg.IsImmediate()) { const u8 shift = shift_arg.GetImmediateU8(); - const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32(); + const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(code, operand_arg).cvt32(); code.ror(result, u8(shift & 0x1F)); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } else { - ctx.reg_alloc.Use(shift_arg, HostLoc::RCX); - const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32(); + ctx.reg_alloc.Use(code, shift_arg, HostLoc::RCX); + const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(code, operand_arg).cvt32(); // x64 ROR instruction does (shift & 0x1F) for us. code.ror(result, code.cl); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } } else { if (shift_arg.IsImmediate()) { const u8 shift = shift_arg.GetImmediateU8(); - const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32(); - const Xbyak::Reg8 carry = ctx.reg_alloc.UseScratchGpr(carry_arg).cvt8(); + const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(code, operand_arg).cvt32(); + const Xbyak::Reg8 carry = ctx.reg_alloc.UseScratchGpr(code, carry_arg).cvt8(); if (shift == 0) { // There is nothing more to do. 
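// Reference for the rotate-with-carry path handled above (illustrative helper): the rotate
// amount is taken modulo 32, and the carry out is simply the new bit 31 of the rotated result.
#include <cstdint>
#include <utility>
static inline std::pair<std::uint32_t, bool> ROR32_WithCarry(std::uint32_t value, std::uint8_t shift, bool carry_in) {
    if (shift == 0) return {value, carry_in};
    const std::uint8_t amt = shift % 32;
    const std::uint32_t result = amt == 0 ? value : (value >> amt) | (value << (32 - amt));
    return {result, (result >> 31) != 0};
}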
@@ -713,12 +713,12 @@ void EmitX64::EmitRotateRight32(EmitContext& ctx, IR::Inst* inst) { code.setc(carry); } - ctx.reg_alloc.DefineValue(carry_inst, carry); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, carry_inst, carry); + ctx.reg_alloc.DefineValue(code, inst, result); } else { - ctx.reg_alloc.UseScratch(shift_arg, HostLoc::RCX); - const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32(); - const Xbyak::Reg8 carry = ctx.reg_alloc.UseScratchGpr(carry_arg).cvt8(); + ctx.reg_alloc.UseScratch(code, shift_arg, HostLoc::RCX); + const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(code, operand_arg).cvt32(); + const Xbyak::Reg8 carry = ctx.reg_alloc.UseScratchGpr(code, carry_arg).cvt8(); Xbyak::Label end; @@ -731,8 +731,8 @@ void EmitX64::EmitRotateRight32(EmitContext& ctx, IR::Inst* inst) { code.L(end); - ctx.reg_alloc.DefineValue(carry_inst, carry); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, carry_inst, carry); + ctx.reg_alloc.DefineValue(code, inst, result); } } } @@ -744,27 +744,27 @@ void EmitX64::EmitRotateRight64(EmitContext& ctx, IR::Inst* inst) { if (shift_arg.IsImmediate() && code.HasHostFeature(HostFeature::BMI2)) { const u8 shift = shift_arg.GetImmediateU8(); - const Xbyak::Reg64 operand = ctx.reg_alloc.UseGpr(operand_arg); - const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(); + const Xbyak::Reg64 operand = ctx.reg_alloc.UseGpr(code, operand_arg); + const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(code); code.rorx(result, operand, shift); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } else if (shift_arg.IsImmediate()) { const u8 shift = shift_arg.GetImmediateU8(); - const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(operand_arg); + const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(code, operand_arg); code.ror(result, u8(shift & 0x3F)); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } else { - ctx.reg_alloc.Use(shift_arg, HostLoc::RCX); - const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(operand_arg); + ctx.reg_alloc.Use(code, shift_arg, HostLoc::RCX); + const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(code, operand_arg); // x64 ROR instruction does (shift & 0x3F) for us. 
code.ror(result, code.cl); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } } @@ -772,8 +772,8 @@ void EmitX64::EmitRotateRightExtended(EmitContext& ctx, IR::Inst* inst) { const auto carry_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp); auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); - const Xbyak::Reg8 carry = ctx.reg_alloc.UseScratchGpr(args[1]).cvt8(); + const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32(); + const Xbyak::Reg8 carry = ctx.reg_alloc.UseScratchGpr(code, args[1]).cvt8(); code.bt(carry.cvt32(), 0); code.rcr(result, 1); @@ -781,10 +781,10 @@ void EmitX64::EmitRotateRightExtended(EmitContext& ctx, IR::Inst* inst) { if (carry_inst) { code.setc(carry); - ctx.reg_alloc.DefineValue(carry_inst, carry); + ctx.reg_alloc.DefineValue(code, carry_inst, carry); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } template @@ -794,34 +794,34 @@ static void EmitMaskedShift32(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins auto& shift_arg = args[1]; if (shift_arg.IsImmediate()) { - const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32(); + const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(code, operand_arg).cvt32(); const u32 shift = shift_arg.GetImmediateU32(); shift_fn(result, static_cast(shift & 0x1F)); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } if constexpr (!std::is_same_v) { if (code.HasHostFeature(HostFeature::BMI2)) { - const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); - const Xbyak::Reg32 operand = ctx.reg_alloc.UseGpr(operand_arg).cvt32(); - const Xbyak::Reg32 shift = ctx.reg_alloc.UseGpr(shift_arg).cvt32(); + const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32(); + const Xbyak::Reg32 operand = ctx.reg_alloc.UseGpr(code, operand_arg).cvt32(); + const Xbyak::Reg32 shift = ctx.reg_alloc.UseGpr(code, shift_arg).cvt32(); (code.*bmi2_shift)(result, operand, shift); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } } - ctx.reg_alloc.Use(shift_arg, HostLoc::RCX); - const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(operand_arg).cvt32(); + ctx.reg_alloc.Use(code, shift_arg, HostLoc::RCX); + const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(code, operand_arg).cvt32(); shift_fn(result, code.cl); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } template @@ -831,34 +831,34 @@ static void EmitMaskedShift64(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins auto& shift_arg = args[1]; if (shift_arg.IsImmediate()) { - const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(operand_arg); + const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(code, operand_arg); const u64 shift = shift_arg.GetImmediateU64(); shift_fn(result, static_cast(shift & 0x3F)); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } if constexpr (!std::is_same_v) { if (code.HasHostFeature(HostFeature::BMI2)) { - const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(); - const Xbyak::Reg64 operand = ctx.reg_alloc.UseGpr(operand_arg); - const Xbyak::Reg64 shift = ctx.reg_alloc.UseGpr(shift_arg); + const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(code); + const Xbyak::Reg64 operand = ctx.reg_alloc.UseGpr(code, operand_arg); + const Xbyak::Reg64 
shift = ctx.reg_alloc.UseGpr(code, shift_arg); (code.*bmi2_shift)(result, operand, shift); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } } - ctx.reg_alloc.Use(shift_arg, HostLoc::RCX); - const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(operand_arg); + ctx.reg_alloc.Use(code, shift_arg, HostLoc::RCX); + const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(code, operand_arg); shift_fn(result, code.cl); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitLogicalShiftLeftMasked32(EmitContext& ctx, IR::Inst* inst) { @@ -901,11 +901,11 @@ void EmitX64::EmitRotateRightMasked64(EmitContext& ctx, IR::Inst* inst) { code, ctx, inst, [&](auto result, auto shift) { code.ror(result, shift); }, nullptr); } -static Xbyak::Reg8 DoCarry(RegAlloc& reg_alloc, Argument& carry_in, IR::Inst* carry_out) { +static Xbyak::Reg8 DoCarry(BlockOfCode& code, RegAlloc& reg_alloc, Argument& carry_in, IR::Inst* carry_out) { if (carry_in.IsImmediate()) { - return carry_out ? reg_alloc.ScratchGpr().cvt8() : Xbyak::Reg8{-1}; + return carry_out ? reg_alloc.ScratchGpr(code).cvt8() : Xbyak::Reg8{-1}; } else { - return carry_out ? reg_alloc.UseScratchGpr(carry_in).cvt8() : reg_alloc.UseGpr(carry_in).cvt8(); + return carry_out ? reg_alloc.UseScratchGpr(code, carry_in).cvt8() : reg_alloc.UseGpr(code, carry_in).cvt8(); } } @@ -914,7 +914,7 @@ static Xbyak::Reg64 DoNZCV(BlockOfCode& code, RegAlloc& reg_alloc, IR::Inst* nzc if (!nzcv_out) { return Xbyak::Reg64{-1}; } - const Xbyak::Reg64 nzcv = reg_alloc.ScratchGpr(HostLoc::RAX); + const Xbyak::Reg64 nzcv = reg_alloc.ScratchGpr(code, HostLoc::RAX); code.xor_(nzcv.cvt32(), nzcv.cvt32()); return nzcv; } @@ -930,28 +930,28 @@ static void EmitAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int bit // Consider using LEA. if (!carry_inst && !overflow_inst && !nzcv_inst && carry_in.IsImmediate() && !carry_in.GetImmediateU1()) { if (args[1].IsImmediate() && args[1].FitsInImmediateS32()) { - const Xbyak::Reg op1 = ctx.reg_alloc.UseGpr(args[0]).changeBit(bitsize); - const Xbyak::Reg result = ctx.reg_alloc.ScratchGpr().changeBit(bitsize); + const Xbyak::Reg op1 = ctx.reg_alloc.UseGpr(code, args[0]).changeBit(bitsize); + const Xbyak::Reg result = ctx.reg_alloc.ScratchGpr(code).changeBit(bitsize); code.lea(result, code.ptr[op1 + args[1].GetImmediateS32()]); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } else { - const Xbyak::Reg op1 = ctx.reg_alloc.UseGpr(args[0]).changeBit(bitsize); - const Xbyak::Reg op2 = ctx.reg_alloc.UseGpr(args[1]).changeBit(bitsize); - const Xbyak::Reg result = ctx.reg_alloc.ScratchGpr().changeBit(bitsize); + const Xbyak::Reg op1 = ctx.reg_alloc.UseGpr(code, args[0]).changeBit(bitsize); + const Xbyak::Reg op2 = ctx.reg_alloc.UseGpr(code, args[1]).changeBit(bitsize); + const Xbyak::Reg result = ctx.reg_alloc.ScratchGpr(code).changeBit(bitsize); code.lea(result, code.ptr[op1 + op2]); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } return; } const Xbyak::Reg64 nzcv = DoNZCV(code, ctx.reg_alloc, nzcv_inst); - const Xbyak::Reg result = ctx.reg_alloc.UseScratchGpr(args[0]).changeBit(bitsize); - const Xbyak::Reg8 carry = DoCarry(ctx.reg_alloc, carry_in, carry_inst); - const Xbyak::Reg8 overflow = overflow_inst ? 
ctx.reg_alloc.ScratchGpr().cvt8() : Xbyak::Reg8{-1}; + const Xbyak::Reg result = ctx.reg_alloc.UseScratchGpr(code, args[0]).changeBit(bitsize); + const Xbyak::Reg8 carry = DoCarry(code, ctx.reg_alloc, carry_in, carry_inst); + const Xbyak::Reg8 overflow = overflow_inst ? ctx.reg_alloc.ScratchGpr(code).cvt8() : Xbyak::Reg8{-1}; if (args[1].IsImmediate() && args[1].GetType() == IR::Type::U32) { const u32 op_arg = args[1].GetImmediateU32(); @@ -967,7 +967,7 @@ static void EmitAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int bit code.adc(result, op_arg); } } else { - OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]); + OpArg op_arg = ctx.reg_alloc.UseOpArg(code, args[1]); op_arg.setBit(bitsize); if (carry_in.IsImmediate()) { if (carry_in.GetImmediateU1()) { @@ -985,17 +985,17 @@ static void EmitAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int bit if (nzcv_inst) { code.lahf(); code.seto(code.al); - ctx.reg_alloc.DefineValue(nzcv_inst, nzcv); + ctx.reg_alloc.DefineValue(code, nzcv_inst, nzcv); } if (carry_inst) { code.setc(carry); - ctx.reg_alloc.DefineValue(carry_inst, carry); + ctx.reg_alloc.DefineValue(code, carry_inst, carry); } if (overflow_inst) { code.seto(overflow); - ctx.reg_alloc.DefineValue(overflow_inst, overflow); + ctx.reg_alloc.DefineValue(code, overflow_inst, overflow); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitAdd32(EmitContext& ctx, IR::Inst* inst) { @@ -1017,19 +1017,19 @@ static void EmitSub(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int bit // Consider using LEA. if (!carry_inst && !overflow_inst && !nzcv_inst && carry_in.IsImmediate() && carry_in.GetImmediateU1() && args[1].IsImmediate() && args[1].FitsInImmediateS32() && args[1].GetImmediateS32() != 0xffff'ffff'8000'0000) { - const Xbyak::Reg op1 = ctx.reg_alloc.UseGpr(args[0]).changeBit(bitsize); - const Xbyak::Reg result = ctx.reg_alloc.ScratchGpr().changeBit(bitsize); + const Xbyak::Reg op1 = ctx.reg_alloc.UseGpr(code, args[0]).changeBit(bitsize); + const Xbyak::Reg result = ctx.reg_alloc.ScratchGpr(code).changeBit(bitsize); code.lea(result, code.ptr[op1 - args[1].GetImmediateS32()]); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } const Xbyak::Reg64 nzcv = DoNZCV(code, ctx.reg_alloc, nzcv_inst); - const Xbyak::Reg result = (is_cmp ? ctx.reg_alloc.UseGpr(args[0]) : ctx.reg_alloc.UseScratchGpr(args[0])).changeBit(bitsize); - const Xbyak::Reg8 carry = DoCarry(ctx.reg_alloc, carry_in, carry_inst); - const Xbyak::Reg8 overflow = overflow_inst ? ctx.reg_alloc.ScratchGpr().cvt8() : Xbyak::Reg8{-1}; + const Xbyak::Reg result = (is_cmp ? ctx.reg_alloc.UseGpr(code, args[0]) : ctx.reg_alloc.UseScratchGpr(code, args[0])).changeBit(bitsize); + const Xbyak::Reg8 carry = DoCarry(code, ctx.reg_alloc, carry_in, carry_inst); + const Xbyak::Reg8 overflow = overflow_inst ? ctx.reg_alloc.ScratchGpr(code).cvt8() : Xbyak::Reg8{-1}; // Note that x64 CF is inverse of what the ARM carry flag is here. 
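// Illustration of the carry-inversion note above (the helper name is an assumption, not a
// dynarmic symbol): ARM computes SUBS as a + NOT(b) + carry_in, so its carry flag means
// "no borrow", which is the opposite of what x64 SUB/SBB leave in CF.
#include <cstdint>
static inline bool ArmSubCarry(std::uint32_t a, std::uint32_t b, bool carry_in) {
    const std::uint64_t sum = static_cast<std::uint64_t>(a) + static_cast<std::uint64_t>(~b) + (carry_in ? 1u : 0u);
    return (sum >> 32) != 0;  // with carry_in set this equals !(a < b), i.e. the inverse of x64 CF after sub
}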
@@ -1040,7 +1040,7 @@ static void EmitSub(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int bit const u32 op_arg = args[1].GetImmediateU32(); code.cmp(result, op_arg); } else { - OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]); + OpArg op_arg = ctx.reg_alloc.UseOpArg(code, args[1]); op_arg.setBit(bitsize); code.cmp(result, *op_arg); } @@ -1059,7 +1059,7 @@ static void EmitSub(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int bit invert_output_carry = false; } } else { - OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]); + OpArg op_arg = ctx.reg_alloc.UseOpArg(code, args[1]); op_arg.setBit(bitsize); if (carry_in.IsImmediate()) { if (carry_in.GetImmediateU1()) { @@ -1081,7 +1081,7 @@ static void EmitSub(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int bit } code.lahf(); code.seto(code.al); - ctx.reg_alloc.DefineValue(nzcv_inst, nzcv); + ctx.reg_alloc.DefineValue(code, nzcv_inst, nzcv); } if (carry_inst) { if (invert_output_carry) { @@ -1089,14 +1089,14 @@ static void EmitSub(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int bit } else { code.setc(carry); } - ctx.reg_alloc.DefineValue(carry_inst, carry); + ctx.reg_alloc.DefineValue(code, carry_inst, carry); } if (overflow_inst) { code.seto(overflow); - ctx.reg_alloc.DefineValue(overflow_inst, overflow); + ctx.reg_alloc.DefineValue(code, overflow_inst, overflow); } if (!is_cmp) { - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } } @@ -1111,58 +1111,58 @@ void EmitX64::EmitSub64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitMul32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); + const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32(); if (args[1].IsImmediate()) { code.imul(result, result, args[1].GetImmediateU32()); } else { - OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]); + OpArg op_arg = ctx.reg_alloc.UseOpArg(code, args[1]); op_arg.setBit(32); code.imul(result, *op_arg); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitMul64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); - OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]); + const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(code, args[0]); + OpArg op_arg = ctx.reg_alloc.UseOpArg(code, args[1]); code.imul(result, *op_arg); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitUnsignedMultiplyHigh64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ctx.reg_alloc.ScratchGpr(HostLoc::RDX); - ctx.reg_alloc.UseScratch(args[0], HostLoc::RAX); - OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]); + ctx.reg_alloc.ScratchGpr(code, HostLoc::RDX); + ctx.reg_alloc.UseScratch(code, args[0], HostLoc::RAX); + OpArg op_arg = ctx.reg_alloc.UseOpArg(code, args[1]); code.mul(*op_arg); - ctx.reg_alloc.DefineValue(inst, rdx); + ctx.reg_alloc.DefineValue(code, inst, rdx); } void EmitX64::EmitSignedMultiplyHigh64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ctx.reg_alloc.ScratchGpr(HostLoc::RDX); - ctx.reg_alloc.UseScratch(args[0], HostLoc::RAX); - OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]); + ctx.reg_alloc.ScratchGpr(code, HostLoc::RDX); + ctx.reg_alloc.UseScratch(code, args[0], HostLoc::RAX); + 
OpArg op_arg = ctx.reg_alloc.UseOpArg(code, args[1]); code.imul(*op_arg); - ctx.reg_alloc.DefineValue(inst, rdx); + ctx.reg_alloc.DefineValue(code, inst, rdx); } void EmitX64::EmitUnsignedDiv32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ctx.reg_alloc.ScratchGpr(HostLoc::RAX); - ctx.reg_alloc.ScratchGpr(HostLoc::RDX); - const Xbyak::Reg32 dividend = ctx.reg_alloc.UseGpr(args[0]).cvt32(); - const Xbyak::Reg32 divisor = ctx.reg_alloc.UseGpr(args[1]).cvt32(); + ctx.reg_alloc.ScratchGpr(code, HostLoc::RAX); + ctx.reg_alloc.ScratchGpr(code, HostLoc::RDX); + const Xbyak::Reg32 dividend = ctx.reg_alloc.UseGpr(code, args[0]).cvt32(); + const Xbyak::Reg32 divisor = ctx.reg_alloc.UseGpr(code, args[1]).cvt32(); Xbyak::Label end; @@ -1174,16 +1174,16 @@ void EmitX64::EmitUnsignedDiv32(EmitContext& ctx, IR::Inst* inst) { code.div(divisor); code.L(end); - ctx.reg_alloc.DefineValue(inst, eax); + ctx.reg_alloc.DefineValue(code, inst, eax); } void EmitX64::EmitUnsignedDiv64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ctx.reg_alloc.ScratchGpr(HostLoc::RAX); - ctx.reg_alloc.ScratchGpr(HostLoc::RDX); - const Xbyak::Reg64 dividend = ctx.reg_alloc.UseGpr(args[0]); - const Xbyak::Reg64 divisor = ctx.reg_alloc.UseGpr(args[1]); + ctx.reg_alloc.ScratchGpr(code, HostLoc::RAX); + ctx.reg_alloc.ScratchGpr(code, HostLoc::RDX); + const Xbyak::Reg64 dividend = ctx.reg_alloc.UseGpr(code, args[0]); + const Xbyak::Reg64 divisor = ctx.reg_alloc.UseGpr(code, args[1]); Xbyak::Label end; @@ -1195,16 +1195,16 @@ void EmitX64::EmitUnsignedDiv64(EmitContext& ctx, IR::Inst* inst) { code.div(divisor); code.L(end); - ctx.reg_alloc.DefineValue(inst, rax); + ctx.reg_alloc.DefineValue(code, inst, rax); } void EmitX64::EmitSignedDiv32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ctx.reg_alloc.ScratchGpr(HostLoc::RAX); - ctx.reg_alloc.ScratchGpr(HostLoc::RDX); - const Xbyak::Reg32 dividend = ctx.reg_alloc.UseGpr(args[0]).cvt32(); - const Xbyak::Reg32 divisor = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32(); + ctx.reg_alloc.ScratchGpr(code, HostLoc::RAX); + ctx.reg_alloc.ScratchGpr(code, HostLoc::RDX); + const Xbyak::Reg32 dividend = ctx.reg_alloc.UseGpr(code, args[0]).cvt32(); + const Xbyak::Reg32 divisor = ctx.reg_alloc.UseScratchGpr(code, args[1]).cvt32(); Xbyak::Label end; @@ -1217,16 +1217,16 @@ void EmitX64::EmitSignedDiv32(EmitContext& ctx, IR::Inst* inst) { code.idiv(divisor.cvt64()); code.L(end); - ctx.reg_alloc.DefineValue(inst, eax); + ctx.reg_alloc.DefineValue(code, inst, eax); } void EmitX64::EmitSignedDiv64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ctx.reg_alloc.ScratchGpr(HostLoc::RAX); - ctx.reg_alloc.ScratchGpr(HostLoc::RDX); - const Xbyak::Reg64 dividend = ctx.reg_alloc.UseGpr(args[0]); - const Xbyak::Reg64 divisor = ctx.reg_alloc.UseGpr(args[1]); + ctx.reg_alloc.ScratchGpr(code, HostLoc::RAX); + ctx.reg_alloc.ScratchGpr(code, HostLoc::RDX); + const Xbyak::Reg64 dividend = ctx.reg_alloc.UseGpr(code, args[0]); + const Xbyak::Reg64 divisor = ctx.reg_alloc.UseGpr(code, args[1]); Xbyak::Label end, ok; @@ -1244,65 +1244,65 @@ void EmitX64::EmitSignedDiv64(EmitContext& ctx, IR::Inst* inst) { code.idiv(divisor); code.L(end); - ctx.reg_alloc.DefineValue(inst, rax); + ctx.reg_alloc.DefineValue(code, inst, rax); } void EmitX64::EmitAnd32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg32 result = 
ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); + const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32(); if (args[1].IsImmediate()) { const u32 op_arg = args[1].GetImmediateU32(); code.and_(result, op_arg); } else { - OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]); + OpArg op_arg = ctx.reg_alloc.UseOpArg(code, args[1]); op_arg.setBit(32); code.and_(result, *op_arg); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitAnd64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); + const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(code, args[0]); if (args[1].FitsInImmediateS32()) { const u32 op_arg = u32(args[1].GetImmediateS32()); code.and_(result, op_arg); } else { - OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]); + OpArg op_arg = ctx.reg_alloc.UseOpArg(code, args[1]); op_arg.setBit(64); code.and_(result, *op_arg); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitAndNot32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (!args[0].IsImmediate() && !args[1].IsImmediate() && code.HasHostFeature(HostFeature::BMI1)) { - Xbyak::Reg32 op_a = ctx.reg_alloc.UseGpr(args[0]).cvt32(); - Xbyak::Reg32 op_b = ctx.reg_alloc.UseGpr(args[1]).cvt32(); - Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); + Xbyak::Reg32 op_a = ctx.reg_alloc.UseGpr(code, args[0]).cvt32(); + Xbyak::Reg32 op_b = ctx.reg_alloc.UseGpr(code, args[1]).cvt32(); + Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32(); code.andn(result, op_b, op_a); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } Xbyak::Reg32 result; if (args[1].IsImmediate()) { - result = ctx.reg_alloc.ScratchGpr().cvt32(); + result = ctx.reg_alloc.ScratchGpr(code).cvt32(); code.mov(result, u32(~args[1].GetImmediateU32())); } else { - result = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32(); + result = ctx.reg_alloc.UseScratchGpr(code, args[1]).cvt32(); code.not_(result); } @@ -1310,32 +1310,32 @@ void EmitX64::EmitAndNot32(EmitContext& ctx, IR::Inst* inst) { const u32 op_arg = args[0].GetImmediateU32(); code.and_(result, op_arg); } else { - OpArg op_arg = ctx.reg_alloc.UseOpArg(args[0]); + OpArg op_arg = ctx.reg_alloc.UseOpArg(code, args[0]); op_arg.setBit(32); code.and_(result, *op_arg); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitAndNot64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (!args[0].IsImmediate() && !args[1].IsImmediate() && code.HasHostFeature(HostFeature::BMI1)) { - Xbyak::Reg64 op_a = ctx.reg_alloc.UseGpr(args[0]); - Xbyak::Reg64 op_b = ctx.reg_alloc.UseGpr(args[1]); - Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(); + Xbyak::Reg64 op_a = ctx.reg_alloc.UseGpr(code, args[0]); + Xbyak::Reg64 op_b = ctx.reg_alloc.UseGpr(code, args[1]); + Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(code); code.andn(result, op_b, op_a); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } Xbyak::Reg64 result; if (args[1].IsImmediate()) { - result = ctx.reg_alloc.ScratchGpr(); + result = ctx.reg_alloc.ScratchGpr(code); code.mov(result, ~args[1].GetImmediateU64()); } else { - result = ctx.reg_alloc.UseScratchGpr(args[1]); + result = 
ctx.reg_alloc.UseScratchGpr(code, args[1]); code.not_(result); } @@ -1343,88 +1343,88 @@ void EmitX64::EmitAndNot64(EmitContext& ctx, IR::Inst* inst) { const u32 op_arg = u32(args[0].GetImmediateS32()); code.and_(result, op_arg); } else { - OpArg op_arg = ctx.reg_alloc.UseOpArg(args[0]); + OpArg op_arg = ctx.reg_alloc.UseOpArg(code, args[0]); op_arg.setBit(64); code.and_(result, *op_arg); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitEor32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); + const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32(); if (args[1].IsImmediate()) { const u32 op_arg = args[1].GetImmediateU32(); code.xor_(result, op_arg); } else { - OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]); + OpArg op_arg = ctx.reg_alloc.UseOpArg(code, args[1]); op_arg.setBit(32); code.xor_(result, *op_arg); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitEor64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); + const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(code, args[0]); if (args[1].FitsInImmediateS32()) { const u32 op_arg = u32(args[1].GetImmediateS32()); code.xor_(result, op_arg); } else { - OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]); + OpArg op_arg = ctx.reg_alloc.UseOpArg(code, args[1]); op_arg.setBit(64); code.xor_(result, *op_arg); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitOr32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); + const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32(); if (args[1].IsImmediate()) { const u32 op_arg = args[1].GetImmediateU32(); code.or_(result, op_arg); } else { - OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]); + OpArg op_arg = ctx.reg_alloc.UseOpArg(code, args[1]); op_arg.setBit(32); code.or_(result, *op_arg); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitOr64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); + const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(code, args[0]); if (args[1].FitsInImmediateS32()) { const u32 op_arg = u32(args[1].GetImmediateS32()); code.or_(result, op_arg); } else { - OpArg op_arg = ctx.reg_alloc.UseOpArg(args[1]); + OpArg op_arg = ctx.reg_alloc.UseOpArg(code, args[1]); op_arg.setBit(64); code.or_(result, *op_arg); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitNot32(EmitContext& ctx, IR::Inst* inst) { @@ -1432,13 +1432,13 @@ void EmitX64::EmitNot32(EmitContext& ctx, IR::Inst* inst) { Xbyak::Reg32 result; if (args[0].IsImmediate()) { - result = ctx.reg_alloc.ScratchGpr().cvt32(); + result = ctx.reg_alloc.ScratchGpr(code).cvt32(); code.mov(result, u32(~args[0].GetImmediateU32())); } else { - result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); + result = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32(); code.not_(result); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, 
inst, result); } void EmitX64::EmitNot64(EmitContext& ctx, IR::Inst* inst) { @@ -1446,62 +1446,62 @@ void EmitX64::EmitNot64(EmitContext& ctx, IR::Inst* inst) { Xbyak::Reg64 result; if (args[0].IsImmediate()) { - result = ctx.reg_alloc.ScratchGpr(); + result = ctx.reg_alloc.ScratchGpr(code); code.mov(result, ~args[0].GetImmediateU64()); } else { - result = ctx.reg_alloc.UseScratchGpr(args[0]); + result = ctx.reg_alloc.UseScratchGpr(code, args[0]); code.not_(result); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitSignExtendByteToWord(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); + const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(code, args[0]); code.movsx(result.cvt32(), result.cvt8()); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitSignExtendHalfToWord(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); + const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(code, args[0]); code.movsx(result.cvt32(), result.cvt16()); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitSignExtendByteToLong(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); + const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(code, args[0]); code.movsx(result.cvt64(), result.cvt8()); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitSignExtendHalfToLong(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); + const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(code, args[0]); code.movsx(result.cvt64(), result.cvt16()); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitSignExtendWordToLong(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); + const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(code, args[0]); code.movsxd(result.cvt64(), result.cvt32()); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitZeroExtendByteToWord(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); + const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(code, args[0]); code.movzx(result.cvt32(), result.cvt8()); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitZeroExtendHalfToWord(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); + const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(code, args[0]); code.movzx(result.cvt32(), result.cvt16()); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitZeroExtendByteToLong(EmitContext& ctx, IR::Inst* inst) { @@ -1516,59 +1516,59 @@ void EmitX64::EmitZeroExtendHalfToLong(EmitContext& ctx, 
IR::Inst* inst) { void EmitX64::EmitZeroExtendWordToLong(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); + const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(code, args[0]); code.mov(result.cvt32(), result.cvt32()); // x64 zeros upper 32 bits on a 32-bit move - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitZeroExtendLongToQuad(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - if (args[0].IsInGpr()) { - const Xbyak::Reg64 source = ctx.reg_alloc.UseGpr(args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + if (args[0].IsInGpr(ctx.reg_alloc)) { + const Xbyak::Reg64 source = ctx.reg_alloc.UseGpr(code, args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); code.movq(result, source); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } else { - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.movq(result, result); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } } void EmitX64::EmitByteReverseWord(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); + const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32(); code.bswap(result); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitByteReverseHalf(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg16 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt16(); + const Xbyak::Reg16 result = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt16(); code.rol(result, 8); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitByteReverseDual(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(args[0]); + const Xbyak::Reg64 result = ctx.reg_alloc.UseScratchGpr(code, args[0]); code.bswap(result); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitCountLeadingZeros32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::LZCNT)) { - const Xbyak::Reg32 source = ctx.reg_alloc.UseGpr(args[0]).cvt32(); - const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 source = ctx.reg_alloc.UseGpr(code, args[0]).cvt32(); + const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32(); code.lzcnt(result, source); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } else { - const Xbyak::Reg32 source = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); - const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); - const Xbyak::Reg32 temp = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 source = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32(); + const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32(); + const Xbyak::Reg32 temp = ctx.reg_alloc.ScratchGpr(code).cvt32(); // The result of a bsr of zero is undefined, but zf is set after it. 
code.bsr(result, source); @@ -1576,23 +1576,23 @@ void EmitX64::EmitCountLeadingZeros32(EmitContext& ctx, IR::Inst* inst) { code.xor_(result, 31); code.test(source, source); code.cmove(result, temp); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } } void EmitX64::EmitCountLeadingZeros64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::LZCNT)) { - const Xbyak::Reg64 source = ctx.reg_alloc.UseGpr(args[0]).cvt64(); - const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr().cvt64(); + const Xbyak::Reg64 source = ctx.reg_alloc.UseGpr(code, args[0]).cvt64(); + const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(code).cvt64(); code.lzcnt(result, source); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } else { - const Xbyak::Reg64 source = ctx.reg_alloc.UseScratchGpr(args[0]).cvt64(); - const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr().cvt64(); - const Xbyak::Reg64 temp = ctx.reg_alloc.ScratchGpr().cvt64(); + const Xbyak::Reg64 source = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt64(); + const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(code).cvt64(); + const Xbyak::Reg64 temp = ctx.reg_alloc.ScratchGpr(code).cvt64(); // The result of a bsr of zero is undefined, but zf is set after it. code.bsr(result, source); @@ -1600,104 +1600,104 @@ void EmitX64::EmitCountLeadingZeros64(EmitContext& ctx, IR::Inst* inst) { code.xor_(result.cvt32(), 63); code.test(source, source); code.cmove(result.cvt32(), temp.cvt32()); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } } void EmitX64::EmitMaxSigned32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg32 x = ctx.reg_alloc.UseGpr(args[0]).cvt32(); - const Xbyak::Reg32 y = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32(); + const Xbyak::Reg32 x = ctx.reg_alloc.UseGpr(code, args[0]).cvt32(); + const Xbyak::Reg32 y = ctx.reg_alloc.UseScratchGpr(code, args[1]).cvt32(); code.cmp(x, y); code.cmovge(y, x); - ctx.reg_alloc.DefineValue(inst, y); + ctx.reg_alloc.DefineValue(code, inst, y); } void EmitX64::EmitMaxSigned64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg64 x = ctx.reg_alloc.UseGpr(args[0]); - const Xbyak::Reg64 y = ctx.reg_alloc.UseScratchGpr(args[1]); + const Xbyak::Reg64 x = ctx.reg_alloc.UseGpr(code, args[0]); + const Xbyak::Reg64 y = ctx.reg_alloc.UseScratchGpr(code, args[1]); code.cmp(x, y); code.cmovge(y, x); - ctx.reg_alloc.DefineValue(inst, y); + ctx.reg_alloc.DefineValue(code, inst, y); } void EmitX64::EmitMaxUnsigned32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg32 x = ctx.reg_alloc.UseGpr(args[0]).cvt32(); - const Xbyak::Reg32 y = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32(); + const Xbyak::Reg32 x = ctx.reg_alloc.UseGpr(code, args[0]).cvt32(); + const Xbyak::Reg32 y = ctx.reg_alloc.UseScratchGpr(code, args[1]).cvt32(); code.cmp(x, y); code.cmova(y, x); - ctx.reg_alloc.DefineValue(inst, y); + ctx.reg_alloc.DefineValue(code, inst, y); } void EmitX64::EmitMaxUnsigned64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg64 x = ctx.reg_alloc.UseGpr(args[0]); - const Xbyak::Reg64 y = ctx.reg_alloc.UseScratchGpr(args[1]); + const Xbyak::Reg64 x = ctx.reg_alloc.UseGpr(code, args[0]); + const Xbyak::Reg64 y = 
ctx.reg_alloc.UseScratchGpr(code, args[1]); code.cmp(x, y); code.cmova(y, x); - ctx.reg_alloc.DefineValue(inst, y); + ctx.reg_alloc.DefineValue(code, inst, y); } void EmitX64::EmitMinSigned32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg32 x = ctx.reg_alloc.UseGpr(args[0]).cvt32(); - const Xbyak::Reg32 y = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32(); + const Xbyak::Reg32 x = ctx.reg_alloc.UseGpr(code, args[0]).cvt32(); + const Xbyak::Reg32 y = ctx.reg_alloc.UseScratchGpr(code, args[1]).cvt32(); code.cmp(x, y); code.cmovle(y, x); - ctx.reg_alloc.DefineValue(inst, y); + ctx.reg_alloc.DefineValue(code, inst, y); } void EmitX64::EmitMinSigned64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg64 x = ctx.reg_alloc.UseGpr(args[0]); - const Xbyak::Reg64 y = ctx.reg_alloc.UseScratchGpr(args[1]); + const Xbyak::Reg64 x = ctx.reg_alloc.UseGpr(code, args[0]); + const Xbyak::Reg64 y = ctx.reg_alloc.UseScratchGpr(code, args[1]); code.cmp(x, y); code.cmovle(y, x); - ctx.reg_alloc.DefineValue(inst, y); + ctx.reg_alloc.DefineValue(code, inst, y); } void EmitX64::EmitMinUnsigned32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg32 x = ctx.reg_alloc.UseGpr(args[0]).cvt32(); - const Xbyak::Reg32 y = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32(); + const Xbyak::Reg32 x = ctx.reg_alloc.UseGpr(code, args[0]).cvt32(); + const Xbyak::Reg32 y = ctx.reg_alloc.UseScratchGpr(code, args[1]).cvt32(); code.cmp(x, y); code.cmovb(y, x); - ctx.reg_alloc.DefineValue(inst, y); + ctx.reg_alloc.DefineValue(code, inst, y); } void EmitX64::EmitMinUnsigned64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg64 x = ctx.reg_alloc.UseGpr(args[0]); - const Xbyak::Reg64 y = ctx.reg_alloc.UseScratchGpr(args[1]); + const Xbyak::Reg64 x = ctx.reg_alloc.UseGpr(code, args[0]); + const Xbyak::Reg64 y = ctx.reg_alloc.UseScratchGpr(code, args[1]); code.cmp(x, y); code.cmovb(y, x); - ctx.reg_alloc.DefineValue(inst, y); + ctx.reg_alloc.DefineValue(code, inst, y); } } // namespace Dynarmic::Backend::X64 diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp index 67d0e06808..6e618130c4 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_floating_point.cpp @@ -261,7 +261,7 @@ void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { SharedLabel end = GenSharedLabel(); - Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); + Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (!ctx.FPCR().DN() && !ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) { end = ProcessNaN(code, ctx, result); @@ -280,7 +280,7 @@ void FPTwoOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { } code.L(*end); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } template @@ -290,8 +290,8 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (ctx.FPCR().DN() || ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) { - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[1]); + const Xbyak::Xmm result = 
ctx.reg_alloc.UseScratchXmm(code, args[0]);
+        const Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(code, args[1]);

         if constexpr (std::is_member_function_pointer_v<Function>) {
             (code.*fn)(result, operand);
@@ -303,14 +303,14 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn)
             ForceToDefaultNaN<fsize>(code, result);
         }

-        ctx.reg_alloc.DefineValue(inst, result);
+        ctx.reg_alloc.DefineValue(code, inst, result);
         return;
     }

-    const Xbyak::Xmm op1 = ctx.reg_alloc.UseXmm(args[0]);
-    const Xbyak::Xmm op2 = ctx.reg_alloc.UseXmm(args[1]);
-    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
-    const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
+    const Xbyak::Xmm op1 = ctx.reg_alloc.UseXmm(code, args[0]);
+    const Xbyak::Xmm op2 = ctx.reg_alloc.UseXmm(code, args[1]);
+    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
+    const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(code);

     SharedLabel end = GenSharedLabel(), nan = GenSharedLabel();
@@ -337,7 +337,7 @@ void FPThreeOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn)
         EmitPostProcessNaNs(code, result, op1, op2, tmp, *end);
     });

-    ctx.reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(code, inst, result);
 }

 }  // anonymous namespace
@@ -348,12 +348,12 @@ void FPAbs(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
     constexpr FPT non_sign_mask = FP::FPInfo<FPT>::sign_mask - FPT(1u);

     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
+    const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]);
     const Xbyak::Address mask = code.Const(xword, non_sign_mask);

     code.andps(result, mask);

-    ctx.reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(code, inst, result);
 }

 void EmitX64::EmitFPAbs16(EmitContext& ctx, IR::Inst* inst) {
@@ -374,12 +374,12 @@ void FPNeg(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
     constexpr FPT sign_mask = FP::FPInfo<FPT>::sign_mask;

     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
+    const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]);
     const Xbyak::Address mask = code.Const(xword, u64(sign_mask));

     code.xorps(result, mask);

-    ctx.reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(code, inst, result);
 }

 void EmitX64::EmitFPNeg16(EmitContext& ctx, IR::Inst* inst) {
@@ -410,14 +410,14 @@ void EmitX64::EmitFPDiv64(EmitContext& ctx, IR::Inst* inst) {
     FPThreeOp<64>(code, ctx, inst, &Xbyak::CodeGenerator::divsd);
 }

-template<size_t fsize, bool is_max>
-static void EmitFPMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
+template<size_t fsize>
+static void EmitFPMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, bool is_max) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
-    const Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(args[1]);
-    const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
-    const Xbyak::Reg64 gpr_scratch = ctx.reg_alloc.ScratchGpr();
+    const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+    const Xbyak::Xmm operand = ctx.reg_alloc.UseScratchXmm(code, args[1]);
+    const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
+    const Xbyak::Reg64 gpr_scratch = ctx.reg_alloc.ScratchGpr(code);

     DenormalsAreZero(code, ctx, {result, operand});
@@ -425,7 +425,7 @@ static void EmitFPMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
     FCODE(ucomis)(result, operand);
     code.jz(*equal, code.T_NEAR);

-    if constexpr (is_max) {
+    if (is_max) {
         FCODE(maxs)(result, operand);
     } else {
         FCODE(mins)(result, operand);
@@ -437,7 +437,7 @@ static void EmitFPMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {

         code.L(*equal);
         code.jp(nan);
-        if constexpr (is_max) {
+        if (is_max) {
             code.andps(result, operand);
         } else {
             code.orps(result, operand);
@@ -455,25 +455,25 @@ static void EmitFPMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
         }
     });

-    ctx.reg_alloc.DefineValue(inst, result);
+    ctx.reg_alloc.DefineValue(code, inst, result);
 }

-template<size_t fsize, bool is_max>
-static inline void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) noexcept {
+template<size_t fsize>
+static inline void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, bool is_max) noexcept {
     using FPT = mcl::unsigned_integer_of_size<fsize>;
     constexpr FPT default_nan = FP::FPInfo<FPT>::DefaultNaN();

     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-    const Xbyak::Xmm op1 = ctx.reg_alloc.UseScratchXmm(args[0]);
-    const Xbyak::Xmm op2 = ctx.reg_alloc.UseScratchXmm(args[1]);  // Result stored here!
+    const Xbyak::Xmm op1 = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+    const Xbyak::Xmm op2 = ctx.reg_alloc.UseScratchXmm(code, args[1]);  // Result stored here!

     DenormalsAreZero(code, ctx, {op1, op2});

     if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
         // vrangep{s,d} will already correctly handle comparing
         // signed zeros and propagating NaNs similar to ARM
-        constexpr FpRangeSelect range_select = is_max ? FpRangeSelect::Max : FpRangeSelect::Min;
+        FpRangeSelect const range_select = is_max ? FpRangeSelect::Max : FpRangeSelect::Min;
         FCODE(vranges)(op2, op1, op2, FpRangeLUT(range_select, FpRangeSign::Preserve));

         if (ctx.FPCR().DN()) {
@@ -481,7 +481,7 @@ static inline void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::
             FCODE(vmovs)(op2 | k1, code.Const(xword, default_nan));
         }
     } else {
-        Xbyak::Reg tmp = ctx.reg_alloc.ScratchGpr();
+        Xbyak::Reg tmp = ctx.reg_alloc.ScratchGpr(code);
         tmp.setBit(fsize);

         const auto move_to_tmp = [=, &code](const Xbyak::Xmm& xmm) {
@@ -496,7 +496,7 @@ static inline void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::
         FCODE(ucomis)(op1, op2);
         code.jz(*z, code.T_NEAR);

-        if constexpr (is_max) {
+        if (is_max) {
             FCODE(maxs)(op2, op1);
         } else {
             FCODE(mins)(op2, op1);
@@ -510,7 +510,7 @@ static inline void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::

         code.L(*z);
         code.jp(nan);
-        if constexpr (is_max) {
+        if (is_max) {
             code.andps(op2, op1);
         } else {
             code.orps(op2, op1);
@@ -567,39 +567,39 @@ static inline void EmitFPMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::
         });
     }

-    ctx.reg_alloc.DefineValue(inst, op2);
+    ctx.reg_alloc.DefineValue(code, inst, op2);
 }

 void EmitX64::EmitFPMax32(EmitContext& ctx, IR::Inst* inst) {
-    EmitFPMinMax<32, true>(code, ctx, inst);
+    EmitFPMinMax<32>(code, ctx, inst, true);
 }

 void EmitX64::EmitFPMax64(EmitContext& ctx, IR::Inst* inst) {
-    EmitFPMinMax<64, true>(code, ctx, inst);
+    EmitFPMinMax<64>(code, ctx, inst, true);
 }

 void EmitX64::EmitFPMaxNumeric32(EmitContext& ctx, IR::Inst* inst) {
-    EmitFPMinMaxNumeric<32, true>(code, ctx, inst);
+    EmitFPMinMaxNumeric<32>(code, ctx, inst, true);
 }

 void EmitX64::EmitFPMaxNumeric64(EmitContext& ctx, IR::Inst* inst) {
-    EmitFPMinMaxNumeric<64, true>(code, ctx, inst);
+    EmitFPMinMaxNumeric<64>(code, ctx, inst, true);
 }

 void EmitX64::EmitFPMin32(EmitContext& ctx, IR::Inst* inst) {
-    EmitFPMinMax<32, false>(code, ctx, inst);
+    EmitFPMinMax<32>(code, ctx, inst, false);
 }

 void EmitX64::EmitFPMin64(EmitContext& ctx, IR::Inst* inst) {
-    EmitFPMinMax<64, false>(code, ctx, inst);
+    EmitFPMinMax<64>(code, ctx, inst, false);
 }

 void EmitX64::EmitFPMinNumeric32(EmitContext& ctx, IR::Inst* inst) {
-    EmitFPMinMaxNumeric<32, false>(code, ctx, inst);
+    EmitFPMinMaxNumeric<32>(code, ctx, inst, false);
 }

 void EmitX64::EmitFPMinNumeric64(EmitContext& ctx, IR::Inst* inst) {
-    EmitFPMinMaxNumeric<64, false>(code, ctx, inst);
+    EmitFPMinMaxNumeric<64>(code, ctx, inst, false);
 }

 void EmitX64::EmitFPMul32(EmitContext& ctx, IR::Inst* inst) {
@@ -610,8 +610,8 @@ void EmitX64::EmitFPMul64(EmitContext& ctx, IR::Inst* inst) {
     FPThreeOp<64>(code, ctx, inst, &Xbyak::CodeGenerator::mulsd);
 }

-template<size_t fsize, bool negate_product>
-static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
+template<size_t fsize>
+static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, bool negate_product) {
     using FPT = mcl::unsigned_integer_of_size<fsize>;
     const auto fallback_fn = negate_product ? &FP::FPMulSub<FPT> : &FP::FPMulAdd<FPT>;
@@ -622,11 +622,11 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
         const bool needs_nan_correction = !ctx.FPCR().DN();

         if (code.HasHostFeature(HostFeature::FMA) && !needs_rounding_correction && !needs_nan_correction) {
-            const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
-            const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
-            const Xbyak::Xmm operand3 = ctx.reg_alloc.UseXmm(args[2]);
+            const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+            const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);
+            const Xbyak::Xmm operand3 = ctx.reg_alloc.UseXmm(code, args[2]);

-            if constexpr (negate_product) {
+            if (negate_product) {
                 FCODE(vfnmadd231s)(result, operand2, operand3);
             } else {
                 FCODE(vfmadd231s)(result, operand2, operand3);
@@ -635,20 +635,20 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
                 ForceToDefaultNaN<fsize>(code, result);
             }

-            ctx.reg_alloc.DefineValue(inst, result);
+            ctx.reg_alloc.DefineValue(code, inst, result);
             return;
         }

         if (code.HasHostFeature(HostFeature::FMA | HostFeature::AVX)) {
             SharedLabel fallback = GenSharedLabel(), end = GenSharedLabel();

-            const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
-            const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
-            const Xbyak::Xmm operand3 = ctx.reg_alloc.UseXmm(args[2]);
-            const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
+            const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]);
+            const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);
+            const Xbyak::Xmm operand3 = ctx.reg_alloc.UseXmm(code, args[2]);
+            const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);

             code.movaps(result, operand1);
-            if constexpr (negate_product) {
+            if (negate_product) {
                 FCODE(vfnmadd231s)(result, operand2, operand3);
             } else {
                 FCODE(vfmadd231s)(result, operand2, operand3);
@@ -752,7 +752,7 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
                 code.ptest(operand2, xmm0);
                 code.jnz(op2_done);
                 code.vorps(result, operand2, xmm0);
-                if constexpr (negate_product) {
+                if (negate_product) {
                     code.xorps(result, code.Const(xword, FP::FPInfo<FPT>::sign_mask));
                 }
                 code.jmp(*end);
@@ -768,7 +768,7 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {

                 // at this point, all SNaNs have been handled
                 // if op1 was not a QNaN and op2 is, negate the result
-                if constexpr (negate_product) {
+                if (negate_product) {
                     FCODE(ucomis)(operand1, operand1);
                     code.jp(*end);
                     FCODE(ucomis)(operand2, operand2);
@@ -780,34 +780,34 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
                 }
             });

-            ctx.reg_alloc.DefineValue(inst, result);
+            ctx.reg_alloc.DefineValue(code, inst, result);
             return;
         }

         if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
-            const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
-            const Xbyak::Xmm operand2 = ctx.reg_alloc.UseScratchXmm(args[1]);
-            const Xbyak::Xmm operand3 = ctx.reg_alloc.UseXmm(args[2]);
+            const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+            const Xbyak::Xmm operand2 = ctx.reg_alloc.UseScratchXmm(code, args[1]);
+            const Xbyak::Xmm operand3 = ctx.reg_alloc.UseXmm(code, args[2]);

-            if constexpr (negate_product) {
+            if (negate_product) {
                 code.xorps(operand2, code.Const(xword, FP::FPInfo<FPT>::sign_mask));
             }
             FCODE(muls)(operand2, operand3);
             FCODE(adds)(operand1, operand2);

-            ctx.reg_alloc.DefineValue(inst, operand1);
+            ctx.reg_alloc.DefineValue(code, inst, operand1);
             return;
         }
     }

-    ctx.reg_alloc.HostCall(inst, args[0], args[1], args[2]);
+    ctx.reg_alloc.HostCall(code, inst, args[0], args[1], args[2]);
     code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR().Value());
 #ifdef _WIN32
-    ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE);
+    ctx.reg_alloc.AllocStackSpace(code, 16 + ABI_SHADOW_SPACE);
     code.lea(rax, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
     code.mov(qword[rsp + ABI_SHADOW_SPACE], rax);
     code.CallFunction(fallback_fn);
-    ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE);
+    ctx.reg_alloc.ReleaseStackSpace(code, 16 + ABI_SHADOW_SPACE);
 #else
     code.lea(code.ABI_PARAM5, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
     code.CallFunction(fallback_fn);
@@ -815,27 +815,27 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
 }

 void EmitX64::EmitFPMulAdd16(EmitContext& ctx, IR::Inst* inst) {
-    EmitFPMulAdd<16, false>(code, ctx, inst);
+    EmitFPMulAdd<16>(code, ctx, inst, false);
 }

 void EmitX64::EmitFPMulAdd32(EmitContext& ctx, IR::Inst* inst) {
-    EmitFPMulAdd<32, false>(code, ctx, inst);
+    EmitFPMulAdd<32>(code, ctx, inst, false);
 }

 void EmitX64::EmitFPMulAdd64(EmitContext& ctx, IR::Inst* inst) {
-    EmitFPMulAdd<64, false>(code, ctx, inst);
+    EmitFPMulAdd<64>(code, ctx, inst, false);
 }

 void EmitX64::EmitFPMulSub16(EmitContext& ctx, IR::Inst* inst) {
-    EmitFPMulAdd<16, true>(code, ctx, inst);
+    EmitFPMulAdd<16>(code, ctx, inst, true);
 }

 void EmitX64::EmitFPMulSub32(EmitContext& ctx, IR::Inst* inst) {
-    EmitFPMulAdd<32, true>(code, ctx, inst);
+    EmitFPMulAdd<32>(code, ctx, inst, true);
 }

 void EmitX64::EmitFPMulSub64(EmitContext& ctx, IR::Inst* inst) {
-    EmitFPMulAdd<64, true>(code, ctx, inst);
+    EmitFPMulAdd<64>(code, ctx, inst, true);
 }

 template<size_t fsize>
@@ -846,10 +846,10 @@ static void EmitFPMulX(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
     const bool do_default_nan = ctx.FPCR().DN();

-    const Xbyak::Xmm op1 = ctx.reg_alloc.UseXmm(args[0]);
-    const Xbyak::Xmm op2 = ctx.reg_alloc.UseXmm(args[1]);
-    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
-    const Xbyak::Reg64 tmp = do_default_nan ? INVALID_REG : ctx.reg_alloc.ScratchGpr();
+    const Xbyak::Xmm op1 = ctx.reg_alloc.UseXmm(code, args[0]);
+    const Xbyak::Xmm op2 = ctx.reg_alloc.UseXmm(code, args[1]);
+    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
+    const Xbyak::Reg64 tmp = do_default_nan ?
INVALID_REG : ctx.reg_alloc.ScratchGpr(code); SharedLabel end = GenSharedLabel(), nan = GenSharedLabel(); @@ -887,7 +887,7 @@ static void EmitFPMulX(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { } }); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitFPMulX32(EmitContext& ctx, IR::Inst* inst) { @@ -905,8 +905,8 @@ static void EmitFPRecipEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i if constexpr (fsize != 16) { if (ctx.HasOptimization(OptimizationFlag::Unsafe_ReducedErrorFP)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) { FCODE(vrcp14s)(result, operand, operand); @@ -920,13 +920,13 @@ static void EmitFPRecipEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i } } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } } auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ctx.reg_alloc.HostCall(inst, args[0]); + ctx.reg_alloc.HostCall(code, inst, args[0]); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); code.lea(code.ABI_PARAM3, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPRecipEstimate); @@ -949,7 +949,7 @@ static void EmitFPRecipExponent(BlockOfCode& code, EmitContext& ctx, IR::Inst* i using FPT = mcl::unsigned_integer_of_size; auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ctx.reg_alloc.HostCall(inst, args[0]); + ctx.reg_alloc.HostCall(code, inst, args[0]); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); code.lea(code.ABI_PARAM3, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPRecipExponent); @@ -977,23 +977,23 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* if (code.HasHostFeature(HostFeature::FMA) && ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) { Xbyak::Label end, fallback; - const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); code.movaps(result, code.Const(xword, FP::FPValue())); FCODE(vfnmadd231s)(result, operand1, operand2); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } if (code.HasHostFeature(HostFeature::FMA)) { SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel(); - const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); code.movaps(result, code.Const(xword, FP::FPValue())); FCODE(vfnmadd231s)(result, operand1, operand2); @@ -1018,25 +1018,25 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* code.jmp(*end, code.T_NEAR); }); - ctx.reg_alloc.DefineValue(inst, result); 
+ ctx.reg_alloc.DefineValue(code, inst, result); return; } if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) { - const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); code.movaps(result, code.Const(xword, FP::FPValue())); FCODE(muls)(operand1, operand2); FCODE(subs)(result, operand1); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } } - ctx.reg_alloc.HostCall(inst, args[0], args[1]); + ctx.reg_alloc.HostCall(code, inst, args[0], args[1]); code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value()); code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPRecipStepFused); @@ -1079,7 +1079,7 @@ static void EmitFPRound(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, siz auto args = ctx.reg_alloc.GetArgumentInfo(inst); // See EmitFPRoundThunk auto const extra_args = u32(rounding_mode) | (u32(exact) << 8); - ctx.reg_alloc.HostCall(inst, args[0]); + ctx.reg_alloc.HostCall(code, inst, args[0]); code.lea(code.ABI_PARAM2, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM4.cvt32(), extra_args); @@ -1111,8 +1111,8 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i if constexpr (fsize != 16) { if (ctx.HasOptimization(OptimizationFlag::Unsafe_ReducedErrorFP)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) { FCODE(vrsqrt14s)(result, operand, operand); @@ -1126,16 +1126,16 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i } } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm value = ctx.reg_alloc.ScratchXmm(); - [[maybe_unused]] const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm value = ctx.reg_alloc.ScratchXmm(code); + [[maybe_unused]] const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32(); SharedLabel bad_values = GenSharedLabel(), end = GenSharedLabel(); @@ -1261,10 +1261,10 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i } }); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } else { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ctx.reg_alloc.HostCall(inst, args[0]); + ctx.reg_alloc.HostCall(code, inst, args[0]); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); code.lea(code.ABI_PARAM3, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPRSqrtEstimate); @@ 
-1291,24 +1291,24 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* if constexpr (fsize != 16) { if (code.HasHostFeature(HostFeature::FMA | HostFeature::AVX) && ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) { - const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); code.vmovaps(result, code.Const(xword, FP::FPValue())); FCODE(vfnmadd231s)(result, operand1, operand2); FCODE(vmuls)(result, result, code.Const(xword, FP::FPValue())); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } if (code.HasHostFeature(HostFeature::FMA | HostFeature::AVX)) { SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel(); - const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); code.vmovaps(result, code.Const(xword, FP::FPValue())); FCODE(vfnmadd231s)(result, operand1, operand2); @@ -1317,7 +1317,7 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* // Why do we need to care about infinities? This is because x86 doesn't allow us // to fuse the divide-by-two with the rest of the FMA operation. Therefore the // intermediate value may overflow and we would like to handle this case. - const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32(); code.vpextrw(tmp, result, fsize == 32 ? 1 : 3); code.and_(tmp.cvt16(), fsize == 32 ? 0x7f80 : 0x7ff0); code.cmp(tmp.cvt16(), fsize == 32 ? 
0x7f00 : 0x7fe0); @@ -1345,26 +1345,26 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* code.jmp(*end, code.T_NEAR); }); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) { - const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); code.movaps(result, code.Const(xword, FP::FPValue())); FCODE(muls)(operand1, operand2); FCODE(subs)(result, operand1); FCODE(muls)(result, code.Const(xword, FP::FPValue())); - ctx.reg_alloc.DefineValue(inst, operand1); + ctx.reg_alloc.DefineValue(code, inst, operand1); return; } } - ctx.reg_alloc.HostCall(inst, args[0], args[1]); + ctx.reg_alloc.HostCall(code, inst, args[0], args[1]); code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value()); code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.CallFunction(&FP::FPRSqrtStepFused); @@ -1399,8 +1399,8 @@ void EmitX64::EmitFPSub64(EmitContext& ctx, IR::Inst* inst) { } static Xbyak::Reg64 SetFpscrNzcvFromFlags(BlockOfCode& code, EmitContext& ctx) { - ctx.reg_alloc.ScratchGpr(HostLoc::RCX); // shifting requires use of cl - const Xbyak::Reg64 nzcv = ctx.reg_alloc.ScratchGpr(); + ctx.reg_alloc.ScratchGpr(code, HostLoc::RCX); // shifting requires use of cl + const Xbyak::Reg64 nzcv = ctx.reg_alloc.ScratchGpr(code); // x64 flags ARM flags // ZF PF CF NZCV @@ -1427,8 +1427,8 @@ static Xbyak::Reg64 SetFpscrNzcvFromFlags(BlockOfCode& code, EmitContext& ctx) { void EmitX64::EmitFPCompare32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm reg_a = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm reg_b = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm reg_a = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm reg_b = ctx.reg_alloc.UseXmm(code, args[1]); const bool exc_on_qnan = args[2].GetImmediateU1(); if (exc_on_qnan) { @@ -1438,13 +1438,13 @@ void EmitX64::EmitFPCompare32(EmitContext& ctx, IR::Inst* inst) { } const Xbyak::Reg64 nzcv = SetFpscrNzcvFromFlags(code, ctx); - ctx.reg_alloc.DefineValue(inst, nzcv); + ctx.reg_alloc.DefineValue(code, inst, nzcv); } void EmitX64::EmitFPCompare64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm reg_a = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm reg_b = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm reg_a = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm reg_b = ctx.reg_alloc.UseXmm(code, args[1]); const bool exc_on_qnan = args[2].GetImmediateU1(); if (exc_on_qnan) { @@ -1454,7 +1454,7 @@ void EmitX64::EmitFPCompare64(EmitContext& ctx, IR::Inst* inst) { } const Xbyak::Reg64 nzcv = SetFpscrNzcvFromFlags(code, ctx); - ctx.reg_alloc.DefineValue(inst, nzcv); + ctx.reg_alloc.DefineValue(code, inst, nzcv); } void EmitX64::EmitFPHalfToDouble(EmitContext& ctx, IR::Inst* inst) { @@ -1462,8 +1462,8 @@ void EmitX64::EmitFPHalfToDouble(EmitContext& ctx, IR::Inst* inst) { const auto rounding_mode = static_cast(args[1].GetImmediateU8()); if (code.HasHostFeature(HostFeature::F16C) && !ctx.FPCR().AHP() && !ctx.FPCR().FZ16()) { - const Xbyak::Xmm result = 
ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm value = ctx.reg_alloc.UseXmm(args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm value = ctx.reg_alloc.UseXmm(code, args[0]); // Double-conversion here is acceptable as this is expanding precision. code.vcvtph2ps(result, value); @@ -1472,11 +1472,11 @@ void EmitX64::EmitFPHalfToDouble(EmitContext& ctx, IR::Inst* inst) { ForceToDefaultNaN<64>(code, result); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } - ctx.reg_alloc.HostCall(inst, args[0]); + ctx.reg_alloc.HostCall(code, inst, args[0]); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM3.cvt32(), static_cast(rounding_mode)); code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); @@ -1488,19 +1488,19 @@ void EmitX64::EmitFPHalfToSingle(EmitContext& ctx, IR::Inst* inst) { const auto rounding_mode = static_cast(args[1].GetImmediateU8()); if (code.HasHostFeature(HostFeature::F16C) && !ctx.FPCR().AHP() && !ctx.FPCR().FZ16()) { - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm value = ctx.reg_alloc.UseXmm(args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm value = ctx.reg_alloc.UseXmm(code, args[0]); code.vcvtph2ps(result, value); if (ctx.FPCR().DN()) { ForceToDefaultNaN<32>(code, result); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } - ctx.reg_alloc.HostCall(inst, args[0]); + ctx.reg_alloc.HostCall(code, inst, args[0]); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM3.cvt32(), static_cast(rounding_mode)); code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); @@ -1513,15 +1513,15 @@ void EmitX64::EmitFPSingleToDouble(EmitContext& ctx, IR::Inst* inst) { // We special-case the non-IEEE-defined ToOdd rounding mode. 
if (rounding_mode == ctx.FPCR().RMode() && rounding_mode != FP::RoundingMode::ToOdd) { - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.cvtss2sd(result, result); if (ctx.FPCR().DN()) { ForceToDefaultNaN<64>(code, result); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } else { - ctx.reg_alloc.HostCall(inst, args[0]); + ctx.reg_alloc.HostCall(code, inst, args[0]); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM3.cvt32(), static_cast(rounding_mode)); code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); @@ -1535,18 +1535,18 @@ void EmitX64::EmitFPSingleToHalf(EmitContext& ctx, IR::Inst* inst) { const auto round_imm = ConvertRoundingModeToX64Immediate(rounding_mode); if (code.HasHostFeature(HostFeature::F16C) && !ctx.FPCR().AHP() && !ctx.FPCR().FZ16()) { - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (ctx.FPCR().DN()) { ForceToDefaultNaN<32>(code, result); } code.vcvtps2ph(result, result, static_cast(*round_imm)); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } - ctx.reg_alloc.HostCall(inst, args[0]); + ctx.reg_alloc.HostCall(code, inst, args[0]); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM3.cvt32(), static_cast(rounding_mode)); code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); @@ -1560,7 +1560,7 @@ void EmitX64::EmitFPDoubleToHalf(EmitContext& ctx, IR::Inst* inst) { // NOTE: Do not double-convert here as that is inaccurate. // To be accurate, the first conversion would need to be "round-to-odd", which x64 doesn't support. - ctx.reg_alloc.HostCall(inst, args[0]); + ctx.reg_alloc.HostCall(code, inst, args[0]); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM3.cvt32(), static_cast(rounding_mode)); code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); @@ -1573,15 +1573,15 @@ void EmitX64::EmitFPDoubleToSingle(EmitContext& ctx, IR::Inst* inst) { // We special-case the non-IEEE-defined ToOdd rounding mode. 
if (rounding_mode == ctx.FPCR().RMode() && rounding_mode != FP::RoundingMode::ToOdd) { - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.cvtsd2ss(result, result); if (ctx.FPCR().DN()) { ForceToDefaultNaN<32>(code, result); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } else { - ctx.reg_alloc.HostCall(inst, args[0]); + ctx.reg_alloc.HostCall(code, inst, args[0]); code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM3.cvt32(), static_cast(rounding_mode)); code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); @@ -1620,8 +1620,8 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { const bool truncating = rounding_mode == FP::RoundingMode::TowardsZero; if (round_imm && (truncating || code.HasHostFeature(HostFeature::SSE41))) { - const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr().cvt64(); + const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(code).cvt64(); if constexpr (fsize == 64) { if (fbits != 0) { @@ -1646,7 +1646,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { } if constexpr (isize == 64) { - const Xbyak::Xmm scratch = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm scratch = ctx.reg_alloc.ScratchXmm(code); if (!unsigned_) { SharedLabel saturate_max = GenSharedLabel(), end = GenSharedLabel(); @@ -1667,7 +1667,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { } else { Xbyak::Label below_max; - const Xbyak::Reg64 result2 = ctx.reg_alloc.ScratchGpr().cvt64(); + const Xbyak::Reg64 result2 = ctx.reg_alloc.ScratchGpr(code).cvt64(); code.pxor(xmm0, xmm0); @@ -1691,7 +1691,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { } } else if constexpr (isize == 32) { if (!unsigned_) { - const Xbyak::Xmm scratch = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm scratch = ctx.reg_alloc.ScratchXmm(code); ZeroIfNaN<64>(code, src, scratch); code.minsd(src, code.Const(xword, f64_max_s32)); @@ -1704,7 +1704,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { code.cvttsd2si(result, src); // 64 bit gpr } } else { - const Xbyak::Xmm scratch = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm scratch = ctx.reg_alloc.ScratchXmm(code); ZeroIfNaN<64>(code, src, scratch); code.maxsd(src, code.Const(xword, unsigned_ ? 
f64_min_u16 : f64_min_s16)); @@ -1712,7 +1712,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { code.cvttsd2si(result, src); // 64 bit gpr } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } @@ -1720,7 +1720,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { // See EmitFPToFixedThunk auto const extra_args = (u32(unsigned_) << 24) | (u32(isize) << 16) | (u32(rounding_mode) << 8) | (u32(fbits)); - ctx.reg_alloc.HostCall(inst, args[0]); + ctx.reg_alloc.HostCall(code, inst, args[0]); code.lea(code.ABI_PARAM2, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]); code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value()); code.mov(code.ABI_PARAM4.cvt32(), extra_args); @@ -1802,9 +1802,9 @@ void EmitX64::EmitFPSingleToFixedU64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitFPFixedS16ToSingle(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg16 from = ctx.reg_alloc.UseGpr(args[0]).cvt16(); - const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32(); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Reg16 from = ctx.reg_alloc.UseGpr(code, args[0]).cvt16(); + const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32(); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); const size_t fbits = args[1].GetImmediateU8(); [[maybe_unused]] const FP::RoundingMode rounding_mode = static_cast(args[2].GetImmediateU8()); // Not required @@ -1816,15 +1816,15 @@ void EmitX64::EmitFPFixedS16ToSingle(EmitContext& ctx, IR::Inst* inst) { code.mulss(result, code.Const(xword, scale_factor)); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitFPFixedU16ToSingle(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg16 from = ctx.reg_alloc.UseGpr(args[0]).cvt16(); - const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32(); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Reg16 from = ctx.reg_alloc.UseGpr(code, args[0]).cvt16(); + const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32(); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); const size_t fbits = args[1].GetImmediateU8(); [[maybe_unused]] const FP::RoundingMode rounding_mode = static_cast(args[2].GetImmediateU8()); // Not required @@ -1836,14 +1836,14 @@ void EmitX64::EmitFPFixedU16ToSingle(EmitContext& ctx, IR::Inst* inst) { code.mulss(result, code.Const(xword, scale_factor)); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitFPFixedS32ToSingle(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg32 from = ctx.reg_alloc.UseGpr(args[0]).cvt32(); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Reg32 from = ctx.reg_alloc.UseGpr(code, args[0]).cvt32(); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); const size_t fbits = args[1].GetImmediateU8(); const FP::RoundingMode rounding_mode = static_cast(args[2].GetImmediateU8()); @@ -1861,23 +1861,23 @@ void EmitX64::EmitFPFixedS32ToSingle(EmitContext& ctx, IR::Inst* inst) { code.mulss(result, code.Const(xword, scale_factor)); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitFPFixedU32ToSingle(EmitContext& ctx, IR::Inst* inst) { auto 
args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); const size_t fbits = args[1].GetImmediateU8(); const FP::RoundingMode rounding_mode = static_cast(args[2].GetImmediateU8()); const auto op = [&] { if (code.HasHostFeature(HostFeature::AVX512F)) { - const Xbyak::Reg64 from = ctx.reg_alloc.UseGpr(args[0]); + const Xbyak::Reg64 from = ctx.reg_alloc.UseGpr(code, args[0]); code.vcvtusi2ss(result, result, from.cvt32()); } else { // We are using a 64-bit GPR register to ensure we don't end up treating the input as signed - const Xbyak::Reg64 from = ctx.reg_alloc.UseScratchGpr(args[0]); + const Xbyak::Reg64 from = ctx.reg_alloc.UseScratchGpr(code, args[0]); code.mov(from.cvt32(), from.cvt32()); // TODO: Verify if this is necessary code.cvtsi2ss(result, from); } @@ -1897,15 +1897,15 @@ void EmitX64::EmitFPFixedU32ToSingle(EmitContext& ctx, IR::Inst* inst) { code.mulss(result, code.Const(xword, scale_factor)); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitFPFixedS16ToDouble(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg16 from = ctx.reg_alloc.UseGpr(args[0]).cvt16(); - const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32(); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Reg16 from = ctx.reg_alloc.UseGpr(code, args[0]).cvt16(); + const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32(); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); const size_t fbits = args[1].GetImmediateU8(); [[maybe_unused]] const FP::RoundingMode rounding_mode = static_cast(args[2].GetImmediateU8()); // Not required @@ -1917,15 +1917,15 @@ void EmitX64::EmitFPFixedS16ToDouble(EmitContext& ctx, IR::Inst* inst) { code.mulsd(result, code.Const(xword, scale_factor)); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitFPFixedU16ToDouble(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg16 from = ctx.reg_alloc.UseGpr(args[0]).cvt16(); - const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32(); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Reg16 from = ctx.reg_alloc.UseGpr(code, args[0]).cvt16(); + const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32(); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); const size_t fbits = args[1].GetImmediateU8(); [[maybe_unused]] const FP::RoundingMode rounding_mode = static_cast(args[2].GetImmediateU8()); // Not required @@ -1937,14 +1937,14 @@ void EmitX64::EmitFPFixedU16ToDouble(EmitContext& ctx, IR::Inst* inst) { code.mulsd(result, code.Const(xword, scale_factor)); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitFPFixedS32ToDouble(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg32 from = ctx.reg_alloc.UseGpr(args[0]).cvt32(); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Reg32 from = ctx.reg_alloc.UseGpr(code, args[0]).cvt32(); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); const size_t fbits = args[1].GetImmediateU8(); [[maybe_unused]] const FP::RoundingMode rounding_mode = static_cast(args[2].GetImmediateU8()); // Not required @@ -1955,24 +1955,24 @@ void EmitX64::EmitFPFixedS32ToDouble(EmitContext& 
ctx, IR::Inst* inst) { code.mulsd(result, code.Const(xword, scale_factor)); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitFPFixedU32ToDouble(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm to = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm to = ctx.reg_alloc.ScratchXmm(code); const size_t fbits = args[1].GetImmediateU8(); [[maybe_unused]] const FP::RoundingMode rounding_mode = static_cast(args[2].GetImmediateU8()); // Not required code.xorps(to, to); if (code.HasHostFeature(HostFeature::AVX512F)) { - const Xbyak::Reg64 from = ctx.reg_alloc.UseGpr(args[0]); + const Xbyak::Reg64 from = ctx.reg_alloc.UseGpr(code, args[0]); code.vcvtusi2sd(to, to, from.cvt32()); } else { // We are using a 64-bit GPR register to ensure we don't end up treating the input as signed - const Xbyak::Reg64 from = ctx.reg_alloc.UseScratchGpr(args[0]); + const Xbyak::Reg64 from = ctx.reg_alloc.UseScratchGpr(code, args[0]); code.mov(from.cvt32(), from.cvt32()); // TODO: Verify if this is necessary code.cvtsi2sd(to, from); } @@ -1982,14 +1982,14 @@ void EmitX64::EmitFPFixedU32ToDouble(EmitContext& ctx, IR::Inst* inst) { code.mulsd(to, code.Const(xword, scale_factor)); } - ctx.reg_alloc.DefineValue(inst, to); + ctx.reg_alloc.DefineValue(code, inst, to); } void EmitX64::EmitFPFixedS64ToDouble(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg64 from = ctx.reg_alloc.UseGpr(args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Reg64 from = ctx.reg_alloc.UseGpr(code, args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); const size_t fbits = args[1].GetImmediateU8(); const FP::RoundingMode rounding_mode = static_cast(args[2].GetImmediateU8()); ASSERT(rounding_mode == ctx.FPCR().RMode()); @@ -2001,14 +2001,14 @@ void EmitX64::EmitFPFixedS64ToDouble(EmitContext& ctx, IR::Inst* inst) { code.mulsd(result, code.Const(xword, scale_factor)); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitFPFixedS64ToSingle(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg64 from = ctx.reg_alloc.UseGpr(args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Reg64 from = ctx.reg_alloc.UseGpr(code, args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); const size_t fbits = args[1].GetImmediateU8(); const FP::RoundingMode rounding_mode = static_cast(args[2].GetImmediateU8()); ASSERT(rounding_mode == ctx.FPCR().RMode()); @@ -2020,14 +2020,14 @@ void EmitX64::EmitFPFixedS64ToSingle(EmitContext& ctx, IR::Inst* inst) { code.mulss(result, code.Const(xword, scale_factor)); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitFPFixedU64ToDouble(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg64 from = ctx.reg_alloc.UseGpr(args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Reg64 from = ctx.reg_alloc.UseGpr(code, args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); const size_t fbits = args[1].GetImmediateU8(); const FP::RoundingMode rounding_mode = static_cast(args[2].GetImmediateU8()); ASSERT(rounding_mode == ctx.FPCR().RMode()); @@ -2035,7 +2035,7 @@ void EmitX64::EmitFPFixedU64ToDouble(EmitContext& ctx, IR::Inst* inst) { if 
(code.HasHostFeature(HostFeature::AVX512F)) { code.vcvtusi2sd(result, result, from); } else { - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); code.movq(tmp, from); code.punpckldq(tmp, code.Const(xword, 0x4530000043300000, 0)); @@ -2052,22 +2052,22 @@ void EmitX64::EmitFPFixedU64ToDouble(EmitContext& ctx, IR::Inst* inst) { code.mulsd(result, code.Const(xword, scale_factor)); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitFPFixedU64ToSingle(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); const size_t fbits = args[1].GetImmediateU8(); const FP::RoundingMode rounding_mode = static_cast(args[2].GetImmediateU8()); ASSERT(rounding_mode == ctx.FPCR().RMode()); if (code.HasHostFeature(HostFeature::AVX512F)) { - const Xbyak::Reg64 from = ctx.reg_alloc.UseGpr(args[0]); + const Xbyak::Reg64 from = ctx.reg_alloc.UseGpr(code, args[0]); code.vcvtusi2ss(result, result, from); } else { - const Xbyak::Reg64 from = ctx.reg_alloc.UseScratchGpr(args[0]); + const Xbyak::Reg64 from = ctx.reg_alloc.UseScratchGpr(code, args[0]); code.pxor(result, result); Xbyak::Label negative; @@ -2080,7 +2080,7 @@ void EmitX64::EmitFPFixedU64ToSingle(EmitContext& ctx, IR::Inst* inst) { code.jmp(end); code.L(negative); - const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(); + const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(code); code.mov(tmp, from); code.shr(tmp, 1); code.and_(from.cvt32(), 1); @@ -2096,7 +2096,7 @@ void EmitX64::EmitFPFixedU64ToSingle(EmitContext& ctx, IR::Inst* inst) { code.mulss(result, code.Const(xword, scale_factor)); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } } // namespace Dynarmic::Backend::X64 diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc index 71437dfe8f..900c836273 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc @@ -54,14 +54,14 @@ void AxxEmitX64::EmitMemoryRead(AxxEmitContext& ctx, IR::Inst* inst) { if (!conf.page_table && !fastmem_marker) { // Neither fastmem nor page table: Use callbacks if constexpr (bitsize == 128) { - ctx.reg_alloc.HostCall(nullptr, {}, args[1]); + ctx.reg_alloc.HostCall(code, nullptr, {}, args[1]); if (ordered) { code.mfence(); } code.CallFunction(memory_read_128); - ctx.reg_alloc.DefineValue(inst, xmm1); + ctx.reg_alloc.DefineValue(code, inst, xmm1); } else { - ctx.reg_alloc.HostCall(inst, {}, args[1]); + ctx.reg_alloc.HostCall(code, inst, {}, args[1]); if (ordered) { code.mfence(); } @@ -74,14 +74,14 @@ void AxxEmitX64::EmitMemoryRead(AxxEmitContext& ctx, IR::Inst* inst) { if (ordered && bitsize == 128) { // Required for atomic 128-bit loads/stores - ctx.reg_alloc.ScratchGpr(HostLoc::RAX); - ctx.reg_alloc.ScratchGpr(HostLoc::RBX); - ctx.reg_alloc.ScratchGpr(HostLoc::RCX); - ctx.reg_alloc.ScratchGpr(HostLoc::RDX); + ctx.reg_alloc.ScratchGpr(code, HostLoc::RAX); + ctx.reg_alloc.ScratchGpr(code, HostLoc::RBX); + ctx.reg_alloc.ScratchGpr(code, HostLoc::RCX); + ctx.reg_alloc.ScratchGpr(code, HostLoc::RDX); } - const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[1]); - const int value_idx = bitsize == 128 ? 
ctx.reg_alloc.ScratchXmm().getIdx() : ctx.reg_alloc.ScratchGpr().getIdx(); + const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(code, args[1]); + const int value_idx = bitsize == 128 ? ctx.reg_alloc.ScratchXmm(code).getIdx() : ctx.reg_alloc.ScratchGpr(code).getIdx(); const auto wrapped_fn = read_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value_idx)]; @@ -126,9 +126,9 @@ void AxxEmitX64::EmitMemoryRead(AxxEmitContext& ctx, IR::Inst* inst) { code.L(*end); if constexpr (bitsize == 128) { - ctx.reg_alloc.DefineValue(inst, Xbyak::Xmm{value_idx}); + ctx.reg_alloc.DefineValue(code, inst, Xbyak::Xmm{value_idx}); } else { - ctx.reg_alloc.DefineValue(inst, Xbyak::Reg64{value_idx}); + ctx.reg_alloc.DefineValue(code, inst, Xbyak::Reg64{value_idx}); } } @@ -141,13 +141,13 @@ void AxxEmitX64::EmitMemoryWrite(AxxEmitContext& ctx, IR::Inst* inst) { if (!conf.page_table && !fastmem_marker) { // Neither fastmem nor page table: Use callbacks if constexpr (bitsize == 128) { - ctx.reg_alloc.Use(args[1], ABI_PARAM2); - ctx.reg_alloc.Use(args[2], HostLoc::XMM1); + ctx.reg_alloc.Use(code, args[1], ABI_PARAM2); + ctx.reg_alloc.Use(code, args[2], HostLoc::XMM1); ctx.reg_alloc.EndOfAllocScope(); - ctx.reg_alloc.HostCall(nullptr); + ctx.reg_alloc.HostCall(code, nullptr); code.CallFunction(memory_write_128); } else { - ctx.reg_alloc.HostCall(nullptr, {}, args[1], args[2]); + ctx.reg_alloc.HostCall(code, nullptr, {}, args[1], args[2]); Devirtualize(conf.callbacks).EmitCall(code); } if (ordered) { @@ -159,16 +159,16 @@ void AxxEmitX64::EmitMemoryWrite(AxxEmitContext& ctx, IR::Inst* inst) { if (ordered && bitsize == 128) { // Required for atomic 128-bit loads/stores - ctx.reg_alloc.ScratchGpr(HostLoc::RAX); - ctx.reg_alloc.ScratchGpr(HostLoc::RBX); - ctx.reg_alloc.ScratchGpr(HostLoc::RCX); - ctx.reg_alloc.ScratchGpr(HostLoc::RDX); + ctx.reg_alloc.ScratchGpr(code, HostLoc::RAX); + ctx.reg_alloc.ScratchGpr(code, HostLoc::RBX); + ctx.reg_alloc.ScratchGpr(code, HostLoc::RCX); + ctx.reg_alloc.ScratchGpr(code, HostLoc::RDX); } - const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[1]); + const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(code, args[1]); const int value_idx = bitsize == 128 - ? ctx.reg_alloc.UseXmm(args[2]).getIdx() - : (ordered ? ctx.reg_alloc.UseScratchGpr(args[2]).getIdx() : ctx.reg_alloc.UseGpr(args[2]).getIdx()); + ? ctx.reg_alloc.UseXmm(code, args[2]).getIdx() + : (ordered ? 
ctx.reg_alloc.UseScratchGpr(code, args[2]).getIdx() : ctx.reg_alloc.UseGpr(code, args[2]).getIdx()); const auto wrapped_fn = write_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value_idx)]; @@ -222,7 +222,7 @@ void AxxEmitX64::EmitExclusiveReadMemory(AxxEmitContext& ctx, IR::Inst* inst) { if constexpr (bitsize != 128) { using T = mcl::unsigned_integer_of_size<bitsize>; - ctx.reg_alloc.HostCall(inst, {}, args[1]); + ctx.reg_alloc.HostCall(code, inst, {}, args[1]); code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(1)); code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf)); @@ -237,14 +237,14 @@ void AxxEmitX64::EmitExclusiveReadMemory(AxxEmitContext& ctx, IR::Inst* inst) { }); code.ZeroExtendFrom(bitsize, code.ABI_RETURN); } else { - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); - ctx.reg_alloc.Use(args[1], ABI_PARAM2); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + ctx.reg_alloc.Use(code, args[1], ABI_PARAM2); ctx.reg_alloc.EndOfAllocScope(); - ctx.reg_alloc.HostCall(nullptr); + ctx.reg_alloc.HostCall(code, nullptr); code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(1)); code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf)); - ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE); + ctx.reg_alloc.AllocStackSpace(code, 16 + ABI_SHADOW_SPACE); code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]); if (ordered) { code.mfence(); @@ -256,9 +256,9 @@ void AxxEmitX64::EmitExclusiveReadMemory(AxxEmitContext& ctx, IR::Inst* inst) { }); }); code.movups(result, xword[rsp + ABI_SHADOW_SPACE]); - ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE); + ctx.reg_alloc.ReleaseStackSpace(code, 16 + ABI_SHADOW_SPACE); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } EmitCheckMemoryAbort(ctx, inst); @@ -271,15 +271,15 @@ void AxxEmitX64::EmitExclusiveWriteMemory(AxxEmitContext& ctx, IR::Inst* inst) { const bool ordered = IsOrdered(args[3].GetImmediateAccType()); if constexpr (bitsize == 128) { - ctx.reg_alloc.Use(args[1], ABI_PARAM2); - ctx.reg_alloc.Use(args[2], HostLoc::XMM1); + ctx.reg_alloc.Use(code, args[1], ABI_PARAM2); + ctx.reg_alloc.Use(code, args[2], HostLoc::XMM1); ctx.reg_alloc.EndOfAllocScope(); - ctx.reg_alloc.HostCall(inst); + ctx.reg_alloc.HostCall(code, inst); } else { - ctx.reg_alloc.HostCall(inst, {}, args[1], args[2]); + ctx.reg_alloc.HostCall(code, inst, {}, args[1], args[2]); } - const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(); + const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(code); Xbyak::Label end; code.mov(code.ABI_RETURN, u32(1)); code.movzx(tmp.cvt32(), code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)]); @@ -299,7 +299,7 @@ void AxxEmitX64::EmitExclusiveWriteMemory(AxxEmitContext& ctx, IR::Inst* inst) { code.mfence(); } } else { - ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE); + ctx.reg_alloc.AllocStackSpace(code, 16 + ABI_SHADOW_SPACE); code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]); code.movaps(xword[code.ABI_PARAM3], xmm1); code.CallLambda([](AxxUserConfig& conf, Axx::VAddr vaddr, Vector& value) -> u32 { @@ -310,7 +310,7 @@ void AxxEmitX64::EmitExclusiveWriteMemory(AxxEmitContext& ctx, IR::Inst* inst) { if (ordered) { code.mfence(); } - ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE); + ctx.reg_alloc.ReleaseStackSpace(code, 16 + ABI_SHADOW_SPACE); } code.L(end); @@ -330,16 +330,16 @@ void AxxEmitX64::EmitExclusiveReadMemoryInline(AxxEmitContext& ctx, IR::Inst* in if constexpr (ordered && bitsize == 128)
{ // Required for atomic 128-bit loads/stores - ctx.reg_alloc.ScratchGpr(HostLoc::RAX); - ctx.reg_alloc.ScratchGpr(HostLoc::RBX); - ctx.reg_alloc.ScratchGpr(HostLoc::RCX); - ctx.reg_alloc.ScratchGpr(HostLoc::RDX); + ctx.reg_alloc.ScratchGpr(code, HostLoc::RAX); + ctx.reg_alloc.ScratchGpr(code, HostLoc::RBX); + ctx.reg_alloc.ScratchGpr(code, HostLoc::RCX); + ctx.reg_alloc.ScratchGpr(code, HostLoc::RDX); } - const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[1]); - const int value_idx = bitsize == 128 ? ctx.reg_alloc.ScratchXmm().getIdx() : ctx.reg_alloc.ScratchGpr().getIdx(); - const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(); - const Xbyak::Reg64 tmp2 = ctx.reg_alloc.ScratchGpr(); + const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(code, args[1]); + const int value_idx = bitsize == 128 ? ctx.reg_alloc.ScratchXmm(code).getIdx() : ctx.reg_alloc.ScratchGpr(code).getIdx(); + const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(code); + const Xbyak::Reg64 tmp2 = ctx.reg_alloc.ScratchGpr(code); const auto wrapped_fn = read_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value_idx)]; @@ -386,9 +386,9 @@ void AxxEmitX64::EmitExclusiveReadMemoryInline(AxxEmitContext& ctx, IR::Inst* in EmitExclusiveUnlock(code, conf, tmp, tmp2.cvt32()); if constexpr (bitsize == 128) { - ctx.reg_alloc.DefineValue(inst, Xbyak::Xmm{value_idx}); + ctx.reg_alloc.DefineValue(code, inst, Xbyak::Xmm{value_idx}); } else { - ctx.reg_alloc.DefineValue(inst, Xbyak::Reg64{value_idx}); + ctx.reg_alloc.DefineValue(code, inst, Xbyak::Reg64{value_idx}); } EmitCheckMemoryAbort(ctx, inst); @@ -407,19 +407,19 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i const auto value = [&] { if constexpr (bitsize == 128) { - ctx.reg_alloc.ScratchGpr(HostLoc::RAX); - ctx.reg_alloc.ScratchGpr(HostLoc::RBX); - ctx.reg_alloc.ScratchGpr(HostLoc::RCX); - ctx.reg_alloc.ScratchGpr(HostLoc::RDX); - return ctx.reg_alloc.UseXmm(args[2]); + ctx.reg_alloc.ScratchGpr(code, HostLoc::RAX); + ctx.reg_alloc.ScratchGpr(code, HostLoc::RBX); + ctx.reg_alloc.ScratchGpr(code, HostLoc::RCX); + ctx.reg_alloc.ScratchGpr(code, HostLoc::RDX); + return ctx.reg_alloc.UseXmm(code, args[2]); } else { - ctx.reg_alloc.ScratchGpr(HostLoc::RAX); - return ctx.reg_alloc.UseGpr(args[2]); + ctx.reg_alloc.ScratchGpr(code, HostLoc::RAX); + return ctx.reg_alloc.UseGpr(code, args[2]); } }(); - const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[1]); - const Xbyak::Reg32 status = ctx.reg_alloc.ScratchGpr().cvt32(); - const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(); + const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(code, args[1]); + const Xbyak::Reg32 status = ctx.reg_alloc.ScratchGpr(code).cvt32(); + const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(code); const auto wrapped_fn = exclusive_write_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value.getIdx())]; @@ -518,7 +518,7 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i code.L(*end); EmitExclusiveUnlock(code, conf, tmp, eax); - ctx.reg_alloc.DefineValue(inst, status); + ctx.reg_alloc.DefineValue(code, inst, status); EmitCheckMemoryAbort(ctx, inst); } diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h index 93fa592a26..01efa3840b 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h @@ -75,8 +75,8 @@ Xbyak::RegExp EmitVAddrLookup(BlockOfCode& code, EmitContext& ctx, size_t bitsiz 
template<> [[maybe_unused]] Xbyak::RegExp EmitVAddrLookup(BlockOfCode& code, A32EmitContext& ctx, size_t bitsize, Xbyak::Label& abort, Xbyak::Reg64 vaddr) { - const Xbyak::Reg64 page = ctx.reg_alloc.ScratchGpr(); - const Xbyak::Reg32 tmp = ctx.conf.absolute_offset_page_table ? page.cvt32() : ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg64 page = ctx.reg_alloc.ScratchGpr(code); + const Xbyak::Reg32 tmp = ctx.conf.absolute_offset_page_table ? page.cvt32() : ctx.reg_alloc.ScratchGpr(code).cvt32(); EmitDetectMisalignedVAddr(code, ctx, bitsize, abort, vaddr, tmp.cvt64()); @@ -105,8 +105,8 @@ template<> const size_t valid_page_index_bits = ctx.conf.page_table_address_space_bits - page_bits; const size_t unused_top_bits = 64 - ctx.conf.page_table_address_space_bits; - const Xbyak::Reg64 page = ctx.reg_alloc.ScratchGpr(); - const Xbyak::Reg64 tmp = ctx.conf.absolute_offset_page_table ? page : ctx.reg_alloc.ScratchGpr(); + const Xbyak::Reg64 page = ctx.reg_alloc.ScratchGpr(code); + const Xbyak::Reg64 tmp = ctx.conf.absolute_offset_page_table ? page : ctx.reg_alloc.ScratchGpr(code); EmitDetectMisalignedVAddr(code, ctx, bitsize, abort, vaddr, tmp); @@ -116,7 +116,7 @@ template<> } else if (ctx.conf.silently_mirror_page_table) { if (valid_page_index_bits >= 32) { if (code.HasHostFeature(HostFeature::BMI2)) { - const Xbyak::Reg64 bit_count = ctx.reg_alloc.ScratchGpr(); + const Xbyak::Reg64 bit_count = ctx.reg_alloc.ScratchGpr(code); code.mov(bit_count, unused_top_bits); code.bzhi(tmp, vaddr, bit_count); code.shr(tmp, int(page_bits)); @@ -168,7 +168,7 @@ template<> return r13 + vaddr; } else if (ctx.conf.silently_mirror_fastmem) { if (!tmp) { - tmp = ctx.reg_alloc.ScratchGpr(); + tmp = ctx.reg_alloc.ScratchGpr(code); } if (unused_top_bits < 32) { code.mov(*tmp, vaddr); @@ -189,7 +189,7 @@ template<> } else { // TODO: Consider having TEST as above but coalesce 64-bit constant in register allocator if (!tmp) { - tmp = ctx.reg_alloc.ScratchGpr(); + tmp = ctx.reg_alloc.ScratchGpr(code); } code.mov(*tmp, vaddr); code.shr(*tmp, int(ctx.conf.fastmem_address_space_bits)); diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_packed.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_packed.cpp index 90f06a9015..c29d7d2648 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_packed.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_packed.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. 
* Copyright (c) 2016 MerryMage * SPDX-License-Identifier: 0BSD @@ -16,14 +19,14 @@ void EmitX64::EmitPackedAddU8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); code.paddb(xmm_a, xmm_b); if (ge_inst) { - const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm(code); code.pcmpeqb(ones, ones); @@ -32,21 +35,21 @@ void EmitX64::EmitPackedAddU8(EmitContext& ctx, IR::Inst* inst) { code.pcmpeqb(xmm_ge, xmm_b); code.pxor(xmm_ge, ones); - ctx.reg_alloc.DefineValue(ge_inst, xmm_ge); + ctx.reg_alloc.DefineValue(code, ge_inst, xmm_ge); } - ctx.reg_alloc.DefineValue(inst, xmm_a); + ctx.reg_alloc.DefineValue(code, inst, xmm_a); } void EmitX64::EmitPackedAddS8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); if (ge_inst) { - const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(code); code.pcmpeqb(xmm0, xmm0); @@ -54,27 +57,27 @@ void EmitX64::EmitPackedAddS8(EmitContext& ctx, IR::Inst* inst) { code.paddsb(xmm_ge, xmm_b); code.pcmpgtb(xmm_ge, xmm0); - ctx.reg_alloc.DefineValue(ge_inst, xmm_ge); + ctx.reg_alloc.DefineValue(code, ge_inst, xmm_ge); } code.paddb(xmm_a, xmm_b); - ctx.reg_alloc.DefineValue(inst, xmm_a); + ctx.reg_alloc.DefineValue(code, inst, xmm_a); } void EmitX64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); code.paddw(xmm_a, xmm_b); if (ge_inst) { if (code.HasHostFeature(HostFeature::SSE41)) { - const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm(code); code.pcmpeqb(ones, ones); @@ -83,10 +86,10 @@ void EmitX64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) { code.pcmpeqw(xmm_ge, xmm_b); code.pxor(xmm_ge, ones); - ctx.reg_alloc.DefineValue(ge_inst, xmm_ge); + ctx.reg_alloc.DefineValue(code, ge_inst, xmm_ge); } else { - const Xbyak::Xmm tmp_a = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm tmp_b = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm tmp_a = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm tmp_b = ctx.reg_alloc.ScratchXmm(code); // !(b <= a+b) == b > a+b code.movdqa(tmp_a, xmm_a); @@ -95,22 +98,22 @@ void EmitX64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) { 
code.paddw(tmp_b, code.Const(xword, 0x80008000)); code.pcmpgtw(tmp_b, tmp_a); // *Signed* comparison! - ctx.reg_alloc.DefineValue(ge_inst, tmp_b); + ctx.reg_alloc.DefineValue(code, ge_inst, tmp_b); } } - ctx.reg_alloc.DefineValue(inst, xmm_a); + ctx.reg_alloc.DefineValue(code, inst, xmm_a); } void EmitX64::EmitPackedAddS16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); if (ge_inst) { - const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(code); code.pcmpeqw(xmm0, xmm0); @@ -118,45 +121,45 @@ void EmitX64::EmitPackedAddS16(EmitContext& ctx, IR::Inst* inst) { code.paddsw(xmm_ge, xmm_b); code.pcmpgtw(xmm_ge, xmm0); - ctx.reg_alloc.DefineValue(ge_inst, xmm_ge); + ctx.reg_alloc.DefineValue(code, ge_inst, xmm_ge); } code.paddw(xmm_a, xmm_b); - ctx.reg_alloc.DefineValue(inst, xmm_a); + ctx.reg_alloc.DefineValue(code, inst, xmm_a); } void EmitX64::EmitPackedSubU8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); if (ge_inst) { - const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(code); code.movdqa(xmm_ge, xmm_a); code.pmaxub(xmm_ge, xmm_b); code.pcmpeqb(xmm_ge, xmm_a); - ctx.reg_alloc.DefineValue(ge_inst, xmm_ge); + ctx.reg_alloc.DefineValue(code, ge_inst, xmm_ge); } code.psubb(xmm_a, xmm_b); - ctx.reg_alloc.DefineValue(inst, xmm_a); + ctx.reg_alloc.DefineValue(code, inst, xmm_a); } void EmitX64::EmitPackedSubS8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); if (ge_inst) { - const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(code); code.pcmpeqb(xmm0, xmm0); @@ -164,12 +167,12 @@ void EmitX64::EmitPackedSubS8(EmitContext& ctx, IR::Inst* inst) { code.psubsb(xmm_ge, xmm_b); code.pcmpgtb(xmm_ge, xmm0); - ctx.reg_alloc.DefineValue(ge_inst, xmm_ge); + ctx.reg_alloc.DefineValue(code, ge_inst, xmm_ge); } code.psubb(xmm_a, xmm_b); - ctx.reg_alloc.DefineValue(inst, xmm_a); + ctx.reg_alloc.DefineValue(code, inst, xmm_a); } void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) { @@ -177,19 +180,19 @@ void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) { const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); if (!ge_inst) { - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm xmm_a = 
ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); code.psubw(xmm_a, xmm_b); - ctx.reg_alloc.DefineValue(inst, xmm_a); + ctx.reg_alloc.DefineValue(code, inst, xmm_a); return; } if (code.HasHostFeature(HostFeature::SSE41)) { - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); - const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(code); code.movdqa(xmm_ge, xmm_a); code.pmaxuw(xmm_ge, xmm_b); // Requires SSE 4.1 @@ -197,15 +200,15 @@ void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) { code.psubw(xmm_a, xmm_b); - ctx.reg_alloc.DefineValue(ge_inst, xmm_ge); - ctx.reg_alloc.DefineValue(inst, xmm_a); + ctx.reg_alloc.DefineValue(code, ge_inst, xmm_ge); + ctx.reg_alloc.DefineValue(code, inst, xmm_a); return; } - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(args[1]); - const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm(code); // (a >= b) == !(b > a) code.pcmpeqb(ones, ones); @@ -217,19 +220,19 @@ void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) { code.psubw(xmm_a, xmm_b); - ctx.reg_alloc.DefineValue(ge_inst, xmm_ge); - ctx.reg_alloc.DefineValue(inst, xmm_a); + ctx.reg_alloc.DefineValue(code, ge_inst, xmm_ge); + ctx.reg_alloc.DefineValue(code, inst, xmm_a); } void EmitX64::EmitPackedSubS16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); if (ge_inst) { - const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(code); code.pcmpeqw(xmm0, xmm0); @@ -237,21 +240,21 @@ void EmitX64::EmitPackedSubS16(EmitContext& ctx, IR::Inst* inst) { code.psubsw(xmm_ge, xmm_b); code.pcmpgtw(xmm_ge, xmm0); - ctx.reg_alloc.DefineValue(ge_inst, xmm_ge); + ctx.reg_alloc.DefineValue(code, ge_inst, xmm_ge); } code.psubw(xmm_a, xmm_b); - ctx.reg_alloc.DefineValue(inst, xmm_a); + ctx.reg_alloc.DefineValue(code, inst, xmm_a); } void EmitX64::EmitPackedHalvingAddU8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - if (args[0].IsInXmm() || args[1].IsInXmm()) { - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(args[1]); - const Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm(); + if (args[0].IsInXmm(ctx.reg_alloc) || args[1].IsInXmm(ctx.reg_alloc)) { + const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + const Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm(code); // Since, // pavg(a, b) == (a + b + 1) >> 1 @@ 
-264,11 +267,11 @@ void EmitX64::EmitPackedHalvingAddU8(EmitContext& ctx, IR::Inst* inst) { code.pavgb(xmm_a, xmm_b); code.pxor(xmm_a, ones); - ctx.reg_alloc.DefineValue(inst, xmm_a); + ctx.reg_alloc.DefineValue(code, inst, xmm_a); } else { - const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); - const Xbyak::Reg32 reg_b = ctx.reg_alloc.UseGpr(args[1]).cvt32(); - const Xbyak::Reg32 xor_a_b = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32(); + const Xbyak::Reg32 reg_b = ctx.reg_alloc.UseGpr(code, args[1]).cvt32(); + const Xbyak::Reg32 xor_a_b = ctx.reg_alloc.ScratchGpr(code).cvt32(); const Xbyak::Reg32 and_a_b = reg_a; const Xbyak::Reg32 result = reg_a; @@ -284,17 +287,17 @@ void EmitX64::EmitPackedHalvingAddU8(EmitContext& ctx, IR::Inst* inst) { code.and_(xor_a_b, 0x7F7F7F7F); code.add(result, xor_a_b); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } } void EmitX64::EmitPackedHalvingAddU16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - if (args[0].IsInXmm() || args[1].IsInXmm()) { - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + if (args[0].IsInXmm(ctx.reg_alloc) || args[1].IsInXmm(ctx.reg_alloc)) { + const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, xmm_a); code.pand(xmm_a, xmm_b); @@ -302,11 +305,11 @@ void EmitX64::EmitPackedHalvingAddU16(EmitContext& ctx, IR::Inst* inst) { code.psrlw(tmp, 1); code.paddw(xmm_a, tmp); - ctx.reg_alloc.DefineValue(inst, xmm_a); + ctx.reg_alloc.DefineValue(code, inst, xmm_a); } else { - const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); - const Xbyak::Reg32 reg_b = ctx.reg_alloc.UseGpr(args[1]).cvt32(); - const Xbyak::Reg32 xor_a_b = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32(); + const Xbyak::Reg32 reg_b = ctx.reg_alloc.UseGpr(code, args[1]).cvt32(); + const Xbyak::Reg32 xor_a_b = ctx.reg_alloc.ScratchGpr(code).cvt32(); const Xbyak::Reg32 and_a_b = reg_a; const Xbyak::Reg32 result = reg_a; @@ -322,19 +325,19 @@ void EmitX64::EmitPackedHalvingAddU16(EmitContext& ctx, IR::Inst* inst) { code.and_(xor_a_b, 0x7FFF7FFF); code.add(result, xor_a_b); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } } void EmitX64::EmitPackedHalvingAddS8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); - const Xbyak::Reg32 reg_b = ctx.reg_alloc.UseGpr(args[1]).cvt32(); - const Xbyak::Reg32 xor_a_b = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32(); + const Xbyak::Reg32 reg_b = ctx.reg_alloc.UseGpr(code, args[1]).cvt32(); + const Xbyak::Reg32 xor_a_b = ctx.reg_alloc.ScratchGpr(code).cvt32(); const Xbyak::Reg32 and_a_b = reg_a; const Xbyak::Reg32 result = reg_a; - const Xbyak::Reg32 carry = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 carry = ctx.reg_alloc.ScratchGpr(code).cvt32(); // This relies on the equality x+y == ((x&y) << 1) + (x^y). // Note that x^y always contains the LSB of the result. 
@@ -352,15 +355,15 @@ void EmitX64::EmitPackedHalvingAddS8(EmitContext& ctx, IR::Inst* inst) { code.add(result, xor_a_b); code.xor_(result, carry); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitPackedHalvingAddS16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); // This relies on the equality x+y == ((x&y) << 1) + (x^y). // Note that x^y always contains the LSB of the result. @@ -373,14 +376,14 @@ void EmitX64::EmitPackedHalvingAddS16(EmitContext& ctx, IR::Inst* inst) { code.psraw(tmp, 1); code.paddw(xmm_a, tmp); - ctx.reg_alloc.DefineValue(inst, xmm_a); + ctx.reg_alloc.DefineValue(code, inst, xmm_a); } void EmitX64::EmitPackedHalvingSubU8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg32 minuend = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); - const Xbyak::Reg32 subtrahend = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32(); + const Xbyak::Reg32 minuend = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32(); + const Xbyak::Reg32 subtrahend = ctx.reg_alloc.UseScratchGpr(code, args[1]).cvt32(); // This relies on the equality x-y == (x^y) - (((x^y)&y) << 1). // Note that x^y always contains the LSB of the result. @@ -403,16 +406,16 @@ void EmitX64::EmitPackedHalvingSubU8(EmitContext& ctx, IR::Inst* inst) { code.xor_(minuend, 0x80808080); // minuend now contains the desired result. - ctx.reg_alloc.DefineValue(inst, minuend); + ctx.reg_alloc.DefineValue(code, inst, minuend); } void EmitX64::EmitPackedHalvingSubS8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg32 minuend = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); - const Xbyak::Reg32 subtrahend = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32(); + const Xbyak::Reg32 minuend = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32(); + const Xbyak::Reg32 subtrahend = ctx.reg_alloc.UseScratchGpr(code, args[1]).cvt32(); - const Xbyak::Reg32 carry = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 carry = ctx.reg_alloc.ScratchGpr(code).cvt32(); // This relies on the equality x-y == (x^y) - (((x^y)&y) << 1). // Note that x^y always contains the LSB of the result. @@ -439,14 +442,14 @@ void EmitX64::EmitPackedHalvingSubS8(EmitContext& ctx, IR::Inst* inst) { code.xor_(minuend, 0x80808080); code.xor_(minuend, carry); - ctx.reg_alloc.DefineValue(inst, minuend); + ctx.reg_alloc.DefineValue(code, inst, minuend); } void EmitX64::EmitPackedHalvingSubU16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm minuend = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm subtrahend = ctx.reg_alloc.UseScratchXmm(args[1]); + const Xbyak::Xmm minuend = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm subtrahend = ctx.reg_alloc.UseScratchXmm(code, args[1]); // This relies on the equality x-y == (x^y) - (((x^y)&y) << 1). // Note that x^y always contains the LSB of the result. 
@@ -462,14 +465,14 @@ void EmitX64::EmitPackedHalvingSubU16(EmitContext& ctx, IR::Inst* inst) { code.psubw(minuend, subtrahend); - ctx.reg_alloc.DefineValue(inst, minuend); + ctx.reg_alloc.DefineValue(code, inst, minuend); } void EmitX64::EmitPackedHalvingSubS16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm minuend = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm subtrahend = ctx.reg_alloc.UseScratchXmm(args[1]); + const Xbyak::Xmm minuend = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm subtrahend = ctx.reg_alloc.UseScratchXmm(code, args[1]); // This relies on the equality x-y == (x^y) - (((x^y)&y) << 1). // Note that x^y always contains the LSB of the result. @@ -485,17 +488,17 @@ void EmitX64::EmitPackedHalvingSubS16(EmitContext& ctx, IR::Inst* inst) { code.psubw(minuend, subtrahend); - ctx.reg_alloc.DefineValue(inst, minuend); + ctx.reg_alloc.DefineValue(code, inst, minuend); } static void EmitPackedSubAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, bool hi_is_sum, bool is_signed, bool is_halving) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp); - const Xbyak::Reg32 reg_a_hi = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); - const Xbyak::Reg32 reg_b_hi = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32(); - const Xbyak::Reg32 reg_a_lo = ctx.reg_alloc.ScratchGpr().cvt32(); - const Xbyak::Reg32 reg_b_lo = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 reg_a_hi = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32(); + const Xbyak::Reg32 reg_b_hi = ctx.reg_alloc.UseScratchGpr(code, args[1]).cvt32(); + const Xbyak::Reg32 reg_a_lo = ctx.reg_alloc.ScratchGpr(code).cvt32(); + const Xbyak::Reg32 reg_b_lo = ctx.reg_alloc.ScratchGpr(code).cvt32(); Xbyak::Reg32 reg_sum, reg_diff; if (is_signed) { @@ -543,7 +546,7 @@ static void EmitPackedSubAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst code.and_(ge_diff, hi_is_sum ? 0x0000FFFF : 0xFFFF0000); code.or_(ge_sum, ge_diff); - ctx.reg_alloc.DefineValue(ge_inst, ge_sum); + ctx.reg_alloc.DefineValue(code, ge_inst, ge_sum); } if (is_halving) { @@ -557,7 +560,7 @@ static void EmitPackedSubAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst // Merge them. 
code.shld(reg_a_hi, reg_a_lo, 16); - ctx.reg_alloc.DefineValue(inst, reg_a_hi); + ctx.reg_alloc.DefineValue(code, inst, reg_a_hi); } void EmitX64::EmitPackedAddSubU16(EmitContext& ctx, IR::Inst* inst) { @@ -595,12 +598,12 @@ void EmitX64::EmitPackedHalvingSubAddS16(EmitContext& ctx, IR::Inst* inst) { static void EmitPackedOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); (code.*fn)(xmm_a, xmm_b); - ctx.reg_alloc.DefineValue(inst, xmm_a); + ctx.reg_alloc.DefineValue(code, inst, xmm_a); } void EmitX64::EmitPackedSaturatedAddU8(EmitContext& ctx, IR::Inst* inst) { @@ -638,9 +641,9 @@ void EmitX64::EmitPackedSaturatedSubS16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitPackedAbsDiffSumU8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); // TODO: Optimize with zero-extension detection code.movaps(tmp, code.Const(xword, 0x0000'0000'ffff'ffff)); @@ -648,45 +651,45 @@ void EmitX64::EmitPackedAbsDiffSumU8(EmitContext& ctx, IR::Inst* inst) { code.pand(xmm_b, tmp); code.psadbw(xmm_a, xmm_b); - ctx.reg_alloc.DefineValue(inst, xmm_a); + ctx.reg_alloc.DefineValue(code, inst, xmm_a); } void EmitX64::EmitPackedSelect(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const size_t num_args_in_xmm = args[0].IsInXmm() + args[1].IsInXmm() + args[2].IsInXmm(); + const size_t num_args_in_xmm = args[0].IsInXmm(ctx.reg_alloc) + args[1].IsInXmm(ctx.reg_alloc) + args[2].IsInXmm(ctx.reg_alloc); if (num_args_in_xmm >= 2) { - const Xbyak::Xmm ge = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm to = ctx.reg_alloc.UseXmm(args[1]); - const Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[2]); + const Xbyak::Xmm ge = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm to = ctx.reg_alloc.UseXmm(code, args[1]); + const Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(code, args[2]); code.pand(from, ge); code.pandn(ge, to); code.por(from, ge); - ctx.reg_alloc.DefineValue(inst, from); + ctx.reg_alloc.DefineValue(code, inst, from); } else if (code.HasHostFeature(HostFeature::BMI1)) { - const Xbyak::Reg32 ge = ctx.reg_alloc.UseGpr(args[0]).cvt32(); - const Xbyak::Reg32 to = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32(); - const Xbyak::Reg32 from = ctx.reg_alloc.UseScratchGpr(args[2]).cvt32(); + const Xbyak::Reg32 ge = ctx.reg_alloc.UseGpr(code, args[0]).cvt32(); + const Xbyak::Reg32 to = ctx.reg_alloc.UseScratchGpr(code, args[1]).cvt32(); + const Xbyak::Reg32 from = ctx.reg_alloc.UseScratchGpr(code, args[2]).cvt32(); code.and_(from, ge); code.andn(to, ge, to); code.or_(from, to); - ctx.reg_alloc.DefineValue(inst, from); + ctx.reg_alloc.DefineValue(code, inst, from); } else { - const Xbyak::Reg32 ge = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); - const 
Xbyak::Reg32 to = ctx.reg_alloc.UseGpr(args[1]).cvt32(); - const Xbyak::Reg32 from = ctx.reg_alloc.UseScratchGpr(args[2]).cvt32(); + const Xbyak::Reg32 ge = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32(); + const Xbyak::Reg32 to = ctx.reg_alloc.UseGpr(code, args[1]).cvt32(); + const Xbyak::Reg32 from = ctx.reg_alloc.UseScratchGpr(code, args[2]).cvt32(); code.and_(from, ge); code.not_(ge); code.and_(ge, to); code.or_(from, ge); - ctx.reg_alloc.DefineValue(inst, from); + ctx.reg_alloc.DefineValue(code, inst, from); } } diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_saturation.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_saturation.cpp index 31231c02aa..2ba67f5a13 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_saturation.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_saturation.cpp @@ -34,9 +34,9 @@ template void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg result = ctx.reg_alloc.UseScratchGpr(args[0]).changeBit(size); - Xbyak::Reg addend = ctx.reg_alloc.UseGpr(args[1]).changeBit(size); - Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr().changeBit(size); + Xbyak::Reg result = ctx.reg_alloc.UseScratchGpr(code, args[0]).changeBit(size); + Xbyak::Reg addend = ctx.reg_alloc.UseGpr(code, args[1]).changeBit(size); + Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr(code).changeBit(size); constexpr u64 int_max = static_cast((std::numeric_limits>::max)()); if constexpr (size < 64) { @@ -66,21 +66,21 @@ void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) code.seto(overflow.cvt8()); if constexpr (has_overflow_inst) { if (const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp)) { - ctx.reg_alloc.DefineValue(overflow_inst, overflow); + ctx.reg_alloc.DefineValue(code, overflow_inst, overflow); } } else { code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow.cvt8()); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } template void EmitUnsignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - Xbyak::Reg op_result = ctx.reg_alloc.UseScratchGpr(args[0]).changeBit(size); - Xbyak::Reg addend = ctx.reg_alloc.UseScratchGpr(args[1]).changeBit(size); + Xbyak::Reg op_result = ctx.reg_alloc.UseScratchGpr(code, args[0]).changeBit(size); + Xbyak::Reg addend = ctx.reg_alloc.UseScratchGpr(code, args[1]).changeBit(size); constexpr u64 boundary = op == Op::Add ? 
(std::numeric_limits>::max)() : 0; @@ -96,11 +96,11 @@ void EmitUnsignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst code.cmovae(addend, op_result); } - const Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr(); + const Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr(code); code.setb(overflow.cvt8()); code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow.cvt8()); - ctx.reg_alloc.DefineValue(inst, addend); + ctx.reg_alloc.DefineValue(code, inst, addend); } } // anonymous namespace @@ -126,10 +126,10 @@ void EmitX64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) { overflow_inst->ReplaceUsesWith(no_overflow); } // TODO: DefineValue directly on Argument - const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(); - const Xbyak::Reg64 source = ctx.reg_alloc.UseGpr(args[0]); + const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(code); + const Xbyak::Reg64 source = ctx.reg_alloc.UseGpr(code, args[0]); code.mov(result.cvt32(), source.cvt32()); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } @@ -137,9 +137,9 @@ void EmitX64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) { const u32 positive_saturated_value = (1u << (N - 1)) - 1; const u32 negative_saturated_value = 1u << (N - 1); - const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); - const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseGpr(args[0]).cvt32(); - const Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32(); + const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseGpr(code, args[0]).cvt32(); + const Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr(code).cvt32(); // overflow now contains a value between 0 and mask if it was originally between {negative,positive}_saturated_value. 
code.lea(overflow, code.ptr[reg_a.cvt64() + negative_saturated_value]); @@ -156,10 +156,10 @@ void EmitX64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) { if (overflow_inst) { code.seta(overflow.cvt8()); - ctx.reg_alloc.DefineValue(overflow_inst, overflow); + ctx.reg_alloc.DefineValue(code, overflow_inst, overflow); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) { @@ -171,9 +171,9 @@ void EmitX64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) { const u32 saturated_value = (1u << N) - 1; - const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32(); - const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseGpr(args[0]).cvt32(); - const Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32(); + const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseGpr(code, args[0]).cvt32(); + const Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr(code).cvt32(); // Pseudocode: result = clamp(reg_a, 0, saturated_value); code.xor_(overflow, overflow); @@ -185,10 +185,10 @@ void EmitX64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) { if (overflow_inst) { code.seta(overflow.cvt8()); - ctx.reg_alloc.DefineValue(overflow_inst, overflow); + ctx.reg_alloc.DefineValue(code, overflow_inst, overflow); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitSignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) { @@ -210,9 +210,9 @@ void EmitX64::EmitSignedSaturatedAdd64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg32 x = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32(); - const Xbyak::Reg32 y = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32(); - const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 x = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32(); + const Xbyak::Reg32 y = ctx.reg_alloc.UseScratchGpr(code, args[1]).cvt32(); + const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32(); code.movsx(x, x.cvt16()); code.movsx(y, y.cvt16()); @@ -228,15 +228,15 @@ void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh16(EmitContext& ctx, code.sets(tmp.cvt8()); code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], tmp.cvt8()); - ctx.reg_alloc.DefineValue(inst, y); + ctx.reg_alloc.DefineValue(code, inst, y); } void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Reg64 x = ctx.reg_alloc.UseScratchGpr(args[0]); - const Xbyak::Reg64 y = ctx.reg_alloc.UseScratchGpr(args[1]); - const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(); + const Xbyak::Reg64 x = ctx.reg_alloc.UseScratchGpr(code, args[0]); + const Xbyak::Reg64 y = ctx.reg_alloc.UseScratchGpr(code, args[1]); + const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(code); code.movsxd(x, x.cvt32()); code.movsxd(y, y.cvt32()); @@ -252,7 +252,7 @@ void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh32(EmitContext& ctx, code.sets(tmp.cvt8()); code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], tmp.cvt8()); - ctx.reg_alloc.DefineValue(inst, y); + ctx.reg_alloc.DefineValue(code, inst, y); } void EmitX64::EmitSignedSaturatedSub8(EmitContext& ctx, IR::Inst* inst) { diff --git 
a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_sha.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_sha.cpp index 92e0841d8b..cd166f0cb8 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_sha.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_sha.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2022 MerryMage * SPDX-License-Identifier: 0BSD @@ -22,9 +25,9 @@ void EmitX64::EmitSHA256Hash(EmitContext& ctx, IR::Inst* inst) { // y = h g f e // w = wk3 wk2 wk1 wk0 - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]); - const Xbyak::Xmm w = ctx.reg_alloc.UseXmm(args[2]); + const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + const Xbyak::Xmm w = ctx.reg_alloc.UseXmm(code, args[2]); // x64 expects: // 3 2 1 0 @@ -45,7 +48,7 @@ void EmitX64::EmitSHA256Hash(EmitContext& ctx, IR::Inst* inst) { code.shufps(y, x, part1 ? 0b10111011 : 0b00010001); - ctx.reg_alloc.DefineValue(inst, y); + ctx.reg_alloc.DefineValue(code, inst, y); } void EmitX64::EmitSHA256MessageSchedule0(EmitContext& ctx, IR::Inst* inst) { @@ -53,12 +56,12 @@ void EmitX64::EmitSHA256MessageSchedule0(EmitContext& ctx, IR::Inst* inst) { ASSERT(code.HasHostFeature(HostFeature::SHA)); - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); code.sha256msg1(x, y); - ctx.reg_alloc.DefineValue(inst, x); + ctx.reg_alloc.DefineValue(code, inst, x); } void EmitX64::EmitSHA256MessageSchedule1(EmitContext& ctx, IR::Inst* inst) { @@ -66,16 +69,16 @@ void EmitX64::EmitSHA256MessageSchedule1(EmitContext& ctx, IR::Inst* inst) { ASSERT(code.HasHostFeature(HostFeature::SHA)); - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(args[1]); - const Xbyak::Xmm z = ctx.reg_alloc.UseXmm(args[2]); + const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); + const Xbyak::Xmm z = ctx.reg_alloc.UseXmm(code, args[2]); code.movaps(xmm0, z); code.palignr(xmm0, y, 4); code.paddd(x, xmm0); code.sha256msg2(x, z); - ctx.reg_alloc.DefineValue(inst, x); + ctx.reg_alloc.DefineValue(code, inst, x); } } // namespace Dynarmic::Backend::X64 diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_sm4.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_sm4.cpp index b084a92f91..e121e3119e 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_sm4.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_sm4.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. 
* Copyright (c) 2018 MerryMage * SPDX-License-Identifier: 0BSD @@ -13,7 +16,7 @@ namespace Dynarmic::Backend::X64 { void EmitX64::EmitSM4AccessSubstitutionBox(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - ctx.reg_alloc.HostCall(inst, args[0]); + ctx.reg_alloc.HostCall(code, inst, args[0]); code.CallFunction(&Common::Crypto::SM4::AccessSubstitutionBox); code.movzx(code.ABI_RETURN.cvt32(), code.ABI_RETURN.cvt8()); } diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp index ae7594aed5..2fa07cc946 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector.cpp @@ -47,24 +47,24 @@ template static void EmitVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); (code.*fn)(xmm_a, xmm_b); - ctx.reg_alloc.DefineValue(inst, xmm_a); + ctx.reg_alloc.DefineValue(code, inst, xmm_a); } template static void EmitAVXVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); (code.*fn)(xmm_a, xmm_a, xmm_b); - ctx.reg_alloc.DefineValue(inst, xmm_a); + ctx.reg_alloc.DefineValue(code, inst, xmm_a); } template @@ -72,12 +72,12 @@ static void EmitOneArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Ins const auto fn = static_cast*>(lambda); constexpr u32 stack_space = 2 * 16; auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); ctx.reg_alloc.EndOfAllocScope(); - ctx.reg_alloc.HostCall(nullptr); - ctx.reg_alloc.AllocStackSpace(stack_space + ABI_SHADOW_SPACE); + ctx.reg_alloc.HostCall(code, nullptr); + ctx.reg_alloc.AllocStackSpace(code, stack_space + ABI_SHADOW_SPACE); code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]); code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]); @@ -85,9 +85,9 @@ static void EmitOneArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Ins code.CallFunction(fn); code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]); - ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE); + ctx.reg_alloc.ReleaseStackSpace(code, stack_space + ABI_SHADOW_SPACE); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } template @@ -95,12 +95,12 @@ static void EmitOneArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext const auto fn = static_cast*>(lambda); constexpr u32 stack_space = 2 * 16; auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm result = 
ctx.reg_alloc.ScratchXmm(code); ctx.reg_alloc.EndOfAllocScope(); - ctx.reg_alloc.HostCall(nullptr); - ctx.reg_alloc.AllocStackSpace(stack_space + ABI_SHADOW_SPACE); + ctx.reg_alloc.HostCall(code, nullptr); + ctx.reg_alloc.AllocStackSpace(code, stack_space + ABI_SHADOW_SPACE); code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]); code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]); @@ -108,11 +108,11 @@ static void EmitOneArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext code.CallFunction(fn); code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]); - ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE); + ctx.reg_alloc.ReleaseStackSpace(code, stack_space + ABI_SHADOW_SPACE); code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8()); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } template @@ -120,13 +120,13 @@ static void EmitTwoArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext const auto fn = static_cast*>(lambda); constexpr u32 stack_space = 3 * 16; auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm arg2 = ctx.reg_alloc.UseXmm(args[1]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm arg2 = ctx.reg_alloc.UseXmm(code, args[1]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); ctx.reg_alloc.EndOfAllocScope(); - ctx.reg_alloc.HostCall(nullptr); - ctx.reg_alloc.AllocStackSpace(stack_space + ABI_SHADOW_SPACE); + ctx.reg_alloc.HostCall(code, nullptr); + ctx.reg_alloc.AllocStackSpace(code, stack_space + ABI_SHADOW_SPACE); code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]); code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]); code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]); @@ -136,11 +136,11 @@ static void EmitTwoArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext code.CallFunction(fn); code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]); - ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE); + ctx.reg_alloc.ReleaseStackSpace(code, stack_space + ABI_SHADOW_SPACE); code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8()); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } template @@ -148,13 +148,13 @@ static void EmitTwoArgumentFallbackWithSaturationAndImmediate(BlockOfCode& code, const auto fn = static_cast*>(lambda); constexpr u32 stack_space = 2 * 16; auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(args[0]); + const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(code, args[0]); const u8 arg2 = args[1].GetImmediateU8(); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); ctx.reg_alloc.EndOfAllocScope(); - ctx.reg_alloc.HostCall(nullptr); - ctx.reg_alloc.AllocStackSpace(stack_space + ABI_SHADOW_SPACE); + ctx.reg_alloc.HostCall(code, nullptr); + ctx.reg_alloc.AllocStackSpace(code, stack_space + ABI_SHADOW_SPACE); code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]); code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]); @@ -163,11 +163,11 @@ static void EmitTwoArgumentFallbackWithSaturationAndImmediate(BlockOfCode& code, code.CallFunction(fn); code.movaps(result, xword[rsp + ABI_SHADOW_SPACE 
+ 0 * 16]); - ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE); + ctx.reg_alloc.ReleaseStackSpace(code, stack_space + ABI_SHADOW_SPACE); code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8()); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } template @@ -175,13 +175,13 @@ static void EmitTwoArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Ins const auto fn = static_cast*>(lambda); constexpr u32 stack_space = 3 * 16; auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm arg2 = ctx.reg_alloc.UseXmm(args[1]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm arg2 = ctx.reg_alloc.UseXmm(code, args[1]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); ctx.reg_alloc.EndOfAllocScope(); - ctx.reg_alloc.HostCall(nullptr); - ctx.reg_alloc.AllocStackSpace(stack_space + ABI_SHADOW_SPACE); + ctx.reg_alloc.HostCall(code, nullptr); + ctx.reg_alloc.AllocStackSpace(code, stack_space + ABI_SHADOW_SPACE); code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]); code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]); code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]); @@ -191,9 +191,9 @@ static void EmitTwoArgumentFallback(BlockOfCode& code, EmitContext& ctx, IR::Ins code.CallFunction(fn); code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + 0 * 16]); - ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE); + ctx.reg_alloc.ReleaseStackSpace(code, stack_space + ABI_SHADOW_SPACE); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitVectorGetElement8(EmitContext& ctx, IR::Inst* inst) { @@ -203,8 +203,8 @@ void EmitX64::EmitVectorGetElement8(EmitContext& ctx, IR::Inst* inst) { // TODO: DefineValue directly on Argument for index == 0 - const Xbyak::Xmm source = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Reg32 dest = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Xmm source = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Reg32 dest = ctx.reg_alloc.ScratchGpr(code).cvt32(); if (code.HasHostFeature(HostFeature::SSE41)) { code.pextrb(dest, source, index); @@ -217,7 +217,7 @@ void EmitX64::EmitVectorGetElement8(EmitContext& ctx, IR::Inst* inst) { } } - ctx.reg_alloc.DefineValue(inst, dest); + ctx.reg_alloc.DefineValue(code, inst, dest); } void EmitX64::EmitVectorGetElement16(EmitContext& ctx, IR::Inst* inst) { @@ -227,10 +227,10 @@ void EmitX64::EmitVectorGetElement16(EmitContext& ctx, IR::Inst* inst) { // TODO: DefineValue directly on Argument for index == 0 - const Xbyak::Xmm source = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Reg32 dest = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Xmm source = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Reg32 dest = ctx.reg_alloc.ScratchGpr(code).cvt32(); code.pextrw(dest, source, index); - ctx.reg_alloc.DefineValue(inst, dest); + ctx.reg_alloc.DefineValue(code, inst, dest); } void EmitX64::EmitVectorGetElement32(EmitContext& ctx, IR::Inst* inst) { @@ -240,18 +240,18 @@ void EmitX64::EmitVectorGetElement32(EmitContext& ctx, IR::Inst* inst) { // TODO: DefineValue directly on Argument for index == 0 - const Xbyak::Reg32 dest = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 dest = ctx.reg_alloc.ScratchGpr(code).cvt32(); if (code.HasHostFeature(HostFeature::SSE41)) { 
- const Xbyak::Xmm source = ctx.reg_alloc.UseXmm(args[0]); + const Xbyak::Xmm source = ctx.reg_alloc.UseXmm(code, args[0]); code.pextrd(dest, source, index); } else { - const Xbyak::Xmm source = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm source = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.pshufd(source, source, index); code.movd(dest, source); } - ctx.reg_alloc.DefineValue(inst, dest); + ctx.reg_alloc.DefineValue(code, inst, dest); } void EmitX64::EmitVectorGetElement64(EmitContext& ctx, IR::Inst* inst) { @@ -261,42 +261,42 @@ void EmitX64::EmitVectorGetElement64(EmitContext& ctx, IR::Inst* inst) { if (index == 0) { // TODO: DefineValue directly on Argument for index == 0 - const Xbyak::Reg64 dest = ctx.reg_alloc.ScratchGpr().cvt64(); - const Xbyak::Xmm source = ctx.reg_alloc.UseXmm(args[0]); + const Xbyak::Reg64 dest = ctx.reg_alloc.ScratchGpr(code).cvt64(); + const Xbyak::Xmm source = ctx.reg_alloc.UseXmm(code, args[0]); code.movq(dest, source); - ctx.reg_alloc.DefineValue(inst, dest); + ctx.reg_alloc.DefineValue(code, inst, dest); return; } - const Xbyak::Reg64 dest = ctx.reg_alloc.ScratchGpr().cvt64(); + const Xbyak::Reg64 dest = ctx.reg_alloc.ScratchGpr(code).cvt64(); if (code.HasHostFeature(HostFeature::SSE41)) { - const Xbyak::Xmm source = ctx.reg_alloc.UseXmm(args[0]); + const Xbyak::Xmm source = ctx.reg_alloc.UseXmm(code, args[0]); code.pextrq(dest, source, 1); } else { - const Xbyak::Xmm source = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm source = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.punpckhqdq(source, source); code.movq(dest, source); } - ctx.reg_alloc.DefineValue(inst, dest); + ctx.reg_alloc.DefineValue(code, inst, dest); } void EmitX64::EmitVectorSetElement8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); - const Xbyak::Xmm source_vector = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm source_vector = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSE41)) { - const Xbyak::Reg8 source_elem = ctx.reg_alloc.UseGpr(args[2]).cvt8(); + const Xbyak::Reg8 source_elem = ctx.reg_alloc.UseGpr(code, args[2]).cvt8(); code.pinsrb(source_vector, source_elem.cvt32(), index); - ctx.reg_alloc.DefineValue(inst, source_vector); + ctx.reg_alloc.DefineValue(code, inst, source_vector); } else { - const Xbyak::Reg32 source_elem = ctx.reg_alloc.UseScratchGpr(args[2]).cvt32(); - const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 source_elem = ctx.reg_alloc.UseScratchGpr(code, args[2]).cvt32(); + const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32(); code.pextrw(tmp, source_vector, index / 2); if (index % 2 == 0) { @@ -310,7 +310,7 @@ void EmitX64::EmitVectorSetElement8(EmitContext& ctx, IR::Inst* inst) { } code.pinsrw(source_vector, tmp, index / 2); - ctx.reg_alloc.DefineValue(inst, source_vector); + ctx.reg_alloc.DefineValue(code, inst, source_vector); } } @@ -319,34 +319,34 @@ void EmitX64::EmitVectorSetElement16(EmitContext& ctx, IR::Inst* inst) { ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); - const Xbyak::Xmm source_vector = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Reg16 source_elem = ctx.reg_alloc.UseGpr(args[2]).cvt16(); + const Xbyak::Xmm source_vector = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Reg16 source_elem = ctx.reg_alloc.UseGpr(code, args[2]).cvt16(); code.pinsrw(source_vector, 
source_elem.cvt32(), index); - ctx.reg_alloc.DefineValue(inst, source_vector); + ctx.reg_alloc.DefineValue(code, inst, source_vector); } void EmitX64::EmitVectorSetElement32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); - const Xbyak::Xmm source_vector = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm source_vector = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSE41)) { - const Xbyak::Reg32 source_elem = ctx.reg_alloc.UseGpr(args[2]).cvt32(); + const Xbyak::Reg32 source_elem = ctx.reg_alloc.UseGpr(code, args[2]).cvt32(); code.pinsrd(source_vector, source_elem, index); - ctx.reg_alloc.DefineValue(inst, source_vector); + ctx.reg_alloc.DefineValue(code, inst, source_vector); } else { - const Xbyak::Reg32 source_elem = ctx.reg_alloc.UseScratchGpr(args[2]).cvt32(); + const Xbyak::Reg32 source_elem = ctx.reg_alloc.UseScratchGpr(code, args[2]).cvt32(); code.pinsrw(source_vector, source_elem, index * 2); code.shr(source_elem, 16); code.pinsrw(source_vector, source_elem, index * 2 + 1); - ctx.reg_alloc.DefineValue(inst, source_vector); + ctx.reg_alloc.DefineValue(code, inst, source_vector); } } @@ -354,17 +354,17 @@ void EmitX64::EmitVectorSetElement64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); - const Xbyak::Xmm source_vector = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm source_vector = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSE41)) { - const Xbyak::Reg64 source_elem = ctx.reg_alloc.UseGpr(args[2]); + const Xbyak::Reg64 source_elem = ctx.reg_alloc.UseGpr(code, args[2]); code.pinsrq(source_vector, source_elem, index); - ctx.reg_alloc.DefineValue(inst, source_vector); + ctx.reg_alloc.DefineValue(code, inst, source_vector); } else { - const Xbyak::Reg64 source_elem = ctx.reg_alloc.UseGpr(args[2]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Reg64 source_elem = ctx.reg_alloc.UseGpr(code, args[2]); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); code.movq(tmp, source_elem); @@ -374,7 +374,7 @@ void EmitX64::EmitVectorSetElement64(EmitContext& ctx, IR::Inst* inst) { code.punpcklqdq(source_vector, tmp); } - ctx.reg_alloc.DefineValue(inst, source_vector); + ctx.reg_alloc.DefineValue(code, inst, source_vector); } } @@ -382,7 +382,7 @@ static void VectorAbs8(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& da if (code.HasHostFeature(HostFeature::SSSE3)) { code.pabsb(data, data); } else { - const Xbyak::Xmm temp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm temp = ctx.reg_alloc.ScratchXmm(code); code.pxor(temp, temp); code.psubb(temp, data); code.pminub(data, temp); @@ -393,7 +393,7 @@ static void VectorAbs16(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& d if (code.HasHostFeature(HostFeature::SSSE3)) { code.pabsw(data, data); } else { - const Xbyak::Xmm temp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm temp = ctx.reg_alloc.ScratchXmm(code); code.pxor(temp, temp); code.psubw(temp, data); code.pmaxsw(data, temp); @@ -404,7 +404,7 @@ static void VectorAbs32(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& d if (code.HasHostFeature(HostFeature::SSSE3)) { code.pabsd(data, data); } else { - const Xbyak::Xmm temp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm temp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(temp, data); 
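        // psrad by 31 spreads each lane's sign bit into a full-lane mask, which is then used to negate the negative lanes and produce the absolute value.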
code.psrad(temp, 31); code.pxor(data, temp); @@ -416,7 +416,7 @@ static void VectorAbs64(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& d if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { code.vpabsq(data, data); } else { - const Xbyak::Xmm temp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm temp = ctx.reg_alloc.ScratchXmm(code); code.pshufd(temp, data, 0b11110101); code.psrad(temp, 31); code.pxor(data, temp); @@ -427,7 +427,7 @@ static void VectorAbs64(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& d static void EmitVectorAbs(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); switch (esize) { case 8: @@ -444,7 +444,7 @@ static void EmitVectorAbs(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockO break; } - ctx.reg_alloc.DefineValue(inst, data); + ctx.reg_alloc.DefineValue(code, inst, data); } void EmitX64::EmitVectorAbs8(EmitContext& ctx, IR::Inst* inst) { @@ -486,12 +486,12 @@ void EmitX64::EmitVectorAnd(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorAndNot(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(args[1]); + const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.pandn(xmm_b, xmm_a); - ctx.reg_alloc.DefineValue(inst, xmm_b); + ctx.reg_alloc.DefineValue(code, inst, xmm_b); } static void ArithmeticShiftRightByte(EmitContext& ctx, BlockOfCode& code, const Xbyak::Xmm& result, u8 shift_amount) { @@ -503,7 +503,7 @@ static void ArithmeticShiftRightByte(EmitContext& ctx, BlockOfCode& code, const return; } - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); code.punpckhbw(tmp, result); code.punpcklbw(result, result); @@ -515,46 +515,46 @@ static void ArithmeticShiftRightByte(EmitContext& ctx, BlockOfCode& code, const void EmitX64::EmitVectorArithmeticShiftRight8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); ArithmeticShiftRightByte(ctx, code, result, shift_amount); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitVectorArithmeticShiftRight16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); code.psraw(result, shift_amount); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitVectorArithmeticShiftRight32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); code.psrad(result, shift_amount); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void 
EmitX64::EmitVectorArithmeticShiftRight64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = (std::min)(args[1].GetImmediateU8(), u8(63)); if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { code.vpsraq(result, result, shift_amount); } else { - const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(code); const u64 sign_bit = 0x80000000'00000000u >> shift_amount; @@ -566,7 +566,7 @@ void EmitX64::EmitVectorArithmeticShiftRight64(EmitContext& ctx, IR::Inst* inst) code.por(result, tmp2); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } template @@ -607,10 +607,10 @@ void EmitX64::EmitVectorArithmeticVShift16(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]); - const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(code, args[1]); + const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); code.vmovdqa32(tmp, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.vpxord(right_shift, right_shift, right_shift); @@ -629,7 +629,7 @@ void EmitX64::EmitVectorArithmeticVShift16(EmitContext& ctx, IR::Inst* inst) { code.vpsllvw(result, result, left_shift); code.vpblendmb(result | mask, result, tmp); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } @@ -642,9 +642,9 @@ void EmitX64::EmitVectorArithmeticVShift32(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::AVX2)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); // store sign bit of lowest byte of each element of b to select left/right shift later code.vpslld(xmm0, b, 24); @@ -659,7 +659,7 @@ void EmitX64::EmitVectorArithmeticVShift32(EmitContext& ctx, IR::Inst* inst) { code.blendvps(result, a); // implicit argument: xmm0 (sign of lowest byte of b) - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } @@ -672,10 +672,10 @@ void EmitX64::EmitVectorArithmeticVShift64(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::AVX512_Ortho)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]); - const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm 
result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(code, args[1]); + const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); code.vmovdqa32(tmp, code.Const(xword, 0x00000000000000FF, 0x00000000000000FF)); code.vpxorq(right_shift, right_shift, right_shift); @@ -692,17 +692,17 @@ void EmitX64::EmitVectorArithmeticVShift64(EmitContext& ctx, IR::Inst* inst) { code.vpsllvq(result, result, left_shift); code.vpblendmq(result | mask, result, tmp); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } if (code.HasHostFeature(HostFeature::AVX2)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm negative_mask = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm negative_mask = ctx.reg_alloc.ScratchXmm(code); // negative_mask = a < 0 ? 1s : 0s code.vpxor(xmm0, xmm0, xmm0); @@ -726,7 +726,7 @@ void EmitX64::EmitVectorArithmeticVShift64(EmitContext& ctx, IR::Inst* inst) { code.blendvpd(result, a); // implicit argument: xmm0 (sign of lowest byte of b) - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } @@ -737,13 +737,13 @@ void EmitX64::EmitVectorArithmeticVShift64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorBroadcastLower8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::AVX2)) { code.vpbroadcastb(a, a); code.vmovq(a, a); } else if (code.HasHostFeature(HostFeature::SSSE3)) { - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); code.pxor(tmp, tmp); code.pshufb(a, tmp); @@ -753,35 +753,35 @@ void EmitX64::EmitVectorBroadcastLower8(EmitContext& ctx, IR::Inst* inst) { code.pshuflw(a, a, 0); } - ctx.reg_alloc.DefineValue(inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } void EmitX64::EmitVectorBroadcastLower16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.pshuflw(a, a, 0); - ctx.reg_alloc.DefineValue(inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } void EmitX64::EmitVectorBroadcastLower32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.pshuflw(a, a, 0b01000100); - ctx.reg_alloc.DefineValue(inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } void EmitX64::EmitVectorBroadcast8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::AVX2)) { code.vpbroadcastb(a, a); } else if 
(code.HasHostFeature(HostFeature::SSSE3)) { - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); code.pxor(tmp, tmp); code.pshufb(a, tmp); @@ -791,12 +791,12 @@ void EmitX64::EmitVectorBroadcast8(EmitContext& ctx, IR::Inst* inst) { code.punpcklqdq(a, a); } - ctx.reg_alloc.DefineValue(inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } void EmitX64::EmitVectorBroadcast16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::AVX2)) { code.vpbroadcastw(a, a); @@ -805,12 +805,12 @@ void EmitX64::EmitVectorBroadcast16(EmitContext& ctx, IR::Inst* inst) { code.punpcklqdq(a, a); } - ctx.reg_alloc.DefineValue(inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } void EmitX64::EmitVectorBroadcast32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::AVX2)) { code.vpbroadcastd(a, a); @@ -818,12 +818,12 @@ void EmitX64::EmitVectorBroadcast32(EmitContext& ctx, IR::Inst* inst) { code.pshufd(a, a, 0); } - ctx.reg_alloc.DefineValue(inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } void EmitX64::EmitVectorBroadcast64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::AVX2)) { code.vpbroadcastq(a, a); @@ -831,12 +831,12 @@ void EmitX64::EmitVectorBroadcast64(EmitContext& ctx, IR::Inst* inst) { code.punpcklqdq(a, a); } - ctx.reg_alloc.DefineValue(inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } void EmitX64::EmitVectorBroadcastElementLower8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); ASSERT(index < 16); @@ -849,7 +849,7 @@ void EmitX64::EmitVectorBroadcastElementLower8(EmitContext& ctx, IR::Inst* inst) code.vpbroadcastb(a, a); code.vmovq(a, a); } else if (code.HasHostFeature(HostFeature::SSSE3)) { - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); code.pxor(tmp, tmp); code.pshufb(a, tmp); @@ -859,12 +859,12 @@ void EmitX64::EmitVectorBroadcastElementLower8(EmitContext& ctx, IR::Inst* inst) code.pshuflw(a, a, 0); } - ctx.reg_alloc.DefineValue(inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } void EmitX64::EmitVectorBroadcastElementLower16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); ASSERT(index < 8); @@ -875,12 +875,12 @@ void EmitX64::EmitVectorBroadcastElementLower16(EmitContext& ctx, IR::Inst* inst code.pshuflw(a, a, 0); - ctx.reg_alloc.DefineValue(inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } void EmitX64::EmitVectorBroadcastElementLower32(EmitContext& ctx, IR::Inst* inst) { auto args = 
ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); ASSERT(index < 4); @@ -891,12 +891,12 @@ void EmitX64::EmitVectorBroadcastElementLower32(EmitContext& ctx, IR::Inst* inst code.pshuflw(a, a, 0b01'00'01'00); - ctx.reg_alloc.DefineValue(inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } void EmitX64::EmitVectorBroadcastElement8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); ASSERT(index < 16); @@ -908,7 +908,7 @@ void EmitX64::EmitVectorBroadcastElement8(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::AVX2)) { code.vpbroadcastb(a, a); } else if (code.HasHostFeature(HostFeature::SSSE3)) { - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); code.pxor(tmp, tmp); code.pshufb(a, tmp); @@ -917,12 +917,12 @@ void EmitX64::EmitVectorBroadcastElement8(EmitContext& ctx, IR::Inst* inst) { code.pshuflw(a, a, 0); code.punpcklqdq(a, a); } - ctx.reg_alloc.DefineValue(inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } void EmitX64::EmitVectorBroadcastElement16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); ASSERT(index < 8); @@ -930,7 +930,7 @@ void EmitX64::EmitVectorBroadcastElement16(EmitContext& ctx, IR::Inst* inst) { if (index == 0 && code.HasHostFeature(HostFeature::AVX2)) { code.vpbroadcastw(a, a); - ctx.reg_alloc.DefineValue(inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); return; } @@ -942,24 +942,24 @@ void EmitX64::EmitVectorBroadcastElement16(EmitContext& ctx, IR::Inst* inst) { code.punpckhqdq(a, a); } - ctx.reg_alloc.DefineValue(inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } void EmitX64::EmitVectorBroadcastElement32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); ASSERT(index < 4); code.pshufd(a, a, mcl::bit::replicate_element<2, u8>(index)); - ctx.reg_alloc.DefineValue(inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } void EmitX64::EmitVectorBroadcastElement64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); ASSERT(args[1].IsImmediate()); const u8 index = args[1].GetImmediateU8(); ASSERT(index < 2); @@ -973,7 +973,7 @@ void EmitX64::EmitVectorBroadcastElement64(EmitContext& ctx, IR::Inst* inst) { code.punpckhqdq(a, a); } } - ctx.reg_alloc.DefineValue(inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } template @@ -995,8 +995,8 @@ void EmitX64::EmitVectorCountLeadingZeros8(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::GFNI)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = 
ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); // Reverse bits: code.gf2p8affineqb(data, code.BConst<64>(xword, 0x8040201008040201), 0); @@ -1009,13 +1009,13 @@ void EmitX64::EmitVectorCountLeadingZeros8(EmitContext& ctx, IR::Inst* inst) { // Convert lowest set bit into an index code.gf2p8affineqb(result, code.BConst<64>(xword, 0xaaccf0ff'00000000), 8); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } else if (code.HasHostFeature(HostFeature::SSSE3)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp1, code.Const(xword, 0x0101010102020304, 0x0000000000000000)); code.movdqa(tmp2, tmp1); @@ -1031,7 +1031,7 @@ void EmitX64::EmitVectorCountLeadingZeros8(EmitContext& ctx, IR::Inst* inst) { code.pand(data, tmp2); code.paddb(data, tmp1); - ctx.reg_alloc.DefineValue(inst, data); + ctx.reg_alloc.DefineValue(code, inst, data); } else { EmitOneArgumentFallback(code, ctx, inst, EmitVectorCountLeadingZeros); } @@ -1041,10 +1041,10 @@ void EmitX64::EmitVectorCountLeadingZeros16(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::AVX)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); code.vpsrlw(tmp, data, 1); code.vpor(data, data, tmp); @@ -1066,14 +1066,14 @@ void EmitX64::EmitVectorCountLeadingZeros16(EmitContext& ctx, IR::Inst* inst) { code.vpor(data, data, tmp); code.vpshufb(result, result, data); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } else if (code.HasHostFeature(HostFeature::SSSE3)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, data); code.psrlw(tmp, 1); @@ -1099,7 +1099,7 @@ void EmitX64::EmitVectorCountLeadingZeros16(EmitContext& ctx, IR::Inst* inst) { code.por(data, tmp); code.pshufb(result, data); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } else { EmitOneArgumentFallback(code, ctx, inst, EmitVectorCountLeadingZeros); } @@ -1108,13 +1108,13 @@ void EmitX64::EmitVectorCountLeadingZeros16(EmitContext& ctx, IR::Inst* inst) { void 
EmitX64::EmitVectorCountLeadingZeros32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512CD)) { - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.vplzcntd(data, data); - ctx.reg_alloc.DefineValue(inst, data); + ctx.reg_alloc.DefineValue(code, inst, data); // See https://stackoverflow.com/questions/58823140/count-leading-zero-bits-for-each-element-in-avx2-vector-emulate-mm256-lzcnt-ep/58827596#58827596 } else if (code.HasHostFeature(HostFeature::AVX2)) { - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm temp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm temp = ctx.reg_alloc.ScratchXmm(code); code.vmovdqa(temp, data); code.vpsrld(data, data, 8); code.vpandn(data, data, temp); @@ -1123,7 +1123,7 @@ void EmitX64::EmitVectorCountLeadingZeros32(EmitContext& ctx, IR::Inst* inst) { code.vpsrld(data, data, 23); code.vpsubusw(data, temp, data); code.vpminsw(data, data, code.Const(xword, 0x0000002000000020, 0x0000002000000020)); - ctx.reg_alloc.DefineValue(inst, data); + ctx.reg_alloc.DefineValue(code, inst, data); } else { EmitOneArgumentFallback(code, ctx, inst, EmitVectorCountLeadingZeros); } @@ -1131,25 +1131,25 @@ void EmitX64::EmitVectorCountLeadingZeros32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorDeinterleaveEven8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.pand(lhs, tmp); code.pand(rhs, tmp); code.packuswb(lhs, rhs); - ctx.reg_alloc.DefineValue(inst, lhs); + ctx.reg_alloc.DefineValue(code, inst, lhs); } void EmitX64::EmitVectorDeinterleaveEven16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]); + const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); if (code.HasHostFeature(HostFeature::SSE41)) { - const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm(code); code.pxor(zero, zero); code.pblendw(lhs, zero, 0b10101010); @@ -1165,41 +1165,41 @@ void EmitX64::EmitVectorDeinterleaveEven16(EmitContext& ctx, IR::Inst* inst) { code.packssdw(lhs, rhs); } - ctx.reg_alloc.DefineValue(inst, lhs); + ctx.reg_alloc.DefineValue(code, inst, lhs); } void EmitX64::EmitVectorDeinterleaveEven32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(code, args[1]); code.shufps(lhs, rhs, 0b10001000); - ctx.reg_alloc.DefineValue(inst, lhs); + 
ctx.reg_alloc.DefineValue(code, inst, lhs); } void EmitX64::EmitVectorDeinterleaveEven64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(code, args[1]); code.shufpd(lhs, rhs, 0b00); - ctx.reg_alloc.DefineValue(inst, lhs); + ctx.reg_alloc.DefineValue(code, inst, lhs); } void EmitX64::EmitVectorDeinterleaveEvenLower8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSSE3)) { - const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(code, args[1]); code.punpcklbw(lhs, rhs); code.pshufb(lhs, code.Const(xword, 0x0D'09'05'01'0C'08'04'00, 0x8080808080808080)); } else { - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.movdqa(tmp, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.pand(lhs, tmp); @@ -1209,20 +1209,20 @@ void EmitX64::EmitVectorDeinterleaveEvenLower8(EmitContext& ctx, IR::Inst* inst) code.movq(lhs, lhs); } - ctx.reg_alloc.DefineValue(inst, lhs); + ctx.reg_alloc.DefineValue(code, inst, lhs); } void EmitX64::EmitVectorDeinterleaveEvenLower16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSSE3)) { - const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(code, args[1]); code.punpcklwd(lhs, rhs); code.pshufb(lhs, code.Const(xword, 0x0B0A'0302'0908'0100, 0x8080'8080'8080'8080)); } else { - const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]); + const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.pslld(lhs, 16); code.psrad(lhs, 16); @@ -1235,13 +1235,13 @@ void EmitX64::EmitVectorDeinterleaveEvenLower16(EmitContext& ctx, IR::Inst* inst code.movq(lhs, lhs); } - ctx.reg_alloc.DefineValue(inst, lhs); + ctx.reg_alloc.DefineValue(code, inst, lhs); } void EmitX64::EmitVectorDeinterleaveEvenLower32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(code, args[1]); if (code.HasHostFeature(HostFeature::SSE41)) { // copy bytes 0:3 of rhs to lhs, zero out upper 8 bytes @@ -1251,64 +1251,64 @@ void EmitX64::EmitVectorDeinterleaveEvenLower32(EmitContext& ctx, IR::Inst* inst code.movq(lhs, lhs); } - ctx.reg_alloc.DefineValue(inst, lhs); + ctx.reg_alloc.DefineValue(code, inst, lhs); } void EmitX64::EmitVectorDeinterleaveOdd8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]); + const 
Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.psraw(lhs, 8); code.psraw(rhs, 8); code.packsswb(lhs, rhs); - ctx.reg_alloc.DefineValue(inst, lhs); + ctx.reg_alloc.DefineValue(code, inst, lhs); } void EmitX64::EmitVectorDeinterleaveOdd16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]); + const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.psrad(lhs, 16); code.psrad(rhs, 16); code.packssdw(lhs, rhs); - ctx.reg_alloc.DefineValue(inst, lhs); + ctx.reg_alloc.DefineValue(code, inst, lhs); } void EmitX64::EmitVectorDeinterleaveOdd32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(code, args[1]); code.shufps(lhs, rhs, 0b11011101); - ctx.reg_alloc.DefineValue(inst, lhs); + ctx.reg_alloc.DefineValue(code, inst, lhs); } void EmitX64::EmitVectorDeinterleaveOdd64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(code, args[1]); code.shufpd(lhs, rhs, 0b11); - ctx.reg_alloc.DefineValue(inst, lhs); + ctx.reg_alloc.DefineValue(code, inst, lhs); } void EmitX64::EmitVectorDeinterleaveOddLower8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSSE3)) { - const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(code, args[1]); code.punpcklbw(lhs, rhs); code.pshufb(lhs, code.Const(xword, 0x0F'0B'07'03'0E'0A'06'02, 0x8080808080808080)); } else { - const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]); + const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.psraw(lhs, 8); code.psraw(rhs, 8); @@ -1317,20 +1317,20 @@ void EmitX64::EmitVectorDeinterleaveOddLower8(EmitContext& ctx, IR::Inst* inst) code.movq(lhs, lhs); } - ctx.reg_alloc.DefineValue(inst, lhs); + ctx.reg_alloc.DefineValue(code, inst, lhs); } void EmitX64::EmitVectorDeinterleaveOddLower16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSSE3)) { - const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(code, args[1]); code.punpcklwd(lhs, rhs); code.pshufb(lhs, code.Const(xword, 0x0F0E'0706'0D0C'0504, 0x8080'8080'8080'8080)); } else { - const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]); + const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.psrad(lhs, 16); code.psrad(rhs, 16); @@ -1339,30 +1339,30 @@ void 
EmitX64::EmitVectorDeinterleaveOddLower16(EmitContext& ctx, IR::Inst* inst) code.movq(lhs, lhs); } - ctx.reg_alloc.DefineValue(inst, lhs); + ctx.reg_alloc.DefineValue(code, inst, lhs); } void EmitX64::EmitVectorDeinterleaveOddLower32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSE41)) { - const Xbyak::Xmm lhs = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(args[1]); + const Xbyak::Xmm lhs = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm rhs = ctx.reg_alloc.UseScratchXmm(code, args[1]); // copy bytes 4:7 of lhs to bytes 0:3 of rhs, zero out upper 8 bytes code.insertps(rhs, lhs, 0b01001100); - ctx.reg_alloc.DefineValue(inst, rhs); + ctx.reg_alloc.DefineValue(code, inst, rhs); } else { - const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(args[1]); - const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm lhs = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm rhs = ctx.reg_alloc.UseXmm(code, args[1]); + const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm(code); code.xorps(zero, zero); code.unpcklps(lhs, rhs); code.unpckhpd(lhs, zero); - ctx.reg_alloc.DefineValue(inst, lhs); + ctx.reg_alloc.DefineValue(code, inst, lhs); } } @@ -1390,34 +1390,34 @@ void EmitX64::EmitVectorEqual64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); code.pcmpeqd(xmm_a, xmm_b); code.pshufd(tmp, xmm_a, 0b10110001); code.pand(xmm_a, tmp); - ctx.reg_alloc.DefineValue(inst, xmm_a); + ctx.reg_alloc.DefineValue(code, inst, xmm_a); } void EmitX64::EmitVectorEqual128(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::SSE41)) { - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); code.pcmpeqq(xmm_a, xmm_b); code.pshufd(tmp, xmm_a, 0b01001110); code.pand(xmm_a, tmp); - ctx.reg_alloc.DefineValue(inst, xmm_a); + ctx.reg_alloc.DefineValue(code, inst, xmm_a); } else { - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); code.pcmpeqd(xmm_a, xmm_b); code.pshufd(tmp, xmm_a, 0b10110001); @@ -1425,7 +1425,7 @@ void EmitX64::EmitVectorEqual128(EmitContext& ctx, IR::Inst* inst) { code.pshufd(tmp, xmm_a, 0b01001110); code.pand(xmm_a, tmp); - ctx.reg_alloc.DefineValue(inst, xmm_a); + ctx.reg_alloc.DefineValue(code, inst, xmm_a); } } @@ -1436,46 +1436,46 @@ void EmitX64::EmitVectorExtract(EmitContext& ctx, IR::Inst* inst) { ASSERT(position % 8 == 0); if 
(position == 0) { - ctx.reg_alloc.DefineValue(inst, args[0]); + ctx.reg_alloc.DefineValue(code, inst, args[0]); return; } if (code.HasHostFeature(HostFeature::SSSE3)) { - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(args[1]); + const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.palignr(xmm_b, xmm_a, position / 8); - ctx.reg_alloc.DefineValue(inst, xmm_b); + ctx.reg_alloc.DefineValue(code, inst, xmm_b); return; } - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(args[1]); + const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.psrldq(xmm_a, position / 8); code.pslldq(xmm_b, (128 - position) / 8); code.por(xmm_a, xmm_b); - ctx.reg_alloc.DefineValue(inst, xmm_a); + ctx.reg_alloc.DefineValue(code, inst, xmm_a); } void EmitX64::EmitVectorExtractLower(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 position = args[2].GetImmediateU8(); ASSERT(position % 8 == 0); if (position != 0) { - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); code.punpcklqdq(xmm_a, xmm_b); code.psrldq(xmm_a, position / 8); } code.movq(xmm_a, xmm_a); - ctx.reg_alloc.DefineValue(inst, xmm_a); + ctx.reg_alloc.DefineValue(code, inst, xmm_a); } void EmitX64::EmitVectorGreaterS8(EmitContext& ctx, IR::Inst* inst) { @@ -1506,9 +1506,9 @@ void EmitX64::EmitVectorGreaterS64(EmitContext& ctx, IR::Inst* inst) { static void EmitVectorHalvingAddSigned(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, b); code.pand(tmp, a); @@ -1529,7 +1529,7 @@ static void EmitVectorHalvingAddSigned(size_t esize, EmitContext& ctx, IR::Inst* break; } - ctx.reg_alloc.DefineValue(inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } void EmitX64::EmitVectorHalvingAddS8(EmitContext& ctx, IR::Inst* inst) { @@ -1547,9 +1547,9 @@ void EmitX64::EmitVectorHalvingAddS32(EmitContext& ctx, IR::Inst* inst) { static void EmitVectorHalvingAddUnsigned(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, b); @@ -1574,7 +1574,7 @@ static void EmitVectorHalvingAddUnsigned(size_t esize, EmitContext& ctx, IR::Ins break; } - ctx.reg_alloc.DefineValue(inst, tmp); + ctx.reg_alloc.DefineValue(code, inst, tmp); } void 
EmitX64::EmitVectorHalvingAddU8(EmitContext& ctx, IR::Inst* inst) { @@ -1592,12 +1592,12 @@ void EmitX64::EmitVectorHalvingAddU32(EmitContext& ctx, IR::Inst* inst) { static void EmitVectorHalvingSubSigned(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]); + const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); switch (esize) { case 8: { - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, code.Const(xword, 0x8080808080808080, 0x8080808080808080)); code.pxor(a, tmp); code.pxor(b, tmp); @@ -1606,7 +1606,7 @@ static void EmitVectorHalvingSubSigned(size_t esize, EmitContext& ctx, IR::Inst* break; } case 16: { - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); code.movdqa(tmp, code.Const(xword, 0x8000800080008000, 0x8000800080008000)); code.pxor(a, tmp); code.pxor(b, tmp); @@ -1622,7 +1622,7 @@ static void EmitVectorHalvingSubSigned(size_t esize, EmitContext& ctx, IR::Inst* break; } - ctx.reg_alloc.DefineValue(inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } void EmitX64::EmitVectorHalvingSubS8(EmitContext& ctx, IR::Inst* inst) { @@ -1640,8 +1640,8 @@ void EmitX64::EmitVectorHalvingSubS32(EmitContext& ctx, IR::Inst* inst) { static void EmitVectorHalvingSubUnsigned(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]); + const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); switch (esize) { case 8: @@ -1660,7 +1660,7 @@ static void EmitVectorHalvingSubUnsigned(size_t esize, EmitContext& ctx, IR::Ins break; } - ctx.reg_alloc.DefineValue(inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } void EmitX64::EmitVectorHalvingSubU8(EmitContext& ctx, IR::Inst* inst) { @@ -1678,8 +1678,8 @@ void EmitX64::EmitVectorHalvingSubU32(EmitContext& ctx, IR::Inst* inst) { static void EmitVectorInterleaveLower(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int size) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); switch (size) { case 8: @@ -1696,7 +1696,7 @@ static void EmitVectorInterleaveLower(BlockOfCode& code, EmitContext& ctx, IR::I break; } - ctx.reg_alloc.DefineValue(inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } void EmitX64::EmitVectorInterleaveLower8(EmitContext& ctx, IR::Inst* inst) { @@ -1718,8 +1718,8 @@ void EmitX64::EmitVectorInterleaveLower64(EmitContext& ctx, IR::Inst* inst) { static void EmitVectorInterleaveUpper(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, int size) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]); switch (size) { case 
8: @@ -1736,7 +1736,7 @@ static void EmitVectorInterleaveUpper(BlockOfCode& code, EmitContext& ctx, IR::I break; } - ctx.reg_alloc.DefineValue(inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } void EmitX64::EmitVectorInterleaveUpper8(EmitContext& ctx, IR::Inst* inst) { @@ -1758,7 +1758,7 @@ void EmitX64::EmitVectorInterleaveUpper64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitVectorLogicalShiftLeft8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); if (shift_amount == 0) { @@ -1778,46 +1778,46 @@ void EmitX64::EmitVectorLogicalShiftLeft8(EmitContext& ctx, IR::Inst* inst) { code.pand(result, code.Const(xword, mask, mask)); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitVectorLogicalShiftLeft16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); code.psllw(result, shift_amount); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitVectorLogicalShiftLeft32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); code.pslld(result, shift_amount); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitVectorLogicalShiftLeft64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); code.psllq(result, shift_amount); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitVectorLogicalShiftRight8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); if (shift_amount == 0) { @@ -1835,40 +1835,40 @@ void EmitX64::EmitVectorLogicalShiftRight8(EmitContext& ctx, IR::Inst* inst) { code.pand(result, code.Const(xword, mask, mask)); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitVectorLogicalShiftRight16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); code.psrlw(result, shift_amount); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitVectorLogicalShiftRight32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm result = 
ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); code.psrld(result, shift_amount); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitVectorLogicalShiftRight64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); const u8 shift_amount = args[1].GetImmediateU8(); code.psrlq(result, shift_amount); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } template @@ -1876,9 +1876,9 @@ static void EmitVectorLogicalVShiftAVX2(BlockOfCode& code, EmitContext& ctx, IR: static_assert(esize == 32 || esize == 64); auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); // store sign bit of lowest byte of each element of b to select left/right shift later ICODE(vpsll)(xmm0, b, u8(esize - 8)); @@ -1898,16 +1898,16 @@ static void EmitVectorLogicalVShiftAVX2(BlockOfCode& code, EmitContext& ctx, IR: code.blendvpd(result, a); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } void EmitX64::EmitVectorLogicalVShift8(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW | HostFeature::GFNI)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(code, args[1]); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); const Xbyak::Opmask negative_mask = k1; code.pxor(tmp, tmp); @@ -1940,23 +1940,22 @@ void EmitX64::EmitVectorLogicalVShift8(EmitContext& ctx, IR::Inst* inst) { // Un-reverse bits of negative-shifts code.vgf2p8affineqb(result | negative_mask, result, xmm0, 0); - ctx.reg_alloc.DefineValue(inst, result); - return; - } - + ctx.reg_alloc.DefineValue(code, inst, result); + } else { EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { std::transform(a.begin(), a.end(), b.begin(), result.begin(), VShift); }); + } } void EmitX64::EmitVectorLogicalVShift16(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(args[1]); - const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm left_shift = ctx.reg_alloc.UseScratchXmm(code, args[1]); + const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); code.vmovdqa32(tmp, code.Const(xword, 
0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF)); code.vpxord(right_shift, right_shift, right_shift); @@ -1968,13 +1967,12 @@ void EmitX64::EmitVectorLogicalVShift16(EmitContext& ctx, IR::Inst* inst) { code.vpsrlvw(result, result, right_shift); code.vpord(result, result, tmp); - ctx.reg_alloc.DefineValue(inst, result); - return; - } - + ctx.reg_alloc.DefineValue(code, inst, result); + } else { EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) { std::transform(a.begin(), a.end(), b.begin(), result.begin(), VShift); }); + } } void EmitX64::EmitVectorLogicalVShift32(EmitContext& ctx, IR::Inst* inst) { @@ -2008,11 +2006,9 @@ enum class MinMaxOperation { // Compute the minimum/maximum of two vectors of signed 8-bit integers, using only SSE2 instructons. // The result of the operation is placed in operand a, while b is unmodified. -template -void FallbackMinMaxS8(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& a, const Xbyak::Xmm& b) { - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(); - - if constexpr (op == MinMaxOperation::Min) { +void FallbackMinMaxS8(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& a, const Xbyak::Xmm& b, MinMaxOperation op) { + const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); + if(op == MinMaxOperation::Min) { code.movdqa(c, b); code.pcmpgtb(c, a); } else { @@ -2027,10 +2023,9 @@ void FallbackMinMaxS8(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& a, // Compute the minimum/maximum of two vectors of unsigned 16-bit integers, using only SSE2 instructons. // The result of the operation is placed in operand a, while b is unmodified. -template -void FallbackMinMaxU16(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& a, const Xbyak::Xmm& b) { - if constexpr (op == MinMaxOperation::Min) { - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(); +void FallbackMinMaxU16(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& a, const Xbyak::Xmm& b, MinMaxOperation op) { + if(op == MinMaxOperation::Min) { + const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); code.movdqa(c, a); code.psubusw(c, b); code.psubw(a, c); @@ -2042,11 +2037,9 @@ void FallbackMinMaxU16(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& a, // Compute the minimum/maximum of two vectors of signed 32-bit integers, using only SSE2 instructons. // The result of the operation is placed in operand a, while b is unmodified. -template -void FallbackMinMaxS32(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& a, const Xbyak::Xmm& b) { - const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(); - - if constexpr (op == MinMaxOperation::Min) { +void FallbackMinMaxS32(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& a, const Xbyak::Xmm& b, MinMaxOperation op) { + const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code); + if(op == MinMaxOperation::Min) { code.movdqa(c, b); code.pcmpgtd(c, a); } else { @@ -2061,14 +2054,13 @@ void FallbackMinMaxS32(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& a, // Compute the minimum/maximum of two vectors of unsigned 32-bit integers, using only SSE2 instructons. // The result of the operation is placed in operand a, while b is unmodified. 
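// SSE2 lacks an unsigned 32-bit compare, so both operands are biased by XORing in 0x80000000 and the signed min/max path is reused.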
-template
-void FallbackMinMaxU32(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& a, const Xbyak::Xmm& b) {
- const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm();
+void FallbackMinMaxU32(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& a, const Xbyak::Xmm& b, MinMaxOperation op) {
+ const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code);
code.movdqa(c, code.BConst<32>(xword, 0x80000000));
// bias a and b by XORing their sign bits, then use the signed comparison function
- const Xbyak::Xmm d = ctx.reg_alloc.ScratchXmm();
- if constexpr (op == MinMaxOperation::Min) {
+ const Xbyak::Xmm d = ctx.reg_alloc.ScratchXmm(code);
+ if(op == MinMaxOperation::Min) {
code.movdqa(d, a);
code.pxor(d, c);
code.pxor(c, b);
@@ -2088,16 +2080,13 @@ void FallbackMinMaxU32(BlockOfCode& code, EmitContext& ctx, const Xbyak::Xmm& a,
void EmitX64::EmitVectorMaxS8(EmitContext& ctx, IR::Inst* inst) {
if (code.HasHostFeature(HostFeature::SSE41)) {
EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pmaxsb);
- return;
- }
-
+ } else {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]);
-
- FallbackMinMaxS8(code, ctx, a, b);
-
- ctx.reg_alloc.DefineValue(inst, a);
+ const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]);
+ FallbackMinMaxS8(code, ctx, a, b, MinMaxOperation::Max);
+ ctx.reg_alloc.DefineValue(code, inst, a);
+ }
}
void EmitX64::EmitVectorMaxS16(EmitContext& ctx, IR::Inst* inst) {
@@ -2107,40 +2096,33 @@ void EmitX64::EmitVectorMaxS16(EmitContext& ctx, IR::Inst* inst) {
void EmitX64::EmitVectorMaxS32(EmitContext& ctx, IR::Inst* inst) {
if (code.HasHostFeature(HostFeature::SSE41)) {
EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pmaxsd);
- return;
- }
-
+ } else {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]);
-
- FallbackMinMaxS32(code, ctx, a, b);
-
- ctx.reg_alloc.DefineValue(inst, a);
+ const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]);
+ FallbackMinMaxS32(code, ctx, a, b, MinMaxOperation::Max);
+ ctx.reg_alloc.DefineValue(code, inst, a);
+ }
}
void EmitX64::EmitVectorMaxS64(EmitContext& ctx, IR::Inst* inst) {
if (code.HasHostFeature(HostFeature::AVX512_Ortho)) {
EmitAVXVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::vpmaxsq);
- return;
- }
-
- if (code.HasHostFeature(HostFeature::AVX)) {
+ } else if (code.HasHostFeature(HostFeature::AVX)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(args[1]);
+ const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]);
code.vpcmpgtq(xmm0, y, x);
code.pblendvb(x, y);
- ctx.reg_alloc.DefineValue(inst, x);
- return;
- }
-
+ ctx.reg_alloc.DefineValue(code, inst, x);
+ } else {
EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) {
std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return (std::max)(x, y); });
});
+ }
}
void EmitX64::EmitVectorMaxU8(EmitContext& ctx, IR::Inst* inst) {
@@ -2150,45 +2132,36 @@ void EmitX64::EmitVectorMaxU8(EmitContext& ctx, IR::Inst* inst) {
void EmitX64::EmitVectorMaxU16(EmitContext& ctx, IR::Inst* inst) {
if (code.HasHostFeature(HostFeature::SSE41)) {
EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pmaxuw);
- return;
- }
-
+ } else {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]);
-
- FallbackMinMaxU16(code, ctx, a, b);
-
- ctx.reg_alloc.DefineValue(inst, a);
+ const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]);
+ FallbackMinMaxU16(code, ctx, a, b, MinMaxOperation::Max);
+ ctx.reg_alloc.DefineValue(code, inst, a);
+ }
}
void EmitX64::EmitVectorMaxU32(EmitContext& ctx, IR::Inst* inst) {
if (code.HasHostFeature(HostFeature::SSE41)) {
EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pmaxud);
- return;
- }
-
+ } else {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]);
-
- FallbackMinMaxU32(code, ctx, a, b);
-
- ctx.reg_alloc.DefineValue(inst, a);
+ const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]);
+ FallbackMinMaxU32(code, ctx, a, b, MinMaxOperation::Max);
+ ctx.reg_alloc.DefineValue(code, inst, a);
+ }
}
void EmitX64::EmitVectorMaxU64(EmitContext& ctx, IR::Inst* inst) {
if (code.HasHostFeature(HostFeature::AVX512_Ortho)) {
EmitAVXVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::vpmaxuq);
- return;
- }
-
- if (code.HasHostFeature(HostFeature::AVX)) {
+ } else if (code.HasHostFeature(HostFeature::AVX)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(args[1]);
- const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]);
+ const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
code.vmovdqa(xmm0, code.Const(xword, 0x8000000000000000, 0x8000000000000000));
code.vpsubq(tmp, y, xmm0);
@@ -2196,28 +2169,24 @@ void EmitX64::EmitVectorMaxU64(EmitContext& ctx, IR::Inst* inst) {
code.vpcmpgtq(xmm0, tmp, xmm0);
code.pblendvb(x, y);
- ctx.reg_alloc.DefineValue(inst, x);
- return;
- }
-
+ ctx.reg_alloc.DefineValue(code, inst, x);
+ } else {
EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) {
std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return (std::max)(x, y); });
});
+ }
}
void EmitX64::EmitVectorMinS8(EmitContext& ctx, IR::Inst* inst) {
if (code.HasHostFeature(HostFeature::SSE41)) {
EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pminsb);
- return;
- }
-
+ } else {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]);
-
- FallbackMinMaxS8(code, ctx, a, b);
-
- ctx.reg_alloc.DefineValue(inst, a);
+ const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]);
+ FallbackMinMaxS8(code, ctx, a, b, MinMaxOperation::Min);
+ ctx.reg_alloc.DefineValue(code, inst, a);
+ }
}
void EmitX64::EmitVectorMinS16(EmitContext& ctx, IR::Inst* inst) {
@@ -2227,40 +2196,33 @@ void EmitX64::EmitVectorMinS16(EmitContext& ctx, IR::Inst* inst) {
void EmitX64::EmitVectorMinS32(EmitContext& ctx, IR::Inst* inst) {
if (code.HasHostFeature(HostFeature::SSE41)) {
EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pminsd);
- return;
- }
-
+ } else {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]);
-
- FallbackMinMaxS32(code, ctx, a, b);
-
- ctx.reg_alloc.DefineValue(inst, a);
+ const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]);
+ FallbackMinMaxS32(code, ctx, a, b, MinMaxOperation::Min);
+ ctx.reg_alloc.DefineValue(code, inst, a);
+ }
}
void EmitX64::EmitVectorMinS64(EmitContext& ctx, IR::Inst* inst) {
if (code.HasHostFeature(HostFeature::AVX512_Ortho)) {
EmitAVXVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::vpminsq);
- return;
- }
-
- if (code.HasHostFeature(HostFeature::AVX)) {
+ } else if (code.HasHostFeature(HostFeature::AVX)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(args[0]);
- const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]);
+ const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(code, args[0]);
+ const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]);
code.vpcmpgtq(xmm0, y, x);
code.pblendvb(y, x);
- ctx.reg_alloc.DefineValue(inst, y);
- return;
- }
-
+ ctx.reg_alloc.DefineValue(code, inst, y);
+ } else {
EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) {
std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return (std::min)(x, y); });
});
+ }
}
void EmitX64::EmitVectorMinU8(EmitContext& ctx, IR::Inst* inst) {
@@ -2270,45 +2232,36 @@ void EmitX64::EmitVectorMinU8(EmitContext& ctx, IR::Inst* inst) {
void EmitX64::EmitVectorMinU16(EmitContext& ctx, IR::Inst* inst) {
if (code.HasHostFeature(HostFeature::SSE41)) {
EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pminuw);
- return;
- }
-
+ } else {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]);
-
- FallbackMinMaxU16(code, ctx, a, b);
-
- ctx.reg_alloc.DefineValue(inst, a);
+ const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]);
+ FallbackMinMaxU16(code, ctx, a, b, MinMaxOperation::Min);
+ ctx.reg_alloc.DefineValue(code, inst, a);
+ }
}
void EmitX64::EmitVectorMinU32(EmitContext& ctx, IR::Inst* inst) {
if (code.HasHostFeature(HostFeature::SSE41)) {
EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pminud);
- return;
- }
-
+ } else {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]);
-
- FallbackMinMaxU32(code, ctx, a, b);
-
- ctx.reg_alloc.DefineValue(inst, a);
+ const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]);
+ FallbackMinMaxU32(code, ctx, a, b, MinMaxOperation::Min);
+ ctx.reg_alloc.DefineValue(code, inst, a);
+ }
}
void EmitX64::EmitVectorMinU64(EmitContext& ctx, IR::Inst* inst) {
if (code.HasHostFeature(HostFeature::AVX512_Ortho)) {
EmitAVXVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::vpminuq);
- return;
- }
-
- if (code.HasHostFeature(HostFeature::AVX)) {
+ } else if (code.HasHostFeature(HostFeature::AVX)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(args[0]);
- const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]);
- const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(code, args[0]);
+ const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]);
+ const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
code.vmovdqa(xmm0, code.Const(xword, 0x8000000000000000, 0x8000000000000000));
code.vpsubq(tmp, y, xmm0);
@@ -2316,21 +2269,20 @@ void EmitX64::EmitVectorMinU64(EmitContext& ctx, IR::Inst* inst) {
code.vpcmpgtq(xmm0, tmp, xmm0);
code.pblendvb(y, x);
- ctx.reg_alloc.DefineValue(inst, y);
- return;
- }
-
+ ctx.reg_alloc.DefineValue(code, inst, y);
+ } else {
EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) {
std::transform(a.begin(), a.end(), b.begin(), result.begin(), [](auto x, auto y) { return (std::min)(x, y); });
});
+ }
}
void EmitX64::EmitVectorMultiply8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
- const Xbyak::Xmm tmp_a = ctx.reg_alloc.ScratchXmm();
- const Xbyak::Xmm tmp_b = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]);
+ const Xbyak::Xmm tmp_a = ctx.reg_alloc.ScratchXmm(code);
+ const Xbyak::Xmm tmp_b = ctx.reg_alloc.ScratchXmm(code);
// TODO: Optimize
code.movdqa(tmp_a, a);
@@ -2343,7 +2295,7 @@ void EmitX64::EmitVectorMultiply8(EmitContext& ctx, IR::Inst* inst) {
code.psllw(tmp_a, 8);
code.por(a, tmp_a);
- ctx.reg_alloc.DefineValue(inst, a);
+ ctx.reg_alloc.DefineValue(code, inst, a);
}
void EmitX64::EmitVectorMultiply16(EmitContext& ctx, IR::Inst* inst) {
@@ -2353,13 +2305,11 @@ void EmitX64::EmitVectorMultiply16(EmitContext& ctx, IR::Inst* inst) {
void EmitX64::EmitVectorMultiply32(EmitContext& ctx, IR::Inst* inst) {
if (code.HasHostFeature(HostFeature::SSE41)) {
EmitVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::pmulld);
- return;
- }
-
+ } else {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
- const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]);
+ const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
code.movdqa(tmp, a);
code.psrlq(a, 32);
@@ -2370,22 +2320,19 @@ void EmitX64::EmitVectorMultiply32(EmitContext& ctx, IR::Inst* inst) {
code.pshufd(b, a, 0b00001000);
code.punpckldq(tmp, b);
- ctx.reg_alloc.DefineValue(inst, tmp);
+ ctx.reg_alloc.DefineValue(code, inst, tmp);
+ }
}
void EmitX64::EmitVectorMultiply64(EmitContext& ctx, IR::Inst* inst) {
if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512DQ)) {
EmitAVXVectorOperation(code, ctx, inst, &Xbyak::CodeGenerator::vpmullq);
- return;
- }
-
+ } else if (code.HasHostFeature(HostFeature::SSE41)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-
- if (code.HasHostFeature(HostFeature::SSE41)) {
- const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]);
- const Xbyak::Reg64 tmp1 = ctx.reg_alloc.ScratchGpr();
- const Xbyak::Reg64 tmp2 = ctx.reg_alloc.ScratchGpr();
+ const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]);
+ const Xbyak::Reg64 tmp1 = ctx.reg_alloc.ScratchGpr(code);
+ const Xbyak::Reg64 tmp2 = ctx.reg_alloc.ScratchGpr(code);
code.movq(tmp1, a);
code.movq(tmp2, b);
@@ -2396,15 +2343,14 @@ void EmitX64::EmitVectorMultiply64(EmitContext& ctx, IR::Inst* inst) {
code.imul(tmp1, tmp2);
code.pinsrq(a, tmp1, 1);
- ctx.reg_alloc.DefineValue(inst, a);
- return;
- }
-
- const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(args[0]);
- const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
- const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm();
- const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm();
- const Xbyak::Xmm tmp3 = ctx.reg_alloc.ScratchXmm();
+ ctx.reg_alloc.DefineValue(code, inst, a);
+ } else {
+ auto args = ctx.reg_alloc.GetArgumentInfo(inst);
+ const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(code, args[0]);
+ const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]);
+ const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm(code);
+ const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(code);
+ const Xbyak::Xmm tmp3 = ctx.reg_alloc.ScratchXmm(code);
code.movdqa(tmp1, a);
code.movdqa(tmp2, a);
@@ -2421,7 +2367,8 @@ void EmitX64::EmitVectorMultiply64(EmitContext& ctx, IR::Inst* inst) {
code.psllq(b, 32);
code.paddq(tmp2, b);
- ctx.reg_alloc.DefineValue(inst, tmp2);
+ ctx.reg_alloc.DefineValue(code, inst, tmp2);
+ }
}
void EmitX64::EmitVectorMultiplySignedWiden8(EmitContext&, IR::Inst*) {
@@ -2452,41 +2399,34 @@ void EmitX64::EmitVectorNarrow16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW)) {
- const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(args[0]);
- const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(code, args[0]);
+ const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
code.vpmovwb(result, a);
- ctx.reg_alloc.DefineValue(inst, result);
- return;
- }
-
- const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm();
+ ctx.reg_alloc.DefineValue(code, inst, result);
+ } else {
+ const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm(code);
code.pxor(zeros, zeros);
code.pand(a, code.Const(xword, 0x00FF00FF00FF00FF, 0x00FF00FF00FF00FF));
code.packuswb(a, zeros);
- ctx.reg_alloc.DefineValue(inst, a);
+ ctx.reg_alloc.DefineValue(code, inst, a);
+ }
}
void EmitX64::EmitVectorNarrow32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-
if (code.HasHostFeature(HostFeature::AVX512_Ortho)) {
- const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(args[0]);
- const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
-
+ const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(code, args[0]);
+ const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
code.vpmovdw(result, a);
-
- ctx.reg_alloc.DefineValue(inst, result);
- return;
- }
-
- const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm();
-
+ ctx.reg_alloc.DefineValue(code, inst, result);
+ } else {
+ const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm(code);
code.pxor(zeros, zeros);
if (code.HasHostFeature(HostFeature::SSE41)) {
code.pblendw(a, zeros, 0b10101010);
@@ -2496,48 +2436,46 @@ void EmitX64::EmitVectorNarrow32(EmitContext& ctx, IR::Inst* inst) {
code.psrad(a, 16);
code.packssdw(a, zeros);
}
-
- ctx.reg_alloc.DefineValue(inst, a);
+ ctx.reg_alloc.DefineValue(code, inst, a);
+ }
}
void EmitX64::EmitVectorNarrow64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (code.HasHostFeature(HostFeature::AVX512_Ortho)) {
- const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(args[0]);
- const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(code, args[0]);
+ const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
code.vpmovqd(result, a);
- ctx.reg_alloc.DefineValue(inst, result);
- return;
- }
-
- const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm();
+ ctx.reg_alloc.DefineValue(code, inst, result);
+ } else {
+ const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm(code);
code.pxor(zeros, zeros);
code.shufps(a, zeros, 0b00001000);
- ctx.reg_alloc.DefineValue(inst, a);
+ ctx.reg_alloc.DefineValue(code, inst, a);
+ }
}
void EmitX64::EmitVectorNot(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (code.HasHostFeature(HostFeature::AVX512_Ortho)) {
- const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
- const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]);
+ const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
+ const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(code, args[0]);
code.vpternlogq(result, operand, operand, u8(~Tern::c));
- ctx.reg_alloc.DefineValue(inst, result);
- return;
- }
-
- const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm xmm_b = ctx.reg_alloc.ScratchXmm();
+ ctx.reg_alloc.DefineValue(code, inst, result);
+ } else {
+ const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm xmm_b = ctx.reg_alloc.ScratchXmm(code);
code.pcmpeqw(xmm_b, xmm_b);
code.pxor(xmm_a, xmm_b);
- ctx.reg_alloc.DefineValue(inst, xmm_a);
+ ctx.reg_alloc.DefineValue(code, inst, xmm_a);
+ }
}
void EmitX64::EmitVectorOr(EmitContext& ctx, IR::Inst* inst) {
@@ -2547,9 +2485,9 @@ void EmitX64::EmitVectorOr(EmitContext& ctx, IR::Inst* inst) {
void EmitX64::EmitVectorPairedAddLower8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
- const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
+ const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
code.punpcklqdq(xmm_a, xmm_b);
code.movdqa(tmp, xmm_a);
@@ -2559,15 +2497,15 @@ void EmitX64::EmitVectorPairedAddLower8(EmitContext& ctx, IR::Inst* inst) {
code.psrlw(xmm_a, 8);
code.packuswb(xmm_a, tmp);
- ctx.reg_alloc.DefineValue(inst, xmm_a);
+ ctx.reg_alloc.DefineValue(code, inst, xmm_a);
}
void EmitX64::EmitVectorPairedAddLower16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
- const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
+ const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
code.punpcklqdq(xmm_a, xmm_b);
if (code.HasHostFeature(HostFeature::SSSE3)) {
@@ -2582,15 +2520,15 @@ void EmitX64::EmitVectorPairedAddLower16(EmitContext& ctx, IR::Inst* inst) {
code.packssdw(xmm_a, tmp); // Note: packusdw is SSE4.1, hence the arithmetic shift above.
}
- ctx.reg_alloc.DefineValue(inst, xmm_a);
+ ctx.reg_alloc.DefineValue(code, inst, xmm_a);
}
void EmitX64::EmitVectorPairedAddLower32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
- const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
+ const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
code.punpcklqdq(xmm_a, xmm_b);
if (code.HasHostFeature(HostFeature::SSSE3)) {
@@ -2604,16 +2542,16 @@ void EmitX64::EmitVectorPairedAddLower32(EmitContext& ctx, IR::Inst* inst) {
code.pshufd(xmm_a, xmm_a, 0b11011000);
}
- ctx.reg_alloc.DefineValue(inst, xmm_a);
+ ctx.reg_alloc.DefineValue(code, inst, xmm_a);
}
void EmitX64::EmitVectorPairedAdd8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
- const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm();
- const Xbyak::Xmm d = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]);
+ const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code);
+ const Xbyak::Xmm d = ctx.reg_alloc.ScratchXmm(code);
code.movdqa(c, a);
code.movdqa(d, b);
@@ -2625,24 +2563,24 @@ void EmitX64::EmitVectorPairedAdd8(EmitContext& ctx, IR::Inst* inst) {
code.psrlw(b, 8);
code.packuswb(a, b);
- ctx.reg_alloc.DefineValue(inst, a);
+ ctx.reg_alloc.DefineValue(code, inst, a);
}
void EmitX64::EmitVectorPairedAdd16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (code.HasHostFeature(HostFeature::SSSE3)) {
- const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]);
+ const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]);
code.phaddw(a, b);
- ctx.reg_alloc.DefineValue(inst, a);
+ ctx.reg_alloc.DefineValue(code, inst, a);
} else {
- const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
- const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm();
- const Xbyak::Xmm d = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]);
+ const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code);
+ const Xbyak::Xmm d = ctx.reg_alloc.ScratchXmm(code);
code.movdqa(c, a);
code.movdqa(d, b);
@@ -2654,7 +2592,7 @@ void EmitX64::EmitVectorPairedAdd16(EmitContext& ctx, IR::Inst* inst) {
code.psrad(b, 16);
code.packssdw(a, b);
- ctx.reg_alloc.DefineValue(inst, a);
+ ctx.reg_alloc.DefineValue(code, inst, a);
}
}
@@ -2662,17 +2600,17 @@ void EmitX64::EmitVectorPairedAdd32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (code.HasHostFeature(HostFeature::SSSE3)) {
- const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]);
+ const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]);
code.phaddd(a, b);
- ctx.reg_alloc.DefineValue(inst, a);
+ ctx.reg_alloc.DefineValue(code, inst, a);
} else {
- const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
- const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm();
- const Xbyak::Xmm d = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]);
+ const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code);
+ const Xbyak::Xmm d = ctx.reg_alloc.ScratchXmm(code);
code.movdqa(c, a);
code.movdqa(d, b);
@@ -2682,30 +2620,30 @@ void EmitX64::EmitVectorPairedAdd32(EmitContext& ctx, IR::Inst* inst) {
code.paddq(b, d);
code.shufps(a, b, 0b11011101);
- ctx.reg_alloc.DefineValue(inst, a);
+ ctx.reg_alloc.DefineValue(code, inst, a);
}
}
void EmitX64::EmitVectorPairedAdd64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]);
- const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]);
+ const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code);
code.movdqa(c, a);
code.punpcklqdq(a, b);
code.punpckhqdq(c, b);
code.paddq(a, c);
- ctx.reg_alloc.DefineValue(inst, a);
+ ctx.reg_alloc.DefineValue(code, inst, a);
}
void EmitX64::EmitVectorPairedAddSignedWiden8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code);
code.movdqa(c, a);
code.psllw(a, 8);
@@ -2713,14 +2651,14 @@ void EmitX64::EmitVectorPairedAddSignedWiden8(EmitContext& ctx, IR::Inst* inst)
code.psraw(a, 8);
code.paddw(a, c);
- ctx.reg_alloc.DefineValue(inst, a);
+ ctx.reg_alloc.DefineValue(code, inst, a);
}
void EmitX64::EmitVectorPairedAddSignedWiden16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code);
code.movdqa(c, a);
code.pslld(a, 16);
@@ -2728,24 +2666,24 @@ void EmitX64::EmitVectorPairedAddSignedWiden16(EmitContext& ctx, IR::Inst* inst)
code.psrad(a, 16);
code.paddd(a, c);
- ctx.reg_alloc.DefineValue(inst, a);
+ ctx.reg_alloc.DefineValue(code, inst, a);
}
void EmitX64::EmitVectorPairedAddSignedWiden32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
+ const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
if (code.HasHostFeature(HostFeature::AVX512_Ortho)) {
- const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code);
code.vpsraq(c, a, 32);
code.vpsllq(a, a, 32);
code.vpsraq(a, a, 32);
code.vpaddq(a, a, c);
} else {
- const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm();
- const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm();
- const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm(code);
+ const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(code);
+ const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code);
code.movdqa(c, a);
code.psllq(a, 32);
@@ -2761,14 +2699,14 @@ void EmitX64::EmitVectorPairedAddSignedWiden32(EmitContext& ctx, IR::Inst* inst)
code.por(c, tmp2);
code.paddq(a, c);
}
- ctx.reg_alloc.DefineValue(inst, a);
+ ctx.reg_alloc.DefineValue(code, inst, a);
}
void EmitX64::EmitVectorPairedAddUnsignedWiden8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code);
code.movdqa(c, a);
code.psllw(a, 8);
@@ -2776,14 +2714,14 @@ void EmitX64::EmitVectorPairedAddUnsignedWiden8(EmitContext& ctx, IR::Inst* inst
code.psrlw(a, 8);
code.paddw(a, c);
- ctx.reg_alloc.DefineValue(inst, a);
+ ctx.reg_alloc.DefineValue(code, inst, a);
}
void EmitX64::EmitVectorPairedAddUnsignedWiden16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code);
code.movdqa(c, a);
code.pslld(a, 16);
@@ -2791,14 +2729,14 @@ void EmitX64::EmitVectorPairedAddUnsignedWiden16(EmitContext& ctx, IR::Inst* ins
code.psrld(a, 16);
code.paddd(a, c);
- ctx.reg_alloc.DefineValue(inst, a);
+ ctx.reg_alloc.DefineValue(code, inst, a);
}
void EmitX64::EmitVectorPairedAddUnsignedWiden32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm c = ctx.reg_alloc.ScratchXmm(code);
code.movdqa(c, a);
code.psllq(a, 32);
@@ -2806,7 +2744,7 @@ void EmitX64::EmitVectorPairedAddUnsignedWiden32(EmitContext& ctx, IR::Inst* ins
code.psrlq(a, 32);
code.paddq(a, c);
- ctx.reg_alloc.DefineValue(inst, a);
+ ctx.reg_alloc.DefineValue(code, inst, a);
}
template
@@ -2859,9 +2797,9 @@ template
static void EmitVectorPairedMinMax8(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]);
- const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]);
+ const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
code.movdqa(tmp, code.Const(xword, 0x0E'0C'0A'08'06'04'02'00, 0x0F'0D'0B'09'07'05'03'01));
code.pshufb(x, tmp);
@@ -2878,15 +2816,15 @@ static void EmitVectorPairedMinMax8(BlockOfCode& code, EmitContext& ctx, IR::Ins
fn(x, tmp);
}
- ctx.reg_alloc.DefineValue(inst, x);
+ ctx.reg_alloc.DefineValue(code, inst, x);
}
template
static void EmitVectorPairedMinMaxLower8(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]);
+ const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]);
code.punpcklqdq(x, y);
code.pshufb(x, code.Const(xword, 0x0E'0C'0A'08'06'04'02'00, 0x0F'0D'0B'09'07'05'03'01));
@@ -2899,16 +2837,16 @@ static void EmitVectorPairedMinMaxLower8(BlockOfCode& code, EmitContext& ctx, IR
fn(x, y);
}
- ctx.reg_alloc.DefineValue(inst, x);
+ ctx.reg_alloc.DefineValue(code, inst, x);
}
template
static void EmitVectorPairedMinMax16(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]);
- const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]);
+ const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
// swap idxs 1 and 2 within 64-bit lanes so that both registers contain [even, odd, even, odd]-indexed pairs of elements
code.pshuflw(x, x, 0b11'01'10'00);
@@ -2931,16 +2869,16 @@ static void EmitVectorPairedMinMax16(BlockOfCode& code, EmitContext& ctx, IR::In
fn(x, tmp);
}
- ctx.reg_alloc.DefineValue(inst, x);
+ ctx.reg_alloc.DefineValue(code, inst, x);
}
template
static void EmitVectorPairedMinMaxLower16(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Function fn) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]);
- const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]);
+ const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
// swap idxs 1 and 2 so that both registers contain even then odd-indexed pairs of elements
code.pshuflw(x, x, 0b11'01'10'00);
@@ -2956,15 +2894,15 @@ static void EmitVectorPairedMinMaxLower16(BlockOfCode& code, EmitContext& ctx, I
(code.*fn)(x, tmp);
- ctx.reg_alloc.DefineValue(inst, x);
+ ctx.reg_alloc.DefineValue(code, inst, x);
}
static void EmitVectorPairedMinMaxLower32(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Xmm&, const Xbyak::Operand&)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(args[1]);
- const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]);
+ const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
// tmp = x[1], y[1], 0, 0
code.movaps(tmp, y);
@@ -2974,7 +2912,7 @@ static void EmitVectorPairedMinMaxLower32(BlockOfCode& code, EmitContext& ctx, I
(code.*fn)(x, tmp);
- ctx.reg_alloc.DefineValue(inst, x);
+ ctx.reg_alloc.DefineValue(code, inst, x);
}
void EmitX64::EmitVectorPairedMaxS8(EmitContext& ctx, IR::Inst* inst) {
@@ -2983,7 +2921,7 @@ void EmitX64::EmitVectorPairedMaxS8(EmitContext& ctx, IR::Inst* inst) {
return;
} else if (code.HasHostFeature(HostFeature::SSSE3)) {
EmitVectorPairedMinMax8(code, ctx, inst, [&](const auto& lhs, const auto& rhs) {
- FallbackMinMaxS8(code, ctx, lhs, rhs);
+ FallbackMinMaxS8(code, ctx, lhs, rhs, MinMaxOperation::Max);
});
return;
}
@@ -3000,9 +2938,9 @@ void EmitX64::EmitVectorPairedMaxS16(EmitContext& ctx, IR::Inst* inst) {
void EmitX64::EmitVectorPairedMaxS32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(args[1]);
- const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]);
+ const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
code.movdqa(tmp, x);
code.shufps(tmp, y, 0b10001000);
@@ -3011,10 +2949,10 @@ void EmitX64::EmitVectorPairedMaxS32(EmitContext& ctx, IR::Inst* inst) {
if (code.HasHostFeature(HostFeature::SSE41)) {
code.pmaxsd(x, tmp);
} else {
- FallbackMinMaxS32(code, ctx, x, tmp);
+ FallbackMinMaxS32(code, ctx, x, tmp, MinMaxOperation::Max);
}
- ctx.reg_alloc.DefineValue(inst, x);
+ ctx.reg_alloc.DefineValue(code, inst, x);
}
void EmitX64::EmitVectorPairedMaxU8(EmitContext& ctx, IR::Inst* inst) {
@@ -3033,7 +2971,7 @@ void EmitX64::EmitVectorPairedMaxU16(EmitContext& ctx, IR::Inst* inst) {
EmitVectorPairedMinMax16(code, ctx, inst, &Xbyak::CodeGenerator::pmaxuw);
} else {
EmitVectorPairedMinMax16(code, ctx, inst, [&](const auto& lhs, const auto& rhs) {
- FallbackMinMaxU16(code, ctx, lhs, rhs);
+ FallbackMinMaxU16(code, ctx, lhs, rhs, MinMaxOperation::Max);
});
}
}
@@ -3041,9 +2979,9 @@ void EmitX64::EmitVectorPairedMaxU16(EmitContext& ctx, IR::Inst* inst) {
void EmitX64::EmitVectorPairedMaxU32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(args[1]);
- const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]);
+ const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
code.movdqa(tmp, x);
code.shufps(tmp, y, 0b10001000);
@@ -3052,26 +2990,24 @@ void EmitX64::EmitVectorPairedMaxU32(EmitContext& ctx, IR::Inst* inst) {
if (code.HasHostFeature(HostFeature::SSE41)) {
code.pmaxud(x, tmp);
} else {
- FallbackMinMaxU32(code, ctx, x, tmp);
+ FallbackMinMaxU32(code, ctx, x, tmp, MinMaxOperation::Max);
}
- ctx.reg_alloc.DefineValue(inst, x);
+ ctx.reg_alloc.DefineValue(code, inst, x);
}
void EmitX64::EmitVectorPairedMinS8(EmitContext& ctx, IR::Inst* inst) {
if (code.HasHostFeature(HostFeature::SSE41)) {
EmitVectorPairedMinMax8(code, ctx, inst, &Xbyak::CodeGenerator::pminsb);
- return;
} else if (code.HasHostFeature(HostFeature::SSSE3)) {
EmitVectorPairedMinMax8(code, ctx, inst, [&](const auto& lhs, const auto& rhs) {
- FallbackMinMaxS8(code, ctx, lhs, rhs);
+ FallbackMinMaxS8(code, ctx, lhs, rhs, MinMaxOperation::Min);
});
- return;
- }
-
+ } else {
EmitTwoArgumentFallback(code, ctx, inst, [](VectorArray& result, const VectorArray& a, const VectorArray& b) {
PairedMin(result, a, b);
});
+ }
}
void EmitX64::EmitVectorPairedMinS16(EmitContext& ctx, IR::Inst* inst) {
@@ -3081,21 +3017,19 @@ void EmitX64::EmitVectorPairedMinS16(EmitContext& ctx, IR::Inst* inst) {
void EmitX64::EmitVectorPairedMinS32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(args[1]);
- const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]);
+ const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
code.movdqa(tmp, x);
code.shufps(tmp, y, 0b10001000);
code.shufps(x, y, 0b11011101);
-
if (code.HasHostFeature(HostFeature::SSE41)) {
code.pminsd(x, tmp);
} else {
- FallbackMinMaxS32(code, ctx, x, tmp);
+ FallbackMinMaxS32(code, ctx, x, tmp, MinMaxOperation::Min);
}
-
- ctx.reg_alloc.DefineValue(inst, x);
+ ctx.reg_alloc.DefineValue(code, inst, x);
}
void EmitX64::EmitVectorPairedMinU8(EmitContext& ctx, IR::Inst* inst) {
@@ -3114,7 +3048,7 @@ void EmitX64::EmitVectorPairedMinU16(EmitContext& ctx, IR::Inst* inst) {
EmitVectorPairedMinMax16(code, ctx, inst, &Xbyak::CodeGenerator::pminuw);
} else {
EmitVectorPairedMinMax16(code, ctx, inst, [&](const auto& lhs, const auto& rhs) {
- FallbackMinMaxU16(code, ctx, lhs, rhs);
+ FallbackMinMaxU16(code, ctx, lhs, rhs, MinMaxOperation::Min);
});
}
}
@@ -3122,9 +3056,9 @@ void EmitX64::EmitVectorPairedMinU16(EmitContext& ctx, IR::Inst* inst) {
void EmitX64::EmitVectorPairedMinU32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(args[1]);
- const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]);
+ const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
code.movdqa(tmp, x);
code.shufps(tmp, y, 0b10001000);
@@ -3133,10 +3067,10 @@ void EmitX64::EmitVectorPairedMinU32(EmitContext& ctx, IR::Inst* inst) {
if (code.HasHostFeature(HostFeature::SSE41)) {
code.pminud(x, tmp);
} else {
- FallbackMinMaxU32(code, ctx, x, tmp);
+ FallbackMinMaxU32(code, ctx, x, tmp, MinMaxOperation::Min);
}
- ctx.reg_alloc.DefineValue(inst, x);
+ ctx.reg_alloc.DefineValue(code, inst, x);
}
void EmitX64::EmitVectorPairedMaxLowerS8(EmitContext& ctx, IR::Inst* inst) {
@@ -3145,7 +3079,7 @@ void EmitX64::EmitVectorPairedMaxLowerS8(EmitContext& ctx, IR::Inst* inst) {
return;
} else if (code.HasHostFeature(HostFeature::SSSE3)) {
EmitVectorPairedMinMaxLower8(code, ctx, inst, [&](const auto& lhs, const auto& rhs) {
- FallbackMinMaxS8(code, ctx, lhs, rhs);
+ FallbackMinMaxS8(code, ctx, lhs, rhs, MinMaxOperation::Max);
});
return;
}
@@ -3216,7 +3150,7 @@ void EmitX64::EmitVectorPairedMinLowerS8(EmitContext& ctx, IR::Inst* inst) {
return;
} else if (code.HasHostFeature(HostFeature::SSSE3)) {
EmitVectorPairedMinMaxLower8(code, ctx, inst, [&](const auto& lhs, const auto& rhs) {
- FallbackMinMaxS8(code, ctx, lhs, rhs);
+ FallbackMinMaxS8(code, ctx, lhs, rhs, MinMaxOperation::Min);
});
return;
}
@@ -3299,12 +3233,12 @@ static D PolynomialMultiply(T lhs, T rhs) {
void EmitX64::EmitVectorPolynomialMultiply8(EmitContext& ctx, IR::Inst* inst) {
if (code.HasHostFeature(HostFeature::SSE41)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
- const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
- const Xbyak::Xmm alternate = ctx.reg_alloc.ScratchXmm();
- const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm();
- const Xbyak::Reg32 counter = ctx.reg_alloc.ScratchGpr().cvt32();
+ const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
+ const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
+ const Xbyak::Xmm alternate = ctx.reg_alloc.ScratchXmm(code);
+ const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm(code);
+ const Xbyak::Reg32 counter = ctx.reg_alloc.ScratchGpr(code).cvt32();
Xbyak::Label loop;
@@ -3329,7 +3263,7 @@ void EmitX64::EmitVectorPolynomialMultiply8(EmitContext& ctx, IR::Inst* inst) {
code.sub(counter, 1);
code.jnz(loop);
- ctx.reg_alloc.DefineValue(inst, result);
+ ctx.reg_alloc.DefineValue(code, inst, result);
return;
}
@@ -3341,12 +3275,12 @@ void EmitX64::EmitVectorPolynomialMultiply8(EmitContext& ctx, IR::Inst* inst) {
void EmitX64::EmitVectorPolynomialMultiplyLong8(EmitContext& ctx, IR::Inst* inst) {
if (code.HasHostFeature(HostFeature::SSE41)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(args[1]);
- const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
- const Xbyak::Xmm alternate = ctx.reg_alloc.ScratchXmm();
- const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm();
- const Xbyak::Reg32 counter = ctx.reg_alloc.ScratchGpr().cvt32();
+ const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]);
+ const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
+ const Xbyak::Xmm alternate = ctx.reg_alloc.ScratchXmm(code);
+ const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm(code);
+ const Xbyak::Reg32 counter = ctx.reg_alloc.ScratchGpr(code).cvt32();
Xbyak::Label loop;
@@ -3373,7 +3307,7 @@ void EmitX64::EmitVectorPolynomialMultiplyLong8(EmitContext& ctx, IR::Inst* inst
code.sub(counter, 1);
code.jnz(loop);
- ctx.reg_alloc.DefineValue(inst, result);
+ ctx.reg_alloc.DefineValue(code, inst, result);
return;
}
@@ -3387,12 +3321,12 @@ void EmitX64::EmitVectorPolynomialMultiplyLong8(EmitContext& ctx, IR::Inst* inst
void EmitX64::EmitVectorPolynomialMultiplyLong64(EmitContext& ctx, IR::Inst* inst) {
if (code.HasHostFeature(HostFeature::PCLMULQDQ)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
+ const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
code.pclmulqdq(xmm_a, xmm_b, 0x00);
- ctx.reg_alloc.DefineValue(inst, xmm_a);
+ ctx.reg_alloc.DefineValue(code, inst, xmm_a);
return;
}
@@ -3418,21 +3352,21 @@ void EmitX64::EmitVectorPolynomialMultiplyLong64(EmitContext& ctx, IR::Inst* ins
void EmitX64::EmitVectorPopulationCount(EmitContext& ctx, IR::Inst* inst) {
if (code.HasHostFeature(HostFeature::AVX512VL | HostFeature::AVX512BITALG)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
+ const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]);
code.vpopcntb(data, data);
- ctx.reg_alloc.DefineValue(inst, data);
+ ctx.reg_alloc.DefineValue(code, inst, data);
return;
}
if (code.HasHostFeature(HostFeature::SSSE3)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm low_a = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm high_a = ctx.reg_alloc.ScratchXmm();
- const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm();
- const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm low_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm high_a = ctx.reg_alloc.ScratchXmm(code);
+ const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm(code);
+ const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(code);
code.movdqa(high_a, low_a);
code.psrlw(high_a, 4);
@@ -3447,7 +3381,7 @@ void EmitX64::EmitVectorPopulationCount(EmitContext& ctx, IR::Inst* inst) {
code.paddb(tmp1, tmp2);
- ctx.reg_alloc.DefineValue(inst, tmp1);
+ ctx.reg_alloc.DefineValue(code, inst, tmp1);
return;
}
@@ -3461,12 +3395,12 @@ void EmitX64::EmitVectorPopulationCount(EmitContext& ctx, IR::Inst* inst) {
void EmitX64::EmitVectorReverseBits(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
+ const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]);
if (code.HasHostFeature(HostFeature::GFNI)) {
code.gf2p8affineqb(data, code.Const(xword, 0x8040201008040201, 0x8040201008040201), 0);
} else {
- const Xbyak::Xmm high_nibble_reg = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm high_nibble_reg = ctx.reg_alloc.ScratchXmm(code);
code.movdqa(high_nibble_reg, code.Const(xword, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0));
code.pand(high_nibble_reg, data);
code.pxor(data, high_nibble_reg);
@@ -3474,7 +3408,7 @@ void EmitX64::EmitVectorReverseBits(EmitContext& ctx, IR::Inst* inst) {
if (code.HasHostFeature(HostFeature::SSSE3)) {
// High lookup
- const Xbyak::Xmm high_reversed_reg = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm high_reversed_reg = ctx.reg_alloc.ScratchXmm(code);
code.movdqa(high_reversed_reg, code.Const(xword, 0xE060A020C0408000, 0xF070B030D0509010));
code.pshufb(high_reversed_reg, data);
@@ -3502,32 +3436,32 @@ void EmitX64::EmitVectorReverseBits(EmitContext& ctx, IR::Inst* inst) {
}
}
- ctx.reg_alloc.DefineValue(inst, data);
+ ctx.reg_alloc.DefineValue(code, inst, data);
}
void EmitX64::EmitVectorReverseElementsInHalfGroups8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
code.movdqa(tmp, data);
code.psllw(tmp, 8);
code.psrlw(data, 8);
code.por(data, tmp);
- ctx.reg_alloc.DefineValue(inst, data);
+ ctx.reg_alloc.DefineValue(code, inst, data);
}
void EmitX64::EmitVectorReverseElementsInWordGroups8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
+ const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]);
if (code.HasHostFeature(HostFeature::AVX)) {
code.vpshufb(data, data, code.Const(xword, 0x0405060700010203, 0x0c0d0e0f08090a0b));
} else if (code.HasHostFeature(HostFeature::SSSE3)) {
code.pshufb(data, code.Const(xword, 0x0405060700010203, 0x0c0d0e0f08090a0b));
} else {
- const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
code.movdqa(tmp, data);
code.psllw(tmp, 8);
code.psrlw(data, 8);
@@ -3535,26 +3469,26 @@ void EmitX64::EmitVectorReverseElementsInWordGroups8(EmitContext& ctx, IR::Inst*
code.pshuflw(data, data, 0b10110001);
code.pshufhw(data, data, 0b10110001);
}
- ctx.reg_alloc.DefineValue(inst, data);
+ ctx.reg_alloc.DefineValue(code, inst, data);
}
void EmitX64::EmitVectorReverseElementsInWordGroups16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
+ const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]);
code.pshuflw(data, data, 0b10110001);
code.pshufhw(data, data, 0b10110001);
- ctx.reg_alloc.DefineValue(inst, data);
+ ctx.reg_alloc.DefineValue(code, inst, data);
}
void EmitX64::EmitVectorReverseElementsInLongGroups8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
+ const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]);
if (code.HasHostFeature(HostFeature::AVX)) {
code.vpshufb(data, data, code.Const(xword, 0x0001020304050607, 0x08090a0b0c0d0e0f));
} else if (code.HasHostFeature(HostFeature::SSSE3)) {
code.pshufb(data, code.Const(xword, 0x0001020304050607, 0x08090a0b0c0d0e0f));
} else {
- const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
code.movdqa(tmp, data);
code.psllw(tmp, 8);
code.psrlw(data, 8);
@@ -3562,35 +3496,35 @@ void EmitX64::EmitVectorReverseElementsInLongGroups8(EmitContext& ctx, IR::Inst*
code.pshuflw(data, data, 0b00011011);
code.pshufhw(data, data, 0b00011011);
}
- ctx.reg_alloc.DefineValue(inst, data);
+ ctx.reg_alloc.DefineValue(code, inst, data);
}
void EmitX64::EmitVectorReverseElementsInLongGroups16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
+ const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]);
code.pshuflw(data, data, 0b00011011);
code.pshufhw(data, data, 0b00011011);
- ctx.reg_alloc.DefineValue(inst, data);
+ ctx.reg_alloc.DefineValue(code, inst, data);
}
void EmitX64::EmitVectorReverseElementsInLongGroups32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
+ const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]);
code.pshuflw(data, data, 0b01001110);
code.pshufhw(data, data, 0b01001110);
- ctx.reg_alloc.DefineValue(inst, data);
+ ctx.reg_alloc.DefineValue(code, inst, data);
}
void EmitX64::EmitVectorReduceAdd8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
+ const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm temp = xmm0;
// Add upper elements to lower elements
@@ -3605,13 +3539,13 @@ void EmitX64::EmitVectorReduceAdd8(EmitContext& ctx, IR::Inst* inst) {
code.pslldq(data, 15);
code.psrldq(data, 15);
- ctx.reg_alloc.DefineValue(inst, data);
+ ctx.reg_alloc.DefineValue(code, inst, data);
}
void EmitX64::EmitVectorReduceAdd16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
+ const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm temp = xmm0;
if (code.HasHostFeature(HostFeature::SSSE3)) {
@@ -3636,13 +3570,13 @@ void EmitX64::EmitVectorReduceAdd16(EmitContext& ctx, IR::Inst* inst) {
code.psrldq(data, 14);
}
- ctx.reg_alloc.DefineValue(inst, data);
+ ctx.reg_alloc.DefineValue(code, inst, data);
}
void EmitX64::EmitVectorReduceAdd32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
+ const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm temp = xmm0;
// Add upper elements to lower elements(reversed)
@@ -3660,13 +3594,13 @@ void EmitX64::EmitVectorReduceAdd32(EmitContext& ctx, IR::Inst* inst) {
// shift upper-most result into lower-most lane
code.psrldq(data, 12);
- ctx.reg_alloc.DefineValue(inst, data);
+ ctx.reg_alloc.DefineValue(code, inst, data);
}
void EmitX64::EmitVectorReduceAdd64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
+ const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm temp = xmm0;
// Add upper elements to lower elements
@@ -3676,32 +3610,32 @@ void EmitX64::EmitVectorReduceAdd64(EmitContext& ctx, IR::Inst* inst) {
// Zero-extend lower 64-bits
code.movq(data, data);
- ctx.reg_alloc.DefineValue(inst, data);
+ ctx.reg_alloc.DefineValue(code, inst, data);
}
void EmitX64::EmitVectorRotateWholeVectorRight(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]);
- const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(code, args[0]);
+ const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
const u8 shift_amount = args[1].GetImmediateU8();
ASSERT(shift_amount % 32 == 0);
const u8 shuffle_imm = std::rotr(0b11100100, shift_amount / 32 * 2);
code.pshufd(result, operand, shuffle_imm);
- ctx.reg_alloc.DefineValue(inst, result);
+ ctx.reg_alloc.DefineValue(code, inst, result);
}
static void EmitVectorRoundingHalvingAddSigned(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
+ const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]);
switch (esize) {
case 8: {
- const Xbyak::Xmm vec_128 = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm vec_128 = ctx.reg_alloc.ScratchXmm(code);
code.movdqa(vec_128, code.Const(xword, 0x8080808080808080, 0x8080808080808080));
code.paddb(a, vec_128);
@@ -3711,7 +3645,7 @@ static void EmitVectorRoundingHalvingAddSigned(size_t esize, EmitContext& ctx, I
break;
}
case 16: {
- const Xbyak::Xmm vec_32768 = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm vec_32768 = ctx.reg_alloc.ScratchXmm(code);
code.movdqa(vec_32768, code.Const(xword, 0x8000800080008000, 0x8000800080008000));
code.paddw(a, vec_32768);
@@ -3721,7 +3655,7 @@ static void EmitVectorRoundingHalvingAddSigned(size_t esize, EmitContext& ctx, I
break;
}
case 32: {
- const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm(code);
code.movdqa(tmp1, a);
code.por(a, b);
@@ -3735,7 +3669,7 @@ static void EmitVectorRoundingHalvingAddSigned(size_t esize, EmitContext& ctx, I
}
}
- ctx.reg_alloc.DefineValue(inst, a);
+ ctx.reg_alloc.DefineValue(code, inst, a);
}
void EmitX64::EmitVectorRoundingHalvingAddS8(EmitContext& ctx, IR::Inst* inst) {
@@ -3761,9 +3695,9 @@ static void EmitVectorRoundingHalvingAddUnsigned(size_t esize, EmitContext& ctx,
case 32: {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
- const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
- const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
+ const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]);
+ const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm(code);
code.movdqa(tmp1, a);
@@ -3775,7 +3709,7 @@ static void EmitVectorRoundingHalvingAddUnsigned(size_t esize, EmitContext& ctx,
code.psrld(a, 31);
code.paddd(a, b);
- ctx.reg_alloc.DefineValue(inst, a);
+ ctx.reg_alloc.DefineValue(code, inst, a);
break;
}
}
@@ -3831,18 +3765,18 @@ static void EmitUnsignedRoundingShiftLeft(BlockOfCode& code, EmitContext& ctx, I
static_assert(esize == 32 || esize == 64);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
- const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(args[0]);
- const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(args[1]);
+ const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(code, args[0]);
+ const Xbyak::Xmm b = ctx.reg_alloc.UseXmm(code, args[1]);
// positive values of b are left shifts, while negative values are (positive) rounding right shifts
// only the lowest byte of each element is read as the shift amount
// conveniently, the behavior of bit shifts greater than element width is the same in NEON and SSE/AVX - filled with zeros
- const Xbyak::Xmm shift_amount = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm shift_amount = ctx.reg_alloc.ScratchXmm(code);
code.vpabsb(shift_amount, b);
code.vpand(shift_amount, shift_amount, code.BConst(xword, 0xFF));
// if b is positive, do a normal left shift
- const Xbyak::Xmm left_shift = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm left_shift = ctx.reg_alloc.ScratchXmm(code);
ICODE(vpsllv)(left_shift, a, shift_amount);
// if b is negative, compute the rounding right shift
@@ -3853,7 +3787,7 @@ static void EmitUnsignedRoundingShiftLeft(BlockOfCode& code, EmitContext& ctx, I
// tmp = (a >> (b - 1)) & 1
// res = (a >> b) + tmp
// to add the value of the last bit to be shifted off to the result of the right shift
- const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm right_shift = ctx.reg_alloc.ScratchXmm(code);
code.vmovdqa(xmm0, code.BConst(xword, 1));
// find value of last bit to be shifted off
@@ -3875,7 +3809,7 @@ static void EmitUnsignedRoundingShiftLeft(BlockOfCode& code, EmitContext& ctx, I
code.blendvpd(left_shift, right_shift);
}
- ctx.reg_alloc.DefineValue(inst, left_shift);
+ ctx.reg_alloc.DefineValue(code, inst, left_shift);
return;
}
@@ -3940,57 +3874,57 @@ void EmitX64::EmitVectorRoundingShiftLeftU64(EmitContext& ctx, IR::Inst* inst) {
void EmitX64::EmitVectorSignExtend8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (code.HasHostFeature(HostFeature::SSE41)) {
- const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
+ const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
code.pmovsxbw(a, a);
- ctx.reg_alloc.DefineValue(inst, a);
+ ctx.reg_alloc.DefineValue(code, inst, a);
} else {
- const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(args[0]);
- const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(code, args[0]);
+ const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
code.pxor(result, result);
code.punpcklbw(result, a);
code.psraw(result, 8);
- ctx.reg_alloc.DefineValue(inst, result);
+ ctx.reg_alloc.DefineValue(code, inst, result);
}
}
void EmitX64::EmitVectorSignExtend16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (code.HasHostFeature(HostFeature::SSE41)) {
- const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
+ const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
code.pmovsxwd(a, a);
- ctx.reg_alloc.DefineValue(inst, a);
+ ctx.reg_alloc.DefineValue(code, inst, a);
} else {
- const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(args[0]);
- const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
+ const Xbyak::Xmm a = ctx.reg_alloc.UseXmm(code, args[0]);
+ const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
code.pxor(result, result);
code.punpcklwd(result, a);
code.psrad(result, 16);
result); + ctx.reg_alloc.DefineValue(code, inst, result); } } void EmitX64::EmitVectorSignExtend32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSE41)) { code.pmovsxdq(a, a); } else { - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); code.movaps(tmp, a); code.psrad(tmp, 31); code.punpckldq(a, tmp); } - ctx.reg_alloc.DefineValue(inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } void EmitX64::EmitVectorSignExtend64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Reg64 gpr_tmp = ctx.reg_alloc.ScratchGpr(); + const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Reg64 gpr_tmp = ctx.reg_alloc.ScratchGpr(code); code.movq(gpr_tmp, data); code.sar(gpr_tmp, 63); @@ -3998,20 +3932,20 @@ void EmitX64::EmitVectorSignExtend64(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41)) { code.pinsrq(data, gpr_tmp, 1); } else { - const Xbyak::Xmm xmm_tmp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm xmm_tmp = ctx.reg_alloc.ScratchXmm(code); code.movq(xmm_tmp, gpr_tmp); code.punpcklqdq(data, xmm_tmp); } - ctx.reg_alloc.DefineValue(inst, data); + ctx.reg_alloc.DefineValue(code, inst, data); } static void EmitVectorSignedAbsoluteDifference(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); // only signed 16-bit min/max are available below SSE4.1 if (code.HasHostFeature(HostFeature::SSE41) || esize == 16) { @@ -4057,7 +3991,7 @@ static void EmitVectorSignedAbsoluteDifference(size_t esize, EmitContext& ctx, I } } - ctx.reg_alloc.DefineValue(inst, x); + ctx.reg_alloc.DefineValue(code, inst, x); } void EmitX64::EmitVectorSignedAbsoluteDifference8(EmitContext& ctx, IR::Inst* inst) { @@ -4077,11 +4011,11 @@ void EmitX64::EmitVectorSignedMultiply16(EmitContext& ctx, IR::Inst* inst) { const auto lower_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetLowerFromOp); auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); if (upper_inst) { - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); if (code.HasHostFeature(HostFeature::AVX)) { code.vpmulhw(result, x, y); } else { @@ -4089,18 +4023,18 @@ void EmitX64::EmitVectorSignedMultiply16(EmitContext& ctx, IR::Inst* inst) { code.pmulhw(result, y); } - ctx.reg_alloc.DefineValue(upper_inst, result); + ctx.reg_alloc.DefineValue(code, upper_inst, result); } if (lower_inst) { - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); if (code.HasHostFeature(HostFeature::AVX)) { 
code.vpmullw(result, x, y); } else { code.movdqa(result, x); code.pmullw(result, y); } - ctx.reg_alloc.DefineValue(lower_inst, result); + ctx.reg_alloc.DefineValue(code, lower_inst, result); } } @@ -4111,27 +4045,27 @@ void EmitX64::EmitVectorSignedMultiply32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (lower_inst && !upper_inst && code.HasHostFeature(HostFeature::AVX)) { - const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(args[1]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); code.vpmulld(result, x, y); - ctx.reg_alloc.DefineValue(lower_inst, result); + ctx.reg_alloc.DefineValue(code, lower_inst, result); return; } if (code.HasHostFeature(HostFeature::AVX)) { - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]); + const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); if (lower_inst) { - const Xbyak::Xmm lower_result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm lower_result = ctx.reg_alloc.ScratchXmm(code); code.vpmulld(lower_result, x, y); - ctx.reg_alloc.DefineValue(lower_inst, lower_result); + ctx.reg_alloc.DefineValue(code, lower_inst, lower_result); } - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); code.vpmuldq(result, x, y); code.vpsrlq(x, x, 32); @@ -4139,16 +4073,16 @@ void EmitX64::EmitVectorSignedMultiply32(EmitContext& ctx, IR::Inst* inst) { code.vpmuldq(x, x, y); code.shufps(result, x, 0b11011101); - ctx.reg_alloc.DefineValue(upper_inst, result); + ctx.reg_alloc.DefineValue(code, upper_inst, result); return; } - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm sign_correction = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm upper_result = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm lower_result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm sign_correction = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm upper_result = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm lower_result = ctx.reg_alloc.ScratchXmm(code); // calculate sign correction code.movdqa(tmp, x); @@ -4181,18 +4115,18 @@ void EmitX64::EmitVectorSignedMultiply32(EmitContext& ctx, IR::Inst* inst) { code.psubd(upper_result, sign_correction); if (upper_inst) { - ctx.reg_alloc.DefineValue(upper_inst, upper_result); + ctx.reg_alloc.DefineValue(code, upper_inst, upper_result); } if (lower_inst) { - ctx.reg_alloc.DefineValue(lower_inst, lower_result); + ctx.reg_alloc.DefineValue(code, lower_inst, lower_result); } } static void EmitVectorSignedSaturatedAbs(size_t esize, BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Reg32 bit = 
ctx.reg_alloc.ScratchGpr(code).cvt32(); // SSE absolute value functions return an unsigned result // this means abs(SIGNED_MIN) returns its value unchanged, leaving the most significant bit set @@ -4261,7 +4195,7 @@ static void EmitVectorSignedSaturatedAbs(size_t esize, BlockOfCode& code, EmitCo } code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit); - ctx.reg_alloc.DefineValue(inst, data); + ctx.reg_alloc.DefineValue(code, inst, data); } void EmitX64::EmitVectorSignedSaturatedAbs8(EmitContext& ctx, IR::Inst* inst) { @@ -4284,13 +4218,13 @@ template static void EmitVectorSignedSaturatedAccumulateUnsigned(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); code.movdqa(xmm0, y); ctx.reg_alloc.Release(y); - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); switch (bit_width) { case 8: @@ -4347,7 +4281,7 @@ static void EmitVectorSignedSaturatedAccumulateUnsigned(BlockOfCode& code, EmitC switch (bit_width) { case 8: if (code.HasHostFeature(HostFeature::AVX)) { - const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(code); code.pcmpeqb(tmp2, tmp2); code.pxor(tmp, tmp); code.vpblendvb(xmm0, tmp, tmp2, xmm0); @@ -4393,7 +4327,7 @@ static void EmitVectorSignedSaturatedAccumulateUnsigned(BlockOfCode& code, EmitC break; } - const Xbyak::Reg32 mask = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 mask = ctx.reg_alloc.ScratchGpr(code).cvt32(); code.pmovmskb(mask, xmm0); code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], mask); @@ -4405,7 +4339,7 @@ static void EmitVectorSignedSaturatedAccumulateUnsigned(BlockOfCode& code, EmitC code.movdqa(result, xmm0); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitVectorSignedSaturatedAccumulateUnsigned8(EmitContext& ctx, IR::Inst* inst) { @@ -4427,10 +4361,10 @@ void EmitX64::EmitVectorSignedSaturatedAccumulateUnsigned64(EmitContext& ctx, IR template static void EmitVectorSignedSaturatedDoublingMultiply16(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(args[1]); - const Xbyak::Xmm upper_tmp = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm lower_tmp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); + const Xbyak::Xmm upper_tmp = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm lower_tmp = ctx.reg_alloc.ScratchXmm(code); if (code.HasHostFeature(HostFeature::AVX)) { code.vpmulhw(upper_tmp, x, y); @@ -4449,7 +4383,7 @@ static void EmitVectorSignedSaturatedDoublingMultiply16(BlockOfCode& code, EmitC ctx.reg_alloc.Release(x); ctx.reg_alloc.Release(y); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); if (code.HasHostFeature(HostFeature::AVX)) { if constexpr (is_rounding) { @@ -4479,11 +4413,11 @@ static void 
EmitVectorSignedSaturatedDoublingMultiply16(BlockOfCode& code, EmitC code.pxor(result, upper_tmp); } - const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr(code).cvt32(); code.pmovmskb(bit, upper_tmp); code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyHigh16(EmitContext& ctx, IR::Inst* inst) { @@ -4499,10 +4433,10 @@ void EmitVectorSignedSaturatedDoublingMultiply32(BlockOfCode& code, EmitContext& auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::AVX)) { - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]); - const Xbyak::Xmm odds = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm even = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + const Xbyak::Xmm odds = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm even = ctx.reg_alloc.ScratchXmm(code); code.vpmuldq(odds, x, y); code.vpsrlq(x, x, 32); @@ -4515,7 +4449,7 @@ void EmitVectorSignedSaturatedDoublingMultiply32(BlockOfCode& code, EmitContext& code.vpaddq(odds, odds, odds); code.vpaddq(even, even, even); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); if constexpr (is_rounding) { code.vmovdqa(result, code.Const(xword, 0x0000000080000000, 0x0000000080000000)); @@ -4526,8 +4460,8 @@ void EmitVectorSignedSaturatedDoublingMultiply32(BlockOfCode& code, EmitContext& code.vpsrlq(result, odds, 32); code.vblendps(result, result, even, 0b1010); - const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr(code).cvt32(); code.vpcmpeqd(mask, result, code.Const(xword, 0x8000000080000000, 0x8000000080000000)); code.vpxor(result, result, mask); @@ -4537,15 +4471,15 @@ void EmitVectorSignedSaturatedDoublingMultiply32(BlockOfCode& code, EmitContext& ctx.reg_alloc.Release(mask); ctx.reg_alloc.Release(bit); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm sign_correction = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm sign_correction = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); // calculate sign correction code.movdqa(tmp, x); @@ -4582,7 +4516,7 @@ void EmitVectorSignedSaturatedDoublingMultiply32(BlockOfCode& code, EmitContext& code.por(result, tmp); code.psubd(result, sign_correction); - const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr(code).cvt32(); code.movdqa(tmp, code.Const(xword, 0x8000000080000000, 0x8000000080000000)); code.pcmpeqd(tmp, result); @@ -4590,7 +4524,7 @@ 
void EmitVectorSignedSaturatedDoublingMultiply32(BlockOfCode& code, EmitContext& code.pmovmskb(bit, tmp); code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyHigh32(EmitContext& ctx, IR::Inst* inst) { @@ -4604,8 +4538,8 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyHighRounding32(EmitContex void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]); + const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.punpcklwd(x, x); code.punpcklwd(y, y); @@ -4620,18 +4554,18 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong16(EmitContext& ctx, code.pxor(x, y); } - const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr(code).cvt32(); code.pmovmskb(bit, y); code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit); - ctx.reg_alloc.DefineValue(inst, x); + ctx.reg_alloc.DefineValue(code, inst, x); } void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]); + const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); if (code.HasHostFeature(HostFeature::AVX)) { code.vpmovsxdq(x, x); @@ -4639,10 +4573,10 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong32(EmitContext& ctx, code.vpmuldq(x, x, y); code.vpaddq(x, x, x); } else { - const Xbyak::Reg64 a = ctx.reg_alloc.ScratchGpr(); - const Xbyak::Reg64 b = ctx.reg_alloc.ScratchGpr(); - const Xbyak::Reg64 c = ctx.reg_alloc.ScratchGpr(); - const Xbyak::Reg64 d = ctx.reg_alloc.ScratchGpr(); + const Xbyak::Reg64 a = ctx.reg_alloc.ScratchGpr(code); + const Xbyak::Reg64 b = ctx.reg_alloc.ScratchGpr(code); + const Xbyak::Reg64 c = ctx.reg_alloc.ScratchGpr(code); + const Xbyak::Reg64 d = ctx.reg_alloc.ScratchGpr(code); code.movq(c, x); code.movq(d, y); @@ -4663,7 +4597,7 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong32(EmitContext& ctx, ctx.reg_alloc.Release(d); } - const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr(code).cvt32(); if (code.HasHostFeature(HostFeature::AVX)) { code.vpcmpeqq(y, x, code.Const(xword, 0x8000000000000000, 0x8000000000000000)); code.vpxor(x, x, y); @@ -4677,15 +4611,15 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong32(EmitContext& ctx, } code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit); - ctx.reg_alloc.DefineValue(inst, x); + ctx.reg_alloc.DefineValue(code, inst, x); } static void EmitVectorSignedSaturatedNarrowToSigned(size_t original_esize, BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm src = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm dest = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm reconstructed = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm sign = 
ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm src = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm dest = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm reconstructed = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm sign = ctx.reg_alloc.ScratchXmm(code); code.movdqa(dest, src); code.pxor(xmm0, xmm0); @@ -4710,13 +4644,13 @@ static void EmitVectorSignedSaturatedNarrowToSigned(size_t original_esize, Block UNREACHABLE(); } - const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr(code).cvt32(); code.pcmpeqd(reconstructed, src); code.movmskps(bit, reconstructed); code.xor_(bit, 0b1111); code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit); - ctx.reg_alloc.DefineValue(inst, dest); + ctx.reg_alloc.DefineValue(code, inst, dest); } void EmitX64::EmitVectorSignedSaturatedNarrowToSigned16(EmitContext& ctx, IR::Inst* inst) { @@ -4742,9 +4676,9 @@ void EmitX64::EmitVectorSignedSaturatedNarrowToSigned64(EmitContext& ctx, IR::In static void EmitVectorSignedSaturatedNarrowToUnsigned(size_t original_esize, BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm src = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm dest = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm reconstructed = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm src = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm dest = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm reconstructed = ctx.reg_alloc.ScratchXmm(code); code.movdqa(dest, src); code.pxor(xmm0, xmm0); @@ -4765,13 +4699,13 @@ static void EmitVectorSignedSaturatedNarrowToUnsigned(size_t original_esize, Blo UNREACHABLE(); } - const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr(code).cvt32(); code.pcmpeqd(reconstructed, src); code.movmskps(bit, reconstructed); code.xor_(bit, 0b1111); code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit); - ctx.reg_alloc.DefineValue(inst, dest); + ctx.reg_alloc.DefineValue(code, inst, dest); } void EmitX64::EmitVectorSignedSaturatedNarrowToUnsigned16(EmitContext& ctx, IR::Inst* inst) { @@ -4812,9 +4746,9 @@ void EmitX64::EmitVectorSignedSaturatedNarrowToUnsigned64(EmitContext& ctx, IR:: static void EmitVectorSignedSaturatedNeg(size_t esize, BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm data = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm data = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); const Xbyak::Address mask = [esize, &code] { switch (esize) { case 8: @@ -4870,11 +4804,11 @@ static void EmitVectorSignedSaturatedNeg(size_t esize, BlockOfCode& code, EmitCo } // Check if any elements matched the mask prior to performing saturation. If so, set the Q bit. 
- const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr(code).cvt32(); code.pmovmskb(bit, tmp); code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit); - ctx.reg_alloc.DefineValue(inst, zero); + ctx.reg_alloc.DefineValue(code, inst, zero); } void EmitX64::EmitVectorSignedSaturatedNeg8(EmitContext& ctx, IR::Inst* inst) { @@ -5052,7 +4986,7 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) { const bool is_defaults_zero = inst->GetArg(0).IsZero(); if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW | HostFeature::AVX512VBMI)) { - const Xbyak::Xmm indicies = table_size <= 2 ? ctx.reg_alloc.UseXmm(args[2]) : ctx.reg_alloc.UseScratchXmm(args[2]); + const Xbyak::Xmm indicies = table_size <= 2 ? ctx.reg_alloc.UseXmm(code, args[2]) : ctx.reg_alloc.UseScratchXmm(code, args[2]); const u64 index_count = mcl::bit::replicate_element(static_cast(table_size * 8)); @@ -5060,64 +4994,64 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) { switch (table_size) { case 1: { - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseXmm(table[0]); + const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseXmm(code, table[0]); if (is_defaults_zero) { - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); code.vpermb(result | k1 | T_z, indicies, xmm_table0); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } else { - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.vpermb(result | k1, indicies, xmm_table0); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } break; } case 2: { - const Xbyak::Xmm xmm_table0_lower = ctx.reg_alloc.UseXmm(table[0]); - const Xbyak::Xmm xmm_table0_upper = ctx.reg_alloc.UseXmm(table[1]); + const Xbyak::Xmm xmm_table0_lower = ctx.reg_alloc.UseXmm(code, table[0]); + const Xbyak::Xmm xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); code.vpunpcklqdq(xmm0, xmm_table0_lower, xmm_table0_upper); if (is_defaults_zero) { - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); code.vpermb(result | k1 | T_z, indicies, xmm0); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } else { - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.vpermb(result | k1, indicies, xmm0); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } break; } case 3: { - const Xbyak::Xmm xmm_table0_lower = ctx.reg_alloc.UseXmm(table[0]); - const Xbyak::Xmm xmm_table0_upper = ctx.reg_alloc.UseXmm(table[1]); - const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseXmm(table[2]); + const Xbyak::Xmm xmm_table0_lower = ctx.reg_alloc.UseXmm(code, table[0]); + const Xbyak::Xmm xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); + const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseXmm(code, table[2]); code.vpunpcklqdq(xmm0, xmm_table0_lower, xmm_table0_upper); if (is_defaults_zero) { code.vpermi2b(indicies | k1 | T_z, xmm0, xmm_table1); - ctx.reg_alloc.DefineValue(inst, indicies); + ctx.reg_alloc.DefineValue(code, inst, indicies); } else { - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm result = 
ctx.reg_alloc.UseScratchXmm(code, args[0]); code.vpermi2b(indicies, xmm0, xmm_table1); code.vmovdqu8(result | k1, indicies); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } break; } case 4: { - const Xbyak::Xmm xmm_table0_lower = ctx.reg_alloc.UseXmm(table[0]); - const Xbyak::Xmm xmm_table0_upper = ctx.reg_alloc.UseXmm(table[1]); - const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseScratchXmm(table[2]); - const Xbyak::Xmm xmm_table1_upper = ctx.reg_alloc.UseXmm(table[3]); + const Xbyak::Xmm xmm_table0_lower = ctx.reg_alloc.UseXmm(code, table[0]); + const Xbyak::Xmm xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); + const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseScratchXmm(code, table[2]); + const Xbyak::Xmm xmm_table1_upper = ctx.reg_alloc.UseXmm(code, table[3]); code.vpunpcklqdq(xmm0, xmm_table0_lower, xmm_table0_upper); code.vpunpcklqdq(xmm_table1, xmm_table1, xmm_table1_upper); if (is_defaults_zero) { code.vpermi2b(indicies | k1 | T_z, xmm0, xmm_table1); - ctx.reg_alloc.DefineValue(inst, indicies); + ctx.reg_alloc.DefineValue(code, inst, indicies); } else { - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.vpermi2b(indicies, xmm0, xmm_table1); code.vmovdqu8(result | k1, indicies); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } break; } @@ -5137,39 +5071,39 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) { }; if (code.HasHostFeature(HostFeature::SSSE3) && is_defaults_zero && table_size == 1) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(args[2]); - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseXmm(table[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); + const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseXmm(code, table[0]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); code.xorps(result, result); code.movsd(result, xmm_table0); code.paddusb(indicies, code.Const(xword, 0x7070707070707070, 0xFFFFFFFFFFFFFFFF)); code.pshufb(result, indicies); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } if (code.HasHostFeature(HostFeature::SSSE3) && is_defaults_zero && table_size == 2) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(args[2]); - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(table[0]); - const Xbyak::Xmm xmm_table0_upper = ctx.reg_alloc.UseXmm(table[1]); + const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); + const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); + const Xbyak::Xmm xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); code.punpcklqdq(xmm_table0, xmm_table0_upper); code.paddusb(indicies, code.Const(xword, 0x7070707070707070, 0xFFFFFFFFFFFFFFFF)); code.pshufb(xmm_table0, indicies); - ctx.reg_alloc.DefineValue(inst, xmm_table0); + ctx.reg_alloc.DefineValue(code, inst, xmm_table0); return; } if (code.HasHostFeature(HostFeature::SSE41) && table_size <= 2) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(args[2]); - const Xbyak::Xmm defaults = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(table[0]); + const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(code, args[2]); + const Xbyak::Xmm defaults = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm xmm_table0 = 
ctx.reg_alloc.UseScratchXmm(code, table[0]); if (table_size == 2) { - const Xbyak::Xmm xmm_table0_upper = ctx.reg_alloc.UseXmm(table[1]); + const Xbyak::Xmm xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); code.punpcklqdq(xmm_table0, xmm_table0_upper); ctx.reg_alloc.Release(xmm_table0_upper); } @@ -5183,17 +5117,17 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) { code.pshufb(xmm_table0, indicies); code.pblendvb(xmm_table0, defaults); - ctx.reg_alloc.DefineValue(inst, xmm_table0); + ctx.reg_alloc.DefineValue(code, inst, xmm_table0); return; } if (code.HasHostFeature(HostFeature::SSE41) && is_defaults_zero) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(args[2]); - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(table[0]); - const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseScratchXmm(table[2]); + const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); + const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); + const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseScratchXmm(code, table[2]); { - const Xbyak::Xmm xmm_table0_upper = ctx.reg_alloc.UseXmm(table[1]); + const Xbyak::Xmm xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); code.punpcklqdq(xmm_table0, xmm_table0_upper); ctx.reg_alloc.Release(xmm_table0_upper); } @@ -5202,7 +5136,7 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) { code.punpcklqdq(xmm_table1, xmm0); } else { ASSERT(table_size == 4); - const Xbyak::Xmm xmm_table1_upper = ctx.reg_alloc.UseXmm(table[3]); + const Xbyak::Xmm xmm_table1_upper = ctx.reg_alloc.UseXmm(code, table[3]); code.punpcklqdq(xmm_table1, xmm_table1_upper); ctx.reg_alloc.Release(xmm_table1_upper); } @@ -5218,23 +5152,23 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) { code.pshufb(xmm_table1, indicies); code.pblendvb(xmm_table0, xmm_table1); - ctx.reg_alloc.DefineValue(inst, xmm_table0); + ctx.reg_alloc.DefineValue(code, inst, xmm_table0); return; } if (code.HasHostFeature(HostFeature::SSE41)) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(args[2]); - const Xbyak::Xmm defaults = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(table[0]); - const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseScratchXmm(table[2]); + const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); + const Xbyak::Xmm defaults = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); + const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseScratchXmm(code, table[2]); { - const Xbyak::Xmm xmm_table0_upper = ctx.reg_alloc.UseXmm(table[1]); + const Xbyak::Xmm xmm_table0_upper = ctx.reg_alloc.UseXmm(code, table[1]); code.punpcklqdq(xmm_table0, xmm_table0_upper); ctx.reg_alloc.Release(xmm_table0_upper); } if (table_size == 4) { - const Xbyak::Xmm xmm_table1_upper = ctx.reg_alloc.UseXmm(table[3]); + const Xbyak::Xmm xmm_table1_upper = ctx.reg_alloc.UseXmm(code, table[3]); code.punpcklqdq(xmm_table1, xmm_table1_upper); ctx.reg_alloc.Release(xmm_table1_upper); } @@ -5256,22 +5190,22 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) { } code.pblendvb(xmm_table0, defaults); - ctx.reg_alloc.DefineValue(inst, xmm_table0); + ctx.reg_alloc.DefineValue(code, inst, xmm_table0); return; } const u32 stack_space = static_cast(6 * 8); - ctx.reg_alloc.AllocStackSpace(stack_space + ABI_SHADOW_SPACE); + ctx.reg_alloc.AllocStackSpace(code, stack_space + ABI_SHADOW_SPACE); for 
(size_t i = 0; i < table_size; ++i) { - const Xbyak::Xmm table_value = ctx.reg_alloc.UseXmm(table[i]); + const Xbyak::Xmm table_value = ctx.reg_alloc.UseXmm(code, table[i]); code.movq(qword[rsp + ABI_SHADOW_SPACE + i * 8], table_value); ctx.reg_alloc.Release(table_value); } - const Xbyak::Xmm defaults = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(args[2]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm defaults = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(code, args[2]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); ctx.reg_alloc.EndOfAllocScope(); - ctx.reg_alloc.HostCall(nullptr); + ctx.reg_alloc.HostCall(code, nullptr); code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE]); code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 4 * 8]); @@ -5292,9 +5226,9 @@ void EmitX64::EmitVectorTableLookup64(EmitContext& ctx, IR::Inst* inst) { }); code.movq(result, qword[rsp + ABI_SHADOW_SPACE + 4 * 8]); - ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE); + ctx.reg_alloc.ReleaseStackSpace(code, stack_space + ABI_SHADOW_SPACE); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { @@ -5307,14 +5241,14 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { const bool is_defaults_zero = !inst->GetArg(0).IsImmediate() && inst->GetArg(0).GetInst()->GetOpcode() == IR::Opcode::ZeroVector; if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW | HostFeature::AVX512VBMI) && table_size == 4) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(args[2]); + const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); code.vpcmpub(k1, indicies, code.BConst<8>(xword, 2 * 16), CmpInt::LessThan); code.vpcmpub(k2, indicies, code.BConst<8>(xword, 4 * 16), CmpInt::LessThan); // Handle vector-table 0,1 - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseXmm(table[0]); - const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseXmm(table[1]); + const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseXmm(code, table[0]); + const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseXmm(code, table[1]); code.vpermi2b(indicies | k1, xmm_table0, xmm_table1); @@ -5322,32 +5256,32 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.Release(xmm_table1); // Handle vector-table 2,3 - const Xbyak::Xmm xmm_table2 = ctx.reg_alloc.UseXmm(table[2]); - const Xbyak::Xmm xmm_table3 = ctx.reg_alloc.UseXmm(table[3]); + const Xbyak::Xmm xmm_table2 = ctx.reg_alloc.UseXmm(code, table[2]); + const Xbyak::Xmm xmm_table3 = ctx.reg_alloc.UseXmm(code, table[3]); code.kandnw(k1, k1, k2); code.vpermi2b(indicies | k1, xmm_table2, xmm_table3); if (is_defaults_zero) { code.vmovdqu8(indicies | k2 | T_z, indicies); - ctx.reg_alloc.DefineValue(inst, indicies); + ctx.reg_alloc.DefineValue(code, inst, indicies); } else { - const Xbyak::Xmm defaults = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm defaults = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.vmovdqu8(defaults | k2, indicies); - ctx.reg_alloc.DefineValue(inst, defaults); + ctx.reg_alloc.DefineValue(code, inst, defaults); } return; } if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW | HostFeature::AVX512VBMI) && table_size == 3) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(args[2]); + const Xbyak::Xmm indicies = 
ctx.reg_alloc.UseScratchXmm(code, args[2]); code.vpcmpub(k1, indicies, code.BConst<8>(xword, 2 * 16), CmpInt::LessThan); code.vpcmpub(k2, indicies, code.BConst<8>(xword, 3 * 16), CmpInt::LessThan); // Handle vector-table 0,1 - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseXmm(table[0]); - const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseXmm(table[1]); + const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseXmm(code, table[0]); + const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseXmm(code, table[1]); code.vpermi2b(indicies | k1, xmm_table0, xmm_table1); @@ -5355,74 +5289,74 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.Release(xmm_table1); // Handle vector-table 2 - const Xbyak::Xmm xmm_table2 = ctx.reg_alloc.UseXmm(table[2]); + const Xbyak::Xmm xmm_table2 = ctx.reg_alloc.UseXmm(code, table[2]); code.kandnw(k1, k1, k2); code.vpermb(indicies | k1, indicies, xmm_table2); if (is_defaults_zero) { code.vmovdqu8(indicies | k2 | T_z, indicies); - ctx.reg_alloc.DefineValue(inst, indicies); + ctx.reg_alloc.DefineValue(code, inst, indicies); } else { - const Xbyak::Xmm defaults = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm defaults = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.vmovdqu8(defaults | k2, indicies); - ctx.reg_alloc.DefineValue(inst, defaults); + ctx.reg_alloc.DefineValue(code, inst, defaults); } return; } if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW | HostFeature::AVX512VBMI) && table_size == 2) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(args[2]); - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseXmm(table[0]); - const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseXmm(table[1]); + const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); + const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseXmm(code, table[0]); + const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseXmm(code, table[1]); code.vpcmpub(k1, indicies, code.BConst<8>(xword, 2 * 16), CmpInt::LessThan); if (is_defaults_zero) { code.vpermi2b(indicies | k1 | T_z, xmm_table0, xmm_table1); - ctx.reg_alloc.DefineValue(inst, indicies); + ctx.reg_alloc.DefineValue(code, inst, indicies); } else { - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.vpermi2b(indicies, xmm_table0, xmm_table1); code.vmovdqu8(result | k1, indicies); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } return; } if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW | HostFeature::AVX512VBMI) && table_size == 1) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(args[2]); - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseXmm(table[0]); + const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(code, args[2]); + const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseXmm(code, table[0]); code.vpcmpub(k1, indicies, code.BConst<8>(xword, 1 * 16), CmpInt::LessThan); if (is_defaults_zero) { - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); code.vpermb(result | k1 | T_z, indicies, xmm_table0); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } else { - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.vpermb(result | k1, indicies, xmm_table0); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } return; } 
if (code.HasHostFeature(HostFeature::SSSE3) && is_defaults_zero && table_size == 1) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(args[2]); - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(table[0]); + const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); + const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); code.paddusb(indicies, code.Const(xword, 0x7070707070707070, 0x7070707070707070)); code.pshufb(xmm_table0, indicies); - ctx.reg_alloc.DefineValue(inst, xmm_table0); + ctx.reg_alloc.DefineValue(code, inst, xmm_table0); return; } if (code.HasHostFeature(HostFeature::SSE41) && table_size == 1) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(args[2]); - const Xbyak::Xmm defaults = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(table[0]); + const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(code, args[2]); + const Xbyak::Xmm defaults = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); if (code.HasHostFeature(HostFeature::AVX)) { code.vpaddusb(xmm0, indicies, code.Const(xword, 0x7070707070707070, 0x7070707070707070)); @@ -5433,14 +5367,14 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { code.pshufb(xmm_table0, indicies); code.pblendvb(xmm_table0, defaults); - ctx.reg_alloc.DefineValue(inst, xmm_table0); + ctx.reg_alloc.DefineValue(code, inst, xmm_table0); return; } if (code.HasHostFeature(HostFeature::SSE41) && is_defaults_zero && table_size == 2) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(args[2]); - const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(table[0]); - const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseScratchXmm(table[1]); + const Xbyak::Xmm indicies = ctx.reg_alloc.UseScratchXmm(code, args[2]); + const Xbyak::Xmm xmm_table0 = ctx.reg_alloc.UseScratchXmm(code, table[0]); + const Xbyak::Xmm xmm_table1 = ctx.reg_alloc.UseScratchXmm(code, table[1]); if (code.HasHostFeature(HostFeature::AVX)) { code.vpaddusb(xmm0, indicies, code.Const(xword, 0x7070707070707070, 0x7070707070707070)); @@ -5453,19 +5387,19 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { code.pshufb(xmm_table1, indicies); code.pblendvb(xmm_table0, xmm_table1); - ctx.reg_alloc.DefineValue(inst, xmm_table0); + ctx.reg_alloc.DefineValue(code, inst, xmm_table0); return; } if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512BW)) { - const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(args[2]); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm masked = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(code, args[2]); + const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm masked = ctx.reg_alloc.ScratchXmm(code); code.vpandd(masked, indicies, code.Const(xword_b, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0)); for (size_t i = 0; i < table_size; ++i) { - const Xbyak::Xmm xmm_table = ctx.reg_alloc.UseScratchXmm(table[i]); + const Xbyak::Xmm xmm_table = ctx.reg_alloc.UseScratchXmm(code, table[i]); const Xbyak::Opmask table_mask = k1; const u64 table_index = mcl::bit::replicate_element(i * 16); @@ -5480,20 +5414,20 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.Release(xmm_table); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } if (code.HasHostFeature(HostFeature::SSE41)) { 
- const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(args[2]); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm masked = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(code, args[2]); + const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm masked = ctx.reg_alloc.ScratchXmm(code); code.movaps(masked, code.Const(xword, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0)); code.pand(masked, indicies); for (size_t i = 0; i < table_size; ++i) { - const Xbyak::Xmm xmm_table = ctx.reg_alloc.UseScratchXmm(table[i]); + const Xbyak::Xmm xmm_table = ctx.reg_alloc.UseScratchXmm(code, table[i]); const u64 table_index = mcl::bit::replicate_element(i * 16); @@ -5512,22 +5446,22 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { ctx.reg_alloc.Release(xmm_table); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } const u32 stack_space = static_cast((table_size + 2) * 16); - ctx.reg_alloc.AllocStackSpace(stack_space + ABI_SHADOW_SPACE); + ctx.reg_alloc.AllocStackSpace(code, stack_space + ABI_SHADOW_SPACE); for (size_t i = 0; i < table_size; ++i) { - const Xbyak::Xmm table_value = ctx.reg_alloc.UseXmm(table[i]); + const Xbyak::Xmm table_value = ctx.reg_alloc.UseXmm(code, table[i]); code.movaps(xword[rsp + ABI_SHADOW_SPACE + i * 16], table_value); ctx.reg_alloc.Release(table_value); } - const Xbyak::Xmm defaults = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(args[2]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm defaults = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm indicies = ctx.reg_alloc.UseXmm(code, args[2]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); ctx.reg_alloc.EndOfAllocScope(); - ctx.reg_alloc.HostCall(nullptr); + ctx.reg_alloc.HostCall(code, nullptr); code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE]); code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + (table_size + 0) * 16]); @@ -5548,16 +5482,16 @@ void EmitX64::EmitVectorTableLookup128(EmitContext& ctx, IR::Inst* inst) { }); code.movaps(result, xword[rsp + ABI_SHADOW_SPACE + (table_size + 0) * 16]); - ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE); + ctx.reg_alloc.ReleaseStackSpace(code, stack_space + ABI_SHADOW_SPACE); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitVectorTranspose8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lower = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm upper = ctx.reg_alloc.UseScratchXmm(args[1]); + const Xbyak::Xmm lower = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm upper = ctx.reg_alloc.UseScratchXmm(code, args[1]); const bool part = args[2].GetImmediateU1(); if (!part) { @@ -5569,14 +5503,14 @@ void EmitX64::EmitVectorTranspose8(EmitContext& ctx, IR::Inst* inst) { } code.por(lower, upper); - ctx.reg_alloc.DefineValue(inst, lower); + ctx.reg_alloc.DefineValue(code, inst, lower); } void EmitX64::EmitVectorTranspose16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lower = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm upper = ctx.reg_alloc.UseScratchXmm(args[1]); + const Xbyak::Xmm lower = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm upper = ctx.reg_alloc.UseScratchXmm(code, args[1]); const bool 
part = args[2].GetImmediateU1(); if (!part) { @@ -5588,43 +5522,43 @@ void EmitX64::EmitVectorTranspose16(EmitContext& ctx, IR::Inst* inst) { } code.por(lower, upper); - ctx.reg_alloc.DefineValue(inst, lower); + ctx.reg_alloc.DefineValue(code, inst, lower); } void EmitX64::EmitVectorTranspose32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lower = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm upper = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm lower = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm upper = ctx.reg_alloc.UseXmm(code, args[1]); const bool part = args[2].GetImmediateU1(); code.shufps(lower, upper, !part ? 0b10001000 : 0b11011101); code.pshufd(lower, lower, 0b11011000); - ctx.reg_alloc.DefineValue(inst, lower); + ctx.reg_alloc.DefineValue(code, inst, lower); } void EmitX64::EmitVectorTranspose64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm lower = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm upper = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm lower = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm upper = ctx.reg_alloc.UseXmm(code, args[1]); const bool part = args[2].GetImmediateU1(); code.shufpd(lower, upper, !part ? 0b00 : 0b11); - ctx.reg_alloc.DefineValue(inst, lower); + ctx.reg_alloc.DefineValue(code, inst, lower); } static void EmitVectorUnsignedAbsoluteDifference(size_t esize, EmitContext& ctx, IR::Inst* inst, BlockOfCode& code) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm temp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm temp = ctx.reg_alloc.ScratchXmm(code); switch (esize) { case 8: { - const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]); + const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.movdqa(temp, x); code.psubusb(temp, y); @@ -5633,8 +5567,8 @@ static void EmitVectorUnsignedAbsoluteDifference(size_t esize, EmitContext& ctx, break; } case 16: { - const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]); + const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); code.movdqa(temp, x); code.psubusw(temp, y); @@ -5645,20 +5579,20 @@ static void EmitVectorUnsignedAbsoluteDifference(size_t esize, EmitContext& ctx, case 32: // See https://stackoverflow.com/questions/3380785/compute-the-absolute-difference-between-unsigned-integers-using-sse/3527267#3527267 if (code.HasHostFeature(HostFeature::SSE41)) { - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); code.movdqa(temp, x); code.pminud(x, y); code.pmaxud(temp, y); code.psubd(temp, x); } else { - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]); + const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); if (ctx.HasOptimization(OptimizationFlag::CodeSpeed)) { // About 45 bytes - const Xbyak::Xmm temp_x = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm temp_y = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm temp_x = 
ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm temp_y = ctx.reg_alloc.ScratchXmm(code); code.pcmpeqd(temp, temp); code.pslld(temp, 31); code.movdqa(temp_x, x); @@ -5686,7 +5620,7 @@ static void EmitVectorUnsignedAbsoluteDifference(size_t esize, EmitContext& ctx, break; } - ctx.reg_alloc.DefineValue(inst, temp); + ctx.reg_alloc.DefineValue(code, inst, temp); } void EmitX64::EmitVectorUnsignedAbsoluteDifference8(EmitContext& ctx, IR::Inst* inst) { @@ -5706,11 +5640,11 @@ void EmitX64::EmitVectorUnsignedMultiply16(EmitContext& ctx, IR::Inst* inst) { const auto lower_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetLowerFromOp); auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); if (upper_inst) { - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); if (code.HasHostFeature(HostFeature::AVX)) { code.vpmulhuw(result, x, y); } else { @@ -5718,18 +5652,18 @@ void EmitX64::EmitVectorUnsignedMultiply16(EmitContext& ctx, IR::Inst* inst) { code.pmulhuw(result, y); } - ctx.reg_alloc.DefineValue(upper_inst, result); + ctx.reg_alloc.DefineValue(code, upper_inst, result); } if (lower_inst) { - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); if (code.HasHostFeature(HostFeature::AVX)) { code.vpmullw(result, x, y); } else { code.movdqa(result, x); code.pmullw(result, y); } - ctx.reg_alloc.DefineValue(lower_inst, result); + ctx.reg_alloc.DefineValue(code, lower_inst, result); } } @@ -5740,24 +5674,24 @@ void EmitX64::EmitVectorUnsignedMultiply32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (lower_inst && !upper_inst && code.HasHostFeature(HostFeature::AVX)) { - const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(args[1]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm x = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); code.vpmulld(result, x, y); - ctx.reg_alloc.DefineValue(lower_inst, result); + ctx.reg_alloc.DefineValue(code, lower_inst, result); } else if (code.HasHostFeature(HostFeature::AVX)) { - const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]); + const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); if (lower_inst) { - const Xbyak::Xmm lower_result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm lower_result = ctx.reg_alloc.ScratchXmm(code); code.vpmulld(lower_result, x, y); - ctx.reg_alloc.DefineValue(lower_inst, lower_result); + ctx.reg_alloc.DefineValue(code, lower_inst, lower_result); } - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); code.vpmuludq(result, x, y); code.vpsrlq(x, x, 32); @@ -5765,13 +5699,13 @@ void EmitX64::EmitVectorUnsignedMultiply32(EmitContext& ctx, IR::Inst* inst) { code.vpmuludq(x, x, y); code.shufps(result, x, 0b11011101); - ctx.reg_alloc.DefineValue(upper_inst, result); + ctx.reg_alloc.DefineValue(code, upper_inst, result); } else { - const Xbyak::Xmm x = 
ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm upper_result = upper_inst ? ctx.reg_alloc.ScratchXmm() : Xbyak::Xmm{-1}; - const Xbyak::Xmm lower_result = lower_inst ? ctx.reg_alloc.ScratchXmm() : Xbyak::Xmm{-1}; + const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm upper_result = upper_inst ? ctx.reg_alloc.ScratchXmm(code) : Xbyak::Xmm{-1}; + const Xbyak::Xmm lower_result = lower_inst ? ctx.reg_alloc.ScratchXmm(code) : Xbyak::Xmm{-1}; // calculate unsigned multiply code.movdqa(tmp, x); @@ -5791,8 +5725,8 @@ void EmitX64::EmitVectorUnsignedMultiply32(EmitContext& ctx, IR::Inst* inst) { if (lower_inst) code.psllq(x, 32); if (upper_inst) code.por(upper_result, tmp); if (lower_inst) code.por(lower_result, x); - if (upper_inst) ctx.reg_alloc.DefineValue(upper_inst, upper_result); - if (lower_inst) ctx.reg_alloc.DefineValue(lower_inst, lower_result); + if (upper_inst) ctx.reg_alloc.DefineValue(code, upper_inst, upper_result); + if (lower_inst) ctx.reg_alloc.DefineValue(code, lower_inst, lower_result); } } @@ -5988,65 +5922,65 @@ void EmitX64::EmitVectorUnsignedSaturatedShiftLeft64(EmitContext& ctx, IR::Inst* void EmitX64::EmitVectorZeroExtend8(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSE41)) { code.pmovzxbw(a, a); } else { - const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm(code); code.pxor(zeros, zeros); code.punpcklbw(a, zeros); } - ctx.reg_alloc.DefineValue(inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } void EmitX64::EmitVectorZeroExtend16(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSE41)) { code.pmovzxwd(a, a); } else { - const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm(code); code.pxor(zeros, zeros); code.punpcklwd(a, zeros); } - ctx.reg_alloc.DefineValue(inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } void EmitX64::EmitVectorZeroExtend32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); if (code.HasHostFeature(HostFeature::SSE41)) { code.pmovzxdq(a, a); } else { - const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm(code); code.pxor(zeros, zeros); code.punpckldq(a, zeros); } - ctx.reg_alloc.DefineValue(inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } void EmitX64::EmitVectorZeroExtend64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm zeros = ctx.reg_alloc.ScratchXmm(code); code.pxor(zeros, zeros); code.punpcklqdq(a, zeros); - 
ctx.reg_alloc.DefineValue(inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } void EmitX64::EmitVectorZeroUpper(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.movq(a, a); // TODO: !IsLastUse - ctx.reg_alloc.DefineValue(inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } void EmitX64::EmitZeroVector(EmitContext& ctx, IR::Inst* inst) { - const Xbyak::Xmm a = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm a = ctx.reg_alloc.ScratchXmm(code); code.pxor(a, a); - ctx.reg_alloc.DefineValue(inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } } // namespace Dynarmic::Backend::X64 diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp index 0c93b517c1..c2bd071b99 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_floating_point.cpp @@ -96,7 +96,7 @@ void HandleNaNs(BlockOfCode& code, EmitContext& ctx, bool fpcr_controlled, std:: if (code.HasHostFeature(HostFeature::SSE41)) { code.ptest(nan_mask, nan_mask); } else { - const Xbyak::Reg32 bitmask = ctx.reg_alloc.ScratchGpr().cvt32(); + const Xbyak::Reg32 bitmask = ctx.reg_alloc.ScratchGpr(code).cvt32(); code.movmskps(bitmask, nan_mask); code.cmp(bitmask, 0); } @@ -312,13 +312,13 @@ void EmitTwoOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins Xbyak::Xmm result; if constexpr (std::is_member_function_pointer_v) { - result = ctx.reg_alloc.UseScratchXmm(args[0]); + result = ctx.reg_alloc.UseScratchXmm(code, args[0]); MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] { (code.*fn)(result); }); } else { - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]); - result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(code, args[0]); + result = ctx.reg_alloc.ScratchXmm(code); MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] { fn(result, xmm_a); }); @@ -328,13 +328,13 @@ void EmitTwoOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins ForceToDefaultNaN(code, ctx.FPCR(fpcr_controlled), result); } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm(code); if constexpr (std::is_member_function_pointer_v) { code.movaps(result, xmm_a); @@ -352,7 +352,7 @@ void EmitTwoOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins HandleNaNs(code, ctx, fpcr_controlled, {result, xmm_a}, nan_mask, nan_handler); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } enum class CheckInputNaN { @@ -368,8 +368,8 @@ void EmitThreeOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* i const bool fpcr_controlled = args[2].GetImmediateU1(); if (ctx.FPCR(fpcr_controlled).DN() || ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) { - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); + const 
Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); if constexpr (std::is_member_function_pointer_v) { MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] { @@ -385,14 +385,14 @@ void EmitThreeOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* i ForceToDefaultNaN(code, ctx.FPCR(fpcr_controlled), xmm_a); } - ctx.reg_alloc.DefineValue(inst, xmm_a); + ctx.reg_alloc.DefineValue(code, inst, xmm_a); return; } - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); - const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm(code); code.movaps(result, xmm_a); @@ -422,7 +422,7 @@ void EmitThreeOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* i HandleNaNs(code, ctx, fpcr_controlled, {result, xmm_a, xmm_b}, nan_mask, nan_handler); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } template @@ -448,16 +448,16 @@ void EmitTwoOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xbyak template void EmitTwoOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, F lambda) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); ctx.reg_alloc.EndOfAllocScope(); - ctx.reg_alloc.HostCall(nullptr); + ctx.reg_alloc.HostCall(code, nullptr); const bool fpcr_controlled = args[fpcr_controlled_arg_index].GetImmediateU1(); EmitTwoOpFallbackWithoutRegAlloc(code, ctx, result, arg1, lambda, fpcr_controlled); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } template @@ -501,17 +501,17 @@ void EmitThreeOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xby template void EmitThreeOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm arg2 = ctx.reg_alloc.UseXmm(args[1]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm arg2 = ctx.reg_alloc.UseXmm(code, args[1]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); ctx.reg_alloc.EndOfAllocScope(); - ctx.reg_alloc.HostCall(nullptr); + ctx.reg_alloc.HostCall(code, nullptr); const bool fpcr_controlled = args[2].GetImmediateU1(); EmitThreeOpFallbackWithoutRegAlloc(code, ctx, result, arg1, arg2, lambda, fpcr_controlled); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } enum class LoadPreviousResult { @@ -565,16 +565,16 @@ template void EmitFourOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const bool fpcr_controlled = args[3].GetImmediateU1(); - const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm arg2 = ctx.reg_alloc.UseXmm(args[1]); - const Xbyak::Xmm arg3 = ctx.reg_alloc.UseXmm(args[2]); - 
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm arg2 = ctx.reg_alloc.UseXmm(code, args[1]); + const Xbyak::Xmm arg3 = ctx.reg_alloc.UseXmm(code, args[2]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); ctx.reg_alloc.EndOfAllocScope(); - ctx.reg_alloc.HostCall(nullptr); + ctx.reg_alloc.HostCall(code, nullptr); EmitFourOpFallbackWithoutRegAlloc(code, ctx, result, arg1, arg2, arg3, lambda, fpcr_controlled); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } } // anonymous namespace @@ -582,9 +582,9 @@ void EmitFourOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lam template void FPVectorAbs(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); code.andps(a, GetNonSignMaskVector(code)); - ctx.reg_alloc.DefineValue(inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } void EmitX64::EmitFPVectorAbs16(EmitContext& ctx, IR::Inst* inst) { @@ -626,29 +626,29 @@ void EmitX64::EmitFPVectorEqual16(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitFPVectorEqual32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const bool fpcr_controlled = args[2].GetImmediateU1(); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm b = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[1]) : ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm b = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(code, args[1]) : ctx.reg_alloc.UseXmm(code, args[1]); MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] { DenormalsAreZero<32>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0); code.cmpeqps(a, b); }); - ctx.reg_alloc.DefineValue(inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } void EmitX64::EmitFPVectorEqual64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const bool fpcr_controlled = args[2].GetImmediateU1(); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm b = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[1]) : ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm b = ctx.FPCR(fpcr_controlled).FZ() ? 
ctx.reg_alloc.UseScratchXmm(code, args[1]) : ctx.reg_alloc.UseXmm(code, args[1]); MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] { DenormalsAreZero<64>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0); code.cmpeqpd(a, b); }); - ctx.reg_alloc.DefineValue(inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } template @@ -664,13 +664,13 @@ void EmitX64::EmitFPVectorFromHalf32(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::F16C) && !ctx.FPCR().AHP() && !ctx.FPCR().FZ16()) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm value = ctx.reg_alloc.UseXmm(args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm value = ctx.reg_alloc.UseXmm(code, args[0]); code.vcvtph2ps(result, value); ForceToDefaultNaN<32>(code, ctx.FPCR(fpcr_controlled), result); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } else { switch (rounding_mode) { case FP::RoundingMode::ToNearest_TieEven: @@ -696,7 +696,7 @@ void EmitX64::EmitFPVectorFromHalf32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitFPVectorFromSignedFixed32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(code, args[0]); const int fbits = args[1].GetImmediateU8(); const FP::RoundingMode rounding_mode = static_cast(args[2].GetImmediateU8()); const bool fpcr_controlled = args[3].GetImmediateU1(); @@ -709,12 +709,12 @@ void EmitX64::EmitFPVectorFromSignedFixed32(EmitContext& ctx, IR::Inst* inst) { } }); - ctx.reg_alloc.DefineValue(inst, xmm); + ctx.reg_alloc.DefineValue(code, inst, xmm); } void EmitX64::EmitFPVectorFromSignedFixed64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(code, args[0]); const int fbits = args[1].GetImmediateU8(); const FP::RoundingMode rounding_mode = static_cast(args[2].GetImmediateU8()); const bool fpcr_controlled = args[3].GetImmediateU1(); @@ -724,8 +724,8 @@ void EmitX64::EmitFPVectorFromSignedFixed64(EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) { code.vcvtqq2pd(xmm, xmm); } else if (code.HasHostFeature(HostFeature::SSE41)) { - const Xbyak::Xmm xmm_tmp = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(); + const Xbyak::Xmm xmm_tmp = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(code); // First quadword code.movq(tmp, xmm); @@ -738,9 +738,9 @@ void EmitX64::EmitFPVectorFromSignedFixed64(EmitContext& ctx, IR::Inst* inst) { // Combine code.unpcklpd(xmm, xmm_tmp); } else { - const Xbyak::Xmm high_xmm = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm xmm_tmp = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(); + const Xbyak::Xmm high_xmm = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm xmm_tmp = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(code); // First quadword code.movhlps(high_xmm, xmm); @@ -760,12 +760,12 @@ void EmitX64::EmitFPVectorFromSignedFixed64(EmitContext& ctx, IR::Inst* inst) { } }); - ctx.reg_alloc.DefineValue(inst, xmm); + ctx.reg_alloc.DefineValue(code, inst, xmm); } void EmitX64::EmitFPVectorFromUnsignedFixed32(EmitContext& ctx, IR::Inst* 
inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(code, args[0]); const int fbits = args[1].GetImmediateU8(); const FP::RoundingMode rounding_mode = static_cast(args[2].GetImmediateU8()); const bool fpcr_controlled = args[3].GetImmediateU1(); @@ -779,7 +779,7 @@ void EmitX64::EmitFPVectorFromUnsignedFixed32(EmitContext& ctx, IR::Inst* inst) const Xbyak::Address mem_53000000 = code.BConst<32>(xword, 0x53000000); const Xbyak::Address mem_D3000080 = code.BConst<32>(xword, 0xD3000080); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); if (code.HasHostFeature(HostFeature::AVX)) { code.vpblendw(tmp, xmm, mem_4B000000, 0b10101010); @@ -810,12 +810,12 @@ void EmitX64::EmitFPVectorFromUnsignedFixed32(EmitContext& ctx, IR::Inst* inst) } }); - ctx.reg_alloc.DefineValue(inst, xmm); + ctx.reg_alloc.DefineValue(code, inst, xmm); } void EmitX64::EmitFPVectorFromUnsignedFixed64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(code, args[0]); const int fbits = args[1].GetImmediateU8(); const FP::RoundingMode rounding_mode = static_cast(args[2].GetImmediateU8()); const bool fpcr_controlled = args[3].GetImmediateU1(); @@ -828,9 +828,9 @@ void EmitX64::EmitFPVectorFromUnsignedFixed64(EmitContext& ctx, IR::Inst* inst) const Xbyak::Address unpack = code.Const(xword, 0x4530000043300000, 0); const Xbyak::Address subtrahend = code.Const(xword, 0x4330000000000000, 0x4530000000000000); - const Xbyak::Xmm unpack_reg = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm subtrahend_reg = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm unpack_reg = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm subtrahend_reg = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm(code); if (code.HasHostFeature(HostFeature::AVX)) { code.vmovapd(unpack_reg, unpack); @@ -846,7 +846,7 @@ void EmitX64::EmitFPVectorFromUnsignedFixed64(EmitContext& ctx, IR::Inst* inst) code.vhaddpd(xmm, tmp1, xmm); } else { - const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(code); code.movapd(unpack_reg, unpack); code.movapd(subtrahend_reg, subtrahend); @@ -877,63 +877,63 @@ void EmitX64::EmitFPVectorFromUnsignedFixed64(EmitContext& ctx, IR::Inst* inst) } }); - ctx.reg_alloc.DefineValue(inst, xmm); + ctx.reg_alloc.DefineValue(code, inst, xmm); } void EmitX64::EmitFPVectorGreater32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const bool fpcr_controlled = args[2].GetImmediateU1(); - const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[0]) : ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]); + const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? 
ctx.reg_alloc.UseScratchXmm(code, args[0]) : ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] { DenormalsAreZero<32>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0); code.cmpltps(b, a); }); - ctx.reg_alloc.DefineValue(inst, b); + ctx.reg_alloc.DefineValue(code, inst, b); } void EmitX64::EmitFPVectorGreater64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const bool fpcr_controlled = args[2].GetImmediateU1(); - const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[0]) : ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]); + const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(code, args[0]) : ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] { DenormalsAreZero<64>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0); code.cmpltpd(b, a); }); - ctx.reg_alloc.DefineValue(inst, b); + ctx.reg_alloc.DefineValue(code, inst, b); } void EmitX64::EmitFPVectorGreaterEqual32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const bool fpcr_controlled = args[2].GetImmediateU1(); - const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[0]) : ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]); + const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(code, args[0]) : ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] { DenormalsAreZero<32>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0); code.cmpleps(b, a); }); - ctx.reg_alloc.DefineValue(inst, b); + ctx.reg_alloc.DefineValue(code, inst, b); } void EmitX64::EmitFPVectorGreaterEqual64(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const bool fpcr_controlled = args[2].GetImmediateU1(); - const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[0]) : ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]); + const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(code, args[0]) : ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]); MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] { DenormalsAreZero<64>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0); code.cmplepd(b, a); }); - ctx.reg_alloc.DefineValue(inst, b); + ctx.reg_alloc.DefineValue(code, inst, b); } template @@ -942,12 +942,12 @@ static void EmitFPVectorMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* in if (ctx.FPCR(fpcr_controlled).DN()) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm xmm_b = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[1]) : ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm xmm_b = ctx.FPCR(fpcr_controlled).FZ() ? 
ctx.reg_alloc.UseScratchXmm(code, args[1]) : ctx.reg_alloc.UseXmm(code, args[1]); const Xbyak::Xmm mask = xmm0; - const Xbyak::Xmm eq = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm eq = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm(code); MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] { DenormalsAreZero(code, ctx.FPCR(fpcr_controlled), {result, xmm_b}, mask); @@ -994,7 +994,7 @@ static void EmitFPVectorMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* in } }); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } @@ -1002,11 +1002,11 @@ static void EmitFPVectorMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* in EmitThreeOpVectorOperation( code, ctx, inst, [&](const Xbyak::Xmm& result, Xbyak::Xmm xmm_b) { const Xbyak::Xmm mask = xmm0; - const Xbyak::Xmm eq = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm eq = ctx.reg_alloc.ScratchXmm(code); if (ctx.FPCR(fpcr_controlled).FZ()) { const Xbyak::Xmm prev_xmm_b = xmm_b; - xmm_b = ctx.reg_alloc.ScratchXmm(); + xmm_b = ctx.reg_alloc.ScratchXmm(code); code.movaps(xmm_b, prev_xmm_b); DenormalsAreZero(code, ctx.FPCR(fpcr_controlled), {result, xmm_b}, mask); } @@ -1053,13 +1053,13 @@ static void EmitFPVectorMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::I const bool fpcr_controlled = inst->GetArg(2).GetU1(); auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(args[1]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm intermediate_result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm intermediate_result = ctx.reg_alloc.ScratchXmm(code); const Xbyak::Xmm tmp1 = xmm0; - const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(code); // NaN requirements: // op1 op2 result @@ -1139,7 +1139,7 @@ static void EmitFPVectorMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::I } }); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } @@ -1230,7 +1230,7 @@ static void EmitFPVectorMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::I } }); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitFPVectorMax32(EmitContext& ctx, IR::Inst* inst) { @@ -1316,27 +1316,27 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::FMA) && !needs_rounding_correction && !needs_nan_correction) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); - const Xbyak::Xmm xmm_c = ctx.reg_alloc.UseXmm(args[2]); + const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + const Xbyak::Xmm xmm_c = ctx.reg_alloc.UseXmm(code, args[2]); MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] { FCODE(vfmadd231p)(result, xmm_b, xmm_c); ForceToDefaultNaN(code, ctx.FPCR(fpcr_controlled), result); }); - ctx.reg_alloc.DefineValue(inst, result); + 
ctx.reg_alloc.DefineValue(code, inst, result); return; } if (code.HasHostFeature(HostFeature::FMA | HostFeature::AVX)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); - const Xbyak::Xmm xmm_c = ctx.reg_alloc.UseXmm(args[2]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + const Xbyak::Xmm xmm_c = ctx.reg_alloc.UseXmm(code, args[2]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel(); @@ -1375,21 +1375,21 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { code.jmp(*end, code.T_NEAR); }); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm operand2 = ctx.reg_alloc.UseScratchXmm(args[1]); - const Xbyak::Xmm operand3 = ctx.reg_alloc.UseXmm(args[2]); + const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm operand2 = ctx.reg_alloc.UseScratchXmm(code, args[1]); + const Xbyak::Xmm operand3 = ctx.reg_alloc.UseXmm(code, args[2]); FCODE(mulp)(operand2, operand3); FCODE(addp)(operand1, operand2); - ctx.reg_alloc.DefineValue(inst, operand1); + ctx.reg_alloc.DefineValue(code, inst, operand1); return; } } @@ -1417,10 +1417,10 @@ static void EmitFPVectorMulX(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst const bool fpcr_controlled = args[2].GetImmediateU1(); if (ctx.FPCR(fpcr_controlled).DN() && code.HasHostFeature(HostFeature::AVX)) { - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm twos = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(code, args[1]); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm twos = ctx.reg_alloc.ScratchXmm(code); MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] { FCODE(vcmpunordp)(xmm0, result, operand); @@ -1434,14 +1434,14 @@ static void EmitFPVectorMulX(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst FCODE(blendvp)(result, twos); }); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]); - const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]); + const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm(code); code.movaps(nan_mask, xmm_b); code.movaps(result, xmm_a); @@ -1464,7 +1464,7 @@ static void EmitFPVectorMulX(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst HandleNaNs(code, ctx, fpcr_controlled, {result, xmm_a, xmm_b}, nan_mask, 
nan_handler); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } void EmitX64::EmitFPVectorMulX32(EmitContext& ctx, IR::Inst* inst) { @@ -1482,12 +1482,12 @@ void FPVectorNeg(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]); const Xbyak::Address mask = code.BConst(xword, sign_mask); code.xorps(a, mask); - ctx.reg_alloc.DefineValue(inst, a); + ctx.reg_alloc.DefineValue(code, inst, a); } void EmitX64::EmitFPVectorNeg16(EmitContext& ctx, IR::Inst* inst) { @@ -1512,7 +1512,7 @@ void EmitX64::EmitFPVectorPairedAdd64(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitFPVectorPairedAddLower32(EmitContext& ctx, IR::Inst* inst) { EmitThreeOpVectorOperation<32, PairedLowerIndexer>(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm xmm_b) { - const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm(code); code.xorps(zero, zero); code.punpcklqdq(result, xmm_b); code.haddps(result, zero); @@ -1521,7 +1521,7 @@ void EmitX64::EmitFPVectorPairedAddLower32(EmitContext& ctx, IR::Inst* inst) { void EmitX64::EmitFPVectorPairedAddLower64(EmitContext& ctx, IR::Inst* inst) { EmitThreeOpVectorOperation<64, PairedLowerIndexer>(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm xmm_b) { - const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm(code); code.xorps(zero, zero); code.punpcklqdq(result, xmm_b); code.haddpd(result, zero); @@ -1535,8 +1535,8 @@ static void EmitRecipEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins if constexpr (fsize != 16) { if (ctx.HasOptimization(OptimizationFlag::Unsafe_ReducedErrorFP)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) { FCODE(vrcp14p)(result, operand); @@ -1550,7 +1550,7 @@ static void EmitRecipEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins } } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } } @@ -1589,16 +1589,16 @@ static void EmitRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in auto args = ctx.reg_alloc.GetArgumentInfo(inst); const bool fpcr_controlled = args[2].GetImmediateU1(); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]); MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] { code.movaps(result, GetVectorOf(code)); FCODE(vfnmadd231p)(result, operand1, operand2); }); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } @@ -1606,10 +1606,10 @@ static void EmitRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in auto args = ctx.reg_alloc.GetArgumentInfo(inst); const bool fpcr_controlled = args[2].GetImmediateU1(); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); - const 
Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel(); @@ -1633,22 +1633,22 @@ static void EmitRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in code.jmp(*end, code.T_NEAR); }); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); code.movaps(result, GetVectorOf(code)); FCODE(mulp)(operand1, operand2); FCODE(subp)(result, operand1); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } } @@ -1757,8 +1757,8 @@ static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins if constexpr (fsize != 16) { if (ctx.HasOptimization(OptimizationFlag::Unsafe_ReducedErrorFP)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) { FCODE(vrsqrt14p)(result, operand); @@ -1772,7 +1772,7 @@ static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins } } - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } @@ -1780,9 +1780,9 @@ static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins auto args = ctx.reg_alloc.GetArgumentInfo(inst); const bool fpcr_controlled = args[1].GetImmediateU1(); - const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm value = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm value = ctx.reg_alloc.ScratchXmm(code); SharedLabel bad_values = GenSharedLabel(), end = GenSharedLabel(); @@ -1816,7 +1816,7 @@ static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins code.jmp(*end, code.T_NEAR); }); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } } @@ -1851,9 +1851,9 @@ static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in auto args = ctx.reg_alloc.GetArgumentInfo(inst); const bool fpcr_controlled = args[2].GetImmediateU1(); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + const 
Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]); MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] { code.vmovaps(result, GetVectorOf(code)); @@ -1861,7 +1861,7 @@ static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in FCODE(vmulp)(result, result, GetVectorOf(code)); }); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } @@ -1869,11 +1869,11 @@ static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in auto args = ctx.reg_alloc.GetArgumentInfo(inst); const bool fpcr_controlled = args[2].GetImmediateU1(); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm(code); SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel(); @@ -1902,23 +1902,23 @@ static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in code.jmp(*end, code.T_NEAR); }); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); code.movaps(result, GetVectorOf(code)); FCODE(mulp)(operand1, operand2); FCODE(subp)(result, operand1); FCODE(mulp)(result, GetVectorOf(code)); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } } @@ -1972,12 +1972,12 @@ void EmitX64::EmitFPVectorToHalf32(EmitContext& ctx, IR::Inst* inst) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); const auto round_imm = ConvertRoundingModeToX64Immediate(rounding_mode); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); ForceToDefaultNaN<32>(code, ctx.FPCR(fpcr_controlled), result); code.vcvtps2ph(result, result, u8(*round_imm)); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } else { switch (rounding_mode) { case FP::RoundingMode::ToNearest_TieEven: @@ -2018,7 +2018,7 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { if (code.HasHostFeature(HostFeature::SSE41) && rounding != FP::RoundingMode::ToNearest_TieAwayFromZero) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(args[0]); + const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(code, args[0]); MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] { const int round_imm = [&] { @@ -2045,8 +2045,8 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { if 
(code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) { code.vcvttpd2qq(src, src); } else { - const Xbyak::Reg64 hi = ctx.reg_alloc.ScratchGpr(); - const Xbyak::Reg64 lo = ctx.reg_alloc.ScratchGpr(); + const Xbyak::Reg64 hi = ctx.reg_alloc.ScratchGpr(code); + const Xbyak::Reg64 lo = ctx.reg_alloc.ScratchGpr(code); code.cvttsd2si(lo, src); code.punpckhqdq(src, src); @@ -2093,12 +2093,12 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { FCODE(andp)(src, xmm0); // Will we exceed unsigned range? - const Xbyak::Xmm exceed_unsigned = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm exceed_unsigned = ctx.reg_alloc.ScratchXmm(code); code.movaps(exceed_unsigned, GetVectorOf(code)); FCODE(cmplep)(exceed_unsigned, src); // Will be exceed signed range? - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); code.movaps(tmp, GetVectorOf(code)); code.movaps(xmm0, tmp); FCODE(cmplep)(xmm0, src); @@ -2122,7 +2122,7 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) { } }); - ctx.reg_alloc.DefineValue(inst, src); + ctx.reg_alloc.DefineValue(code, inst, src); return; } } diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_saturation.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_saturation.cpp index 412bb2bfaa..03ded4066d 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_saturation.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_vector_saturation.cpp @@ -26,9 +26,9 @@ namespace { void EmitVectorSaturatedNative(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*saturated_fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&), void (Xbyak::CodeGenerator::*unsaturated_fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&), void (Xbyak::CodeGenerator::*sub_fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&)) { auto args = ctx.reg_alloc.GetArgumentInfo(inst); - const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm addend = ctx.reg_alloc.UseXmm(args[1]); - const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr().cvt8(); + const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm addend = ctx.reg_alloc.UseXmm(code, args[1]); + const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr(code).cvt8(); code.movaps(xmm0, result); @@ -39,7 +39,7 @@ void EmitVectorSaturatedNative(BlockOfCode& code, EmitContext& ctx, IR::Inst* in if (code.HasHostFeature(HostFeature::SSE41)) { code.ptest(xmm0, xmm0); } else { - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); code.pxor(tmp, tmp); code.pcmpeqw(xmm0, tmp); code.pmovmskb(overflow.cvt32(), xmm0); @@ -49,7 +49,7 @@ void EmitVectorSaturatedNative(BlockOfCode& code, EmitContext& ctx, IR::Inst* in code.setnz(overflow); code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } enum class Op { @@ -65,10 +65,10 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512DQ)) { - const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr().cvt8(); + 
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr(code).cvt8(); code.movaps(xmm0, operand1); @@ -91,15 +91,15 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in code.setnz(overflow); code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } - const Xbyak::Xmm operand1 = code.HasHostFeature(HostFeature::AVX) ? ctx.reg_alloc.UseXmm(args[0]) : ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); - const Xbyak::Xmm result = code.HasHostFeature(HostFeature::AVX) ? ctx.reg_alloc.ScratchXmm() : operand1; - const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr().cvt8(); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm operand1 = code.HasHostFeature(HostFeature::AVX) ? ctx.reg_alloc.UseXmm(code, args[0]) : ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]); + const Xbyak::Xmm result = code.HasHostFeature(HostFeature::AVX) ? ctx.reg_alloc.ScratchXmm(code) : operand1; + const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr(code).cvt8(); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); if (code.HasHostFeature(HostFeature::AVX)) { if constexpr (op == Op::Add) { @@ -150,7 +150,7 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in if (code.HasHostFeature(HostFeature::SSE41)) { FCODE(blendvp)(result, tmp); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } else { code.psrad(xmm0, 31); if constexpr (esize == 64) { @@ -161,7 +161,7 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in code.pandn(xmm0, result); code.por(tmp, xmm0); - ctx.reg_alloc.DefineValue(inst, tmp); + ctx.reg_alloc.DefineValue(code, inst, tmp); } } @@ -172,10 +172,10 @@ void EmitVectorUnsignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* auto args = ctx.reg_alloc.GetArgumentInfo(inst); if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512DQ)) { - const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]); - const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); - const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(); - const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr().cvt8(); + const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]); + const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]); + const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code); + const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr(code).cvt8(); if constexpr (op == Op::Add) { ICODE(vpadd)(result, operand1, operand2); @@ -191,15 +191,15 @@ void EmitVectorUnsignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* code.setnz(overflow); code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); return; } - const Xbyak::Xmm operand1 = code.HasHostFeature(HostFeature::AVX) ? ctx.reg_alloc.UseXmm(args[0]) : ctx.reg_alloc.UseScratchXmm(args[0]); - const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]); - const Xbyak::Xmm result = code.HasHostFeature(HostFeature::AVX) ? 
ctx.reg_alloc.ScratchXmm() : operand1; - const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr().cvt8(); - const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); + const Xbyak::Xmm operand1 = code.HasHostFeature(HostFeature::AVX) ? ctx.reg_alloc.UseXmm(code, args[0]) : ctx.reg_alloc.UseScratchXmm(code, args[0]); + const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]); + const Xbyak::Xmm result = code.HasHostFeature(HostFeature::AVX) ? ctx.reg_alloc.ScratchXmm(code) : operand1; + const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr(code).cvt8(); + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code); if constexpr (op == Op::Add) { if (code.HasHostFeature(HostFeature::AVX)) { @@ -252,10 +252,10 @@ void EmitVectorUnsignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* if constexpr (op == Op::Add) { code.por(result, tmp); - ctx.reg_alloc.DefineValue(inst, result); + ctx.reg_alloc.DefineValue(code, inst, result); } else { code.pandn(tmp, result); - ctx.reg_alloc.DefineValue(inst, tmp); + ctx.reg_alloc.DefineValue(code, inst, tmp); } } diff --git a/src/dynarmic/src/dynarmic/backend/x64/hostloc.cpp b/src/dynarmic/src/dynarmic/backend/x64/hostloc.cpp deleted file mode 100644 index b5e2a5f18b..0000000000 --- a/src/dynarmic/src/dynarmic/backend/x64/hostloc.cpp +++ /dev/null @@ -1,25 +0,0 @@ -/* This file is part of the dynarmic project. - * Copyright (c) 2016 MerryMage - * SPDX-License-Identifier: 0BSD - */ - -#include "dynarmic/backend/x64/hostloc.h" - -#include - -#include "dynarmic/backend/x64/abi.h" -#include "dynarmic/backend/x64/stack_layout.h" - -namespace Dynarmic::Backend::X64 { - -Xbyak::Reg64 HostLocToReg64(HostLoc loc) { - ASSERT(HostLocIsGPR(loc)); - return Xbyak::Reg64(static_cast(loc)); -} - -Xbyak::Xmm HostLocToXmm(HostLoc loc) { - ASSERT(HostLocIsXMM(loc)); - return Xbyak::Xmm(static_cast(loc) - static_cast(HostLoc::XMM0)); -} - -} // namespace Dynarmic::Backend::X64 diff --git a/src/dynarmic/src/dynarmic/backend/x64/hostloc.h b/src/dynarmic/src/dynarmic/backend/x64/hostloc.h index d6fb88554e..65eeb11b55 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/hostloc.h +++ b/src/dynarmic/src/dynarmic/backend/x64/hostloc.h @@ -152,7 +152,14 @@ const HostLocList any_xmm = { HostLoc::XMM15, }; -Xbyak::Reg64 HostLocToReg64(HostLoc loc); -Xbyak::Xmm HostLocToXmm(HostLoc loc); +inline Xbyak::Reg64 HostLocToReg64(HostLoc loc) noexcept { + ASSERT(HostLocIsGPR(loc)); + return Xbyak::Reg64(int(loc)); +} + +inline Xbyak::Xmm HostLocToXmm(HostLoc loc) noexcept { + ASSERT(HostLocIsXMM(loc)); + return Xbyak::Xmm(int(loc) - int(HostLoc::XMM0)); +} } // namespace Dynarmic::Backend::X64 diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp index 4012f5cb4d..2f13298480 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.cpp @@ -24,15 +24,6 @@ namespace Dynarmic::Backend::X64 { -#define MAYBE_AVX(OPCODE, ...) 
\ - [&] { \ - if (code->HasHostFeature(HostFeature::AVX)) { \ - code->v##OPCODE(__VA_ARGS__); \ - } else { \ - code->OPCODE(__VA_ARGS__); \ - } \ - }() - static inline bool CanExchange(const HostLoc a, const HostLoc b) noexcept { return HostLocIsGPR(a) && HostLocIsGPR(b); } @@ -107,14 +98,14 @@ void HostLocInfo::AddValue(IR::Inst* inst) noexcept { max_bit_width = std::max(max_bit_width, std::countr_zero(GetBitWidth(inst->GetType()))); } -void HostLocInfo::EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept { +void HostLocInfo::EmitVerboseDebuggingOutput(BlockOfCode& code, size_t host_loc_index) const noexcept { using namespace Xbyak::util; for (auto const value : values) { - code->mov(code->ABI_PARAM1, rsp); - code->mov(code->ABI_PARAM2, host_loc_index); - code->mov(code->ABI_PARAM3, value->GetName()); - code->mov(code->ABI_PARAM4, GetBitWidth(value->GetType())); - code->CallFunction(PrintVerboseDebuggingOutputLine); + code.mov(code.ABI_PARAM1, rsp); + code.mov(code.ABI_PARAM2, host_loc_index); + code.mov(code.ABI_PARAM3, value->GetName()); + code.mov(code.ABI_PARAM4, GetBitWidth(value->GetType())); + code.CallFunction(PrintVerboseDebuggingOutputLine); } } @@ -128,7 +119,7 @@ bool Argument::FitsInImmediateU32() const noexcept { bool Argument::FitsInImmediateS32() const noexcept { if (!IsImmediate()) return false; - const s64 imm = static_cast(value.GetImmediateAsU64()); + const s64 imm = s64(value.GetImmediateAsU64()); return -s64(0x80000000) <= imm && imm <= s64(0x7FFFFFFF); } @@ -174,36 +165,38 @@ IR::AccType Argument::GetImmediateAccType() const noexcept { } /// Is this value currently in a GPR? -bool Argument::IsInGpr() const noexcept { +bool Argument::IsInGpr(RegAlloc& reg_alloc) const noexcept { if (IsImmediate()) return false; return HostLocIsGPR(*reg_alloc.ValueLocation(value.GetInst())); } /// Is this value currently in a XMM? -bool Argument::IsInXmm() const noexcept { +bool Argument::IsInXmm(RegAlloc& reg_alloc) const noexcept { if (IsImmediate()) return false; return HostLocIsXMM(*reg_alloc.ValueLocation(value.GetInst())); } /// Is this value currently in memory? 
-bool Argument::IsInMemory() const noexcept { +bool Argument::IsInMemory(RegAlloc& reg_alloc) const noexcept { if (IsImmediate()) return false; return HostLocIsSpill(*reg_alloc.ValueLocation(value.GetInst())); } -RegAlloc::RegAlloc(BlockOfCode* code, boost::container::static_vector gpr_order, boost::container::static_vector xmm_order) noexcept +RegAlloc::RegAlloc(boost::container::static_vector gpr_order, boost::container::static_vector xmm_order) noexcept : gpr_order(gpr_order), - xmm_order(xmm_order), - code(code) + xmm_order(xmm_order) {} -//static std::uint64_t Zfncwjkrt_blockOfCodeShim = 0; - RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(const IR::Inst* inst) noexcept { - ArgumentInfo ret{Argument{*this}, Argument{*this}, Argument{*this}, Argument{*this}}; + ArgumentInfo ret{ + Argument{}, + Argument{}, + Argument{}, + Argument{} + }; for (size_t i = 0; i < inst->NumArgs(); i++) { const auto arg = inst->GetArg(i); ret[i].value = arg; @@ -228,34 +221,34 @@ void RegAlloc::RegisterPseudoOperation(const IR::Inst* inst) noexcept { } } -Xbyak::Reg64 RegAlloc::UseScratchGpr(Argument& arg) noexcept { +Xbyak::Reg64 RegAlloc::UseScratchGpr(BlockOfCode& code, Argument& arg) noexcept { ASSERT(!arg.allocated); arg.allocated = true; - return HostLocToReg64(UseScratchImpl(arg.value, gpr_order)); + return HostLocToReg64(UseScratchImpl(code, arg.value, gpr_order)); } -Xbyak::Xmm RegAlloc::UseScratchXmm(Argument& arg) noexcept { +Xbyak::Xmm RegAlloc::UseScratchXmm(BlockOfCode& code, Argument& arg) noexcept { ASSERT(!arg.allocated); arg.allocated = true; - return HostLocToXmm(UseScratchImpl(arg.value, xmm_order)); + return HostLocToXmm(UseScratchImpl(code, arg.value, xmm_order)); } -void RegAlloc::UseScratch(Argument& arg, HostLoc host_loc) noexcept { +void RegAlloc::UseScratch(BlockOfCode& code, Argument& arg, HostLoc host_loc) noexcept { ASSERT(!arg.allocated); arg.allocated = true; - UseScratchImpl(arg.value, {host_loc}); + UseScratchImpl(code, arg.value, {host_loc}); } -void RegAlloc::DefineValue(IR::Inst* inst, const Xbyak::Reg& reg) noexcept { +void RegAlloc::DefineValue(BlockOfCode& code, IR::Inst* inst, const Xbyak::Reg& reg) noexcept { ASSERT(reg.getKind() == Xbyak::Operand::XMM || reg.getKind() == Xbyak::Operand::REG); const auto hostloc = static_cast(reg.getIdx() + static_cast(reg.getKind() == Xbyak::Operand::XMM ? 
HostLoc::XMM0 : HostLoc::RAX)); - DefineValueImpl(inst, hostloc); + DefineValueImpl(code, inst, hostloc); } -void RegAlloc::DefineValue(IR::Inst* inst, Argument& arg) noexcept { +void RegAlloc::DefineValue(BlockOfCode& code, IR::Inst* inst, Argument& arg) noexcept { ASSERT(!arg.allocated); arg.allocated = true; - DefineValueImpl(inst, arg.value); + DefineValueImpl(code, inst, arg.value); } void RegAlloc::Release(const Xbyak::Reg& reg) noexcept { @@ -264,9 +257,9 @@ void RegAlloc::Release(const Xbyak::Reg& reg) noexcept { LocInfo(hostloc).ReleaseOne(); } -HostLoc RegAlloc::UseImpl(IR::Value use_value, const boost::container::static_vector& desired_locations) noexcept { +HostLoc RegAlloc::UseImpl(BlockOfCode& code, IR::Value use_value, const boost::container::static_vector& desired_locations) noexcept { if (use_value.IsImmediate()) { - return LoadImmediate(use_value, ScratchImpl(desired_locations)); + return LoadImmediate(code, use_value, ScratchImpl(code, desired_locations)); } const auto* use_inst = use_value.GetInst(); @@ -280,25 +273,25 @@ HostLoc RegAlloc::UseImpl(IR::Value use_value, const boost::container::static_ve } if (LocInfo(current_location).IsLocked()) { - return UseScratchImpl(use_value, desired_locations); + return UseScratchImpl(code, use_value, desired_locations); } const HostLoc destination_location = SelectARegister(desired_locations); if (max_bit_width > HostLocBitWidth(destination_location)) { - return UseScratchImpl(use_value, desired_locations); + return UseScratchImpl(code, use_value, desired_locations); } else if (CanExchange(destination_location, current_location)) { - Exchange(destination_location, current_location); + Exchange(code, destination_location, current_location); } else { - MoveOutOfTheWay(destination_location); - Move(destination_location, current_location); + MoveOutOfTheWay(code, destination_location); + Move(code, destination_location, current_location); } LocInfo(destination_location).ReadLock(); return destination_location; } -HostLoc RegAlloc::UseScratchImpl(IR::Value use_value, const boost::container::static_vector& desired_locations) noexcept { +HostLoc RegAlloc::UseScratchImpl(BlockOfCode& code, IR::Value use_value, const boost::container::static_vector& desired_locations) noexcept { if (use_value.IsImmediate()) { - return LoadImmediate(use_value, ScratchImpl(desired_locations)); + return LoadImmediate(code, use_value, ScratchImpl(code, desired_locations)); } const auto* use_inst = use_value.GetInst(); @@ -308,7 +301,7 @@ HostLoc RegAlloc::UseScratchImpl(IR::Value use_value, const boost::container::st const bool can_use_current_location = std::find(desired_locations.begin(), desired_locations.end(), current_location) != desired_locations.end(); if (can_use_current_location && !LocInfo(current_location).IsLocked()) { if (!LocInfo(current_location).IsLastUse()) { - MoveOutOfTheWay(current_location); + MoveOutOfTheWay(code, current_location); } else { LocInfo(current_location).SetLastUse(); } @@ -317,20 +310,22 @@ HostLoc RegAlloc::UseScratchImpl(IR::Value use_value, const boost::container::st } const HostLoc destination_location = SelectARegister(desired_locations); - MoveOutOfTheWay(destination_location); - CopyToScratch(bit_width, destination_location, current_location); + MoveOutOfTheWay(code, destination_location); + CopyToScratch(code, bit_width, destination_location, current_location); LocInfo(destination_location).WriteLock(); return destination_location; } -HostLoc RegAlloc::ScratchImpl(const boost::container::static_vector& 
desired_locations) noexcept { +HostLoc RegAlloc::ScratchImpl(BlockOfCode& code, const boost::container::static_vector& desired_locations) noexcept { const HostLoc location = SelectARegister(desired_locations); - MoveOutOfTheWay(location); + MoveOutOfTheWay(code, location); LocInfo(location).WriteLock(); return location; } -void RegAlloc::HostCall(IR::Inst* result_def, +void RegAlloc::HostCall( + BlockOfCode& code, + IR::Inst* result_def, const std::optional arg0, const std::optional arg1, const std::optional arg2, @@ -348,20 +343,20 @@ void RegAlloc::HostCall(IR::Inst* result_def, return ret; }(); - ScratchGpr(ABI_RETURN); - if (result_def) { - DefineValueImpl(result_def, ABI_RETURN); - } + ScratchGpr(code, ABI_RETURN); + if (result_def) + DefineValueImpl(code, result_def, ABI_RETURN); + for (size_t i = 0; i < args.size(); i++) { if (args[i]) { - UseScratch(*args[i], args_hostloc[i]); + UseScratch(code, *args[i], args_hostloc[i]); } else { - ScratchGpr(args_hostloc[i]); // TODO: Force spill + ScratchGpr(code, args_hostloc[i]); // TODO: Force spill } } // Must match with with ScratchImpl for (auto const gpr : other_caller_save) { - MoveOutOfTheWay(gpr); + MoveOutOfTheWay(code, gpr); LocInfo(gpr).WriteLock(); } for (size_t i = 0; i < args.size(); i++) { @@ -370,13 +365,13 @@ void RegAlloc::HostCall(IR::Inst* result_def, const Xbyak::Reg64 reg = HostLocToReg64(args_hostloc[i]); switch (args[i]->get().GetType()) { case IR::Type::U8: - code->movzx(reg.cvt32(), reg.cvt8()); + code.movzx(reg.cvt32(), reg.cvt8()); break; case IR::Type::U16: - code->movzx(reg.cvt32(), reg.cvt16()); + code.movzx(reg.cvt32(), reg.cvt16()); break; case IR::Type::U32: - code->mov(reg.cvt32(), reg.cvt32()); + code.mov(reg.cvt32(), reg.cvt32()); break; case IR::Type::U64: break; //no op @@ -387,18 +382,18 @@ void RegAlloc::HostCall(IR::Inst* result_def, } } -void RegAlloc::AllocStackSpace(const size_t stack_space) noexcept { +void RegAlloc::AllocStackSpace(BlockOfCode& code, const size_t stack_space) noexcept { ASSERT(stack_space < size_t((std::numeric_limits::max)())); ASSERT(reserved_stack_space == 0); reserved_stack_space = stack_space; - code->sub(code->rsp, u32(stack_space)); + code.sub(code.rsp, u32(stack_space)); } -void RegAlloc::ReleaseStackSpace(const size_t stack_space) noexcept { +void RegAlloc::ReleaseStackSpace(BlockOfCode& code, const size_t stack_space) noexcept { ASSERT(stack_space < size_t((std::numeric_limits::max)())); ASSERT(reserved_stack_space == stack_space); reserved_stack_space = 0; - code->add(code->rsp, u32(stack_space)); + code.add(code.rsp, u32(stack_space)); } HostLoc RegAlloc::SelectARegister(const boost::container::static_vector& desired_locations) const noexcept { @@ -458,92 +453,75 @@ HostLoc RegAlloc::SelectARegister(const boost::container::static_vector RegAlloc::ValueLocation(const IR::Inst* value) const noexcept { + for (size_t i = 0; i < hostloc_info.size(); i++) + if (hostloc_info[i].ContainsValue(value)) + return HostLoc(i); + return std::nullopt; +} + +void RegAlloc::DefineValueImpl(BlockOfCode& code, IR::Inst* def_inst, HostLoc host_loc) noexcept { ASSERT(!ValueLocation(def_inst) && "def_inst has already been defined"); LocInfo(host_loc).AddValue(def_inst); } -void RegAlloc::DefineValueImpl(IR::Inst* def_inst, const IR::Value& use_inst) noexcept { +void RegAlloc::DefineValueImpl(BlockOfCode& code, IR::Inst* def_inst, const IR::Value& use_inst) noexcept { ASSERT(!ValueLocation(def_inst) && "def_inst has already been defined"); if (use_inst.IsImmediate()) { - const HostLoc 
location = ScratchImpl(gpr_order);
-        DefineValueImpl(def_inst, location);
-        LoadImmediate(use_inst, location);
+        const HostLoc location = ScratchImpl(code, gpr_order);
+        DefineValueImpl(code, def_inst, location);
+        LoadImmediate(code, use_inst, location);
         return;
     }
     ASSERT(ValueLocation(use_inst.GetInst()) && "use_inst must already be defined");
     const HostLoc location = *ValueLocation(use_inst.GetInst());
-    DefineValueImpl(def_inst, location);
+    DefineValueImpl(code, def_inst, location);
 }
 
-HostLoc RegAlloc::LoadImmediate(IR::Value imm, HostLoc host_loc) noexcept {
-    ASSERT(imm.IsImmediate() && "imm is not an immediate");
-    if (HostLocIsGPR(host_loc)) {
-        const Xbyak::Reg64 reg = HostLocToReg64(host_loc);
-        const u64 imm_value = imm.GetImmediateAsU64();
-        if (imm_value == 0) {
-            code->xor_(reg.cvt32(), reg.cvt32());
-        } else {
-            code->mov(reg, imm_value);
-        }
-    } else if (HostLocIsXMM(host_loc)) {
-        const Xbyak::Xmm reg = HostLocToXmm(host_loc);
-        const u64 imm_value = imm.GetImmediateAsU64();
-        if (imm_value == 0) {
-            MAYBE_AVX(xorps, reg, reg);
-        } else {
-            MAYBE_AVX(movaps, reg, code->Const(code->xword, imm_value));
-        }
-    } else {
-        UNREACHABLE();
-    }
-    return host_loc;
-}
-
-void RegAlloc::Move(HostLoc to, HostLoc from) noexcept {
+void RegAlloc::Move(BlockOfCode& code, HostLoc to, HostLoc from) noexcept {
     const size_t bit_width = LocInfo(from).GetMaxBitWidth();
     ASSERT(LocInfo(to).IsEmpty() && !LocInfo(from).IsLocked());
     ASSERT(bit_width <= HostLocBitWidth(to));
     ASSERT(!LocInfo(from).IsEmpty() && "Mov eliminated");
-    EmitMove(bit_width, to, from);
+    EmitMove(code, bit_width, to, from);
     LocInfo(to) = std::exchange(LocInfo(from), {});
 }
 
-void RegAlloc::CopyToScratch(size_t bit_width, HostLoc to, HostLoc from) noexcept {
+void RegAlloc::CopyToScratch(BlockOfCode& code, size_t bit_width, HostLoc to, HostLoc from) noexcept {
     ASSERT(LocInfo(to).IsEmpty() && !LocInfo(from).IsEmpty());
-    EmitMove(bit_width, to, from);
+    EmitMove(code, bit_width, to, from);
 }
 
-void RegAlloc::Exchange(HostLoc a, HostLoc b) noexcept {
+void RegAlloc::Exchange(BlockOfCode& code, HostLoc a, HostLoc b) noexcept {
     ASSERT(!LocInfo(a).IsLocked() && !LocInfo(b).IsLocked());
     ASSERT(LocInfo(a).GetMaxBitWidth() <= HostLocBitWidth(b));
     ASSERT(LocInfo(b).GetMaxBitWidth() <= HostLocBitWidth(a));
     if (LocInfo(a).IsEmpty()) {
-        Move(a, b);
+        Move(code, a, b);
     } else if (LocInfo(b).IsEmpty()) {
-        Move(b, a);
+        Move(code, b, a);
     } else {
-        EmitExchange(a, b);
+        EmitExchange(code, a, b);
         std::swap(LocInfo(a), LocInfo(b));
     }
 }
 
-void RegAlloc::MoveOutOfTheWay(HostLoc reg) noexcept {
+void RegAlloc::MoveOutOfTheWay(BlockOfCode& code, HostLoc reg) noexcept {
     ASSERT(!LocInfo(reg).IsLocked());
     if (!LocInfo(reg).IsEmpty()) {
-        SpillRegister(reg);
+        SpillRegister(code, reg);
     }
 }
 
-void RegAlloc::SpillRegister(HostLoc loc) noexcept {
+void RegAlloc::SpillRegister(BlockOfCode& code, HostLoc loc) noexcept {
     ASSERT(HostLocIsRegister(loc) && "Only registers can be spilled");
     ASSERT(!LocInfo(loc).IsEmpty() && "There is no need to spill unoccupied registers");
     ASSERT(!LocInfo(loc).IsLocked() && "Registers that have been allocated must not be spilt");
     auto const new_loc = FindFreeSpill(HostLocIsXMM(loc));
-    Move(new_loc, loc);
+    Move(code, new_loc, loc);
 }
 
 HostLoc RegAlloc::FindFreeSpill(bool is_xmm) const noexcept {
@@ -568,9 +546,39 @@ HostLoc RegAlloc::FindFreeSpill(bool is_xmm) const noexcept {
         if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty())
             return loc;
     UNREACHABLE();
-};
+}
 
-void RegAlloc::EmitMove(const size_t bit_width, const HostLoc to, const HostLoc from) noexcept {
+#define MAYBE_AVX(OPCODE, ...) \
+    [&] { \
+        if (code.HasHostFeature(HostFeature::AVX)) code.v##OPCODE(__VA_ARGS__); \
+        else code.OPCODE(__VA_ARGS__); \
+    }()
+
+HostLoc RegAlloc::LoadImmediate(BlockOfCode& code, IR::Value imm, HostLoc host_loc) noexcept {
+    ASSERT(imm.IsImmediate() && "imm is not an immediate");
+    if (HostLocIsGPR(host_loc)) {
+        const Xbyak::Reg64 reg = HostLocToReg64(host_loc);
+        const u64 imm_value = imm.GetImmediateAsU64();
+        if (imm_value == 0) {
+            code.xor_(reg.cvt32(), reg.cvt32());
+        } else {
+            code.mov(reg, imm_value);
+        }
+    } else if (HostLocIsXMM(host_loc)) {
+        const Xbyak::Xmm reg = HostLocToXmm(host_loc);
+        const u64 imm_value = imm.GetImmediateAsU64();
+        if (imm_value == 0) {
+            MAYBE_AVX(xorps, reg, reg);
+        } else {
+            MAYBE_AVX(movaps, reg, code.Const(code.xword, imm_value));
+        }
+    } else {
+        UNREACHABLE();
+    }
+    return host_loc;
+}
+
+void RegAlloc::EmitMove(BlockOfCode& code, const size_t bit_width, const HostLoc to, const HostLoc from) noexcept {
     auto const spill_to_op_arg_helper = [&](HostLoc loc, size_t reserved_stack_space) {
         ASSERT(HostLocIsSpill(loc));
         size_t i = size_t(loc) - size_t(HostLoc::FirstSpill);
@@ -585,9 +593,9 @@ void RegAlloc::EmitMove(const size_t bit_width, const HostLoc to, const HostLoc
     } else if (HostLocIsGPR(to) && HostLocIsGPR(from)) {
         ASSERT(bit_width != 128);
         if (bit_width == 64) {
-            code->mov(HostLocToReg64(to), HostLocToReg64(from));
+            code.mov(HostLocToReg64(to), HostLocToReg64(from));
         } else {
-            code->mov(HostLocToReg64(to).cvt32(), HostLocToReg64(from).cvt32());
+            code.mov(HostLocToReg64(to).cvt32(), HostLocToReg64(from).cvt32());
         }
     } else if (HostLocIsXMM(to) && HostLocIsGPR(from)) {
         ASSERT(bit_width != 128);
@@ -642,25 +650,26 @@ void RegAlloc::EmitMove(const size_t bit_width, const HostLoc to, const HostLoc
     } else if (HostLocIsGPR(to) && HostLocIsSpill(from)) {
         ASSERT(bit_width != 128);
         if (bit_width == 64) {
-            code->mov(HostLocToReg64(to), Xbyak::util::qword[spill_to_op_arg_helper(from, reserved_stack_space)]);
+            code.mov(HostLocToReg64(to), Xbyak::util::qword[spill_to_op_arg_helper(from, reserved_stack_space)]);
         } else {
-            code->mov(HostLocToReg64(to).cvt32(), Xbyak::util::dword[spill_to_op_arg_helper(from, reserved_stack_space)]);
+            code.mov(HostLocToReg64(to).cvt32(), Xbyak::util::dword[spill_to_op_arg_helper(from, reserved_stack_space)]);
         }
     } else if (HostLocIsSpill(to) && HostLocIsGPR(from)) {
         ASSERT(bit_width != 128);
         if (bit_width == 64) {
-            code->mov(Xbyak::util::qword[spill_to_op_arg_helper(to, reserved_stack_space)], HostLocToReg64(from));
+            code.mov(Xbyak::util::qword[spill_to_op_arg_helper(to, reserved_stack_space)], HostLocToReg64(from));
         } else {
-            code->mov(Xbyak::util::dword[spill_to_op_arg_helper(to, reserved_stack_space)], HostLocToReg64(from).cvt32());
+            code.mov(Xbyak::util::dword[spill_to_op_arg_helper(to, reserved_stack_space)], HostLocToReg64(from).cvt32());
         }
     } else {
         UNREACHABLE();
     }
 }
+#undef MAYBE_AVX
 
-void RegAlloc::EmitExchange(const HostLoc a, const HostLoc b) noexcept {
+void RegAlloc::EmitExchange(BlockOfCode& code, const HostLoc a, const HostLoc b) noexcept {
     ASSERT(HostLocIsGPR(a) && HostLocIsGPR(b) && "Exchanging XMM registers is uneeded OR invalid emit");
-    code->xchg(HostLocToReg64(a), HostLocToReg64(b));
+    code.xchg(HostLocToReg64(a), HostLocToReg64(b));
 }
 
 } // namespace Dynarmic::Backend::X64
diff --git a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h
index 0e9e465774..e1cccadd6b 100644
--- a/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h
+++ b/src/dynarmic/src/dynarmic/backend/x64/reg_alloc.h
@@ -81,7 +81,7 @@ public:
         return 1 << max_bit_width;
     }
     void AddValue(IR::Inst* inst) noexcept;
-    void EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept;
+    void EmitVerboseDebuggingOutput(BlockOfCode& code, size_t host_loc_index) const noexcept;
 private:
     //non trivial
     boost::container::small_vector values; //24
@@ -129,16 +129,15 @@ public:
     IR::AccType GetImmediateAccType() const noexcept;
 
     /// Is this value currently in a GPR?
-    bool IsInGpr() const noexcept;
-    bool IsInXmm() const noexcept;
-    bool IsInMemory() const noexcept;
+    bool IsInGpr(RegAlloc& reg_alloc) const noexcept;
+    bool IsInXmm(RegAlloc& reg_alloc) const noexcept;
+    bool IsInMemory(RegAlloc& reg_alloc) const noexcept;
 
 private:
     friend class RegAlloc;
-    explicit Argument(RegAlloc& reg_alloc) : reg_alloc(reg_alloc) {}
+    explicit Argument() {}
     //data
     IR::Value value; //8
-    RegAlloc& reg_alloc; //8
     bool allocated = false; //1
 };
 
@@ -146,55 +145,57 @@ class RegAlloc final {
 public:
     using ArgumentInfo = std::array;
     RegAlloc() noexcept = default;
-    RegAlloc(BlockOfCode* code, boost::container::static_vector gpr_order, boost::container::static_vector xmm_order) noexcept;
+    RegAlloc(boost::container::static_vector gpr_order, boost::container::static_vector xmm_order) noexcept;
 
     ArgumentInfo GetArgumentInfo(const IR::Inst* inst) noexcept;
     void RegisterPseudoOperation(const IR::Inst* inst) noexcept;
     inline bool IsValueLive(const IR::Inst* inst) const noexcept {
         return !!ValueLocation(inst);
     }
-    inline Xbyak::Reg64 UseGpr(Argument& arg) noexcept {
+    inline Xbyak::Reg64 UseGpr(BlockOfCode& code, Argument& arg) noexcept {
         ASSERT(!arg.allocated);
         arg.allocated = true;
-        return HostLocToReg64(UseImpl(arg.value, gpr_order));
+        return HostLocToReg64(UseImpl(code, arg.value, gpr_order));
     }
-    inline Xbyak::Xmm UseXmm(Argument& arg) noexcept {
+    inline Xbyak::Xmm UseXmm(BlockOfCode& code, Argument& arg) noexcept {
         ASSERT(!arg.allocated);
         arg.allocated = true;
-        return HostLocToXmm(UseImpl(arg.value, xmm_order));
+        return HostLocToXmm(UseImpl(code, arg.value, xmm_order));
     }
-    inline OpArg UseOpArg(Argument& arg) noexcept {
-        return UseGpr(arg);
+    inline OpArg UseOpArg(BlockOfCode& code, Argument& arg) noexcept {
+        return UseGpr(code, arg);
     }
-    inline void Use(Argument& arg, const HostLoc host_loc) noexcept {
+    inline void Use(BlockOfCode& code, Argument& arg, const HostLoc host_loc) noexcept {
         ASSERT(!arg.allocated);
         arg.allocated = true;
-        UseImpl(arg.value, {host_loc});
+        UseImpl(code, arg.value, {host_loc});
     }
 
-    Xbyak::Reg64 UseScratchGpr(Argument& arg) noexcept;
-    Xbyak::Xmm UseScratchXmm(Argument& arg) noexcept;
-    void UseScratch(Argument& arg, HostLoc host_loc) noexcept;
+    Xbyak::Reg64 UseScratchGpr(BlockOfCode& code, Argument& arg) noexcept;
+    Xbyak::Xmm UseScratchXmm(BlockOfCode& code, Argument& arg) noexcept;
+    void UseScratch(BlockOfCode& code, Argument& arg, HostLoc host_loc) noexcept;
 
-    void DefineValue(IR::Inst* inst, const Xbyak::Reg& reg) noexcept;
-    void DefineValue(IR::Inst* inst, Argument& arg) noexcept;
+    void DefineValue(BlockOfCode& code, IR::Inst* inst, const Xbyak::Reg& reg) noexcept;
+    void DefineValue(BlockOfCode& code, IR::Inst* inst, Argument& arg) noexcept;
 
     void Release(const Xbyak::Reg& reg) noexcept;
 
-    inline Xbyak::Reg64 ScratchGpr() noexcept {
-        return HostLocToReg64(ScratchImpl(gpr_order));
+    inline Xbyak::Reg64 ScratchGpr(BlockOfCode& code) noexcept {
+        return HostLocToReg64(ScratchImpl(code, gpr_order));
     }
-    inline Xbyak::Reg64 ScratchGpr(const HostLoc desired_location) noexcept {
-        return HostLocToReg64(ScratchImpl({desired_location}));
+    inline Xbyak::Reg64 ScratchGpr(BlockOfCode& code, const HostLoc desired_location) noexcept {
+        return HostLocToReg64(ScratchImpl(code, {desired_location}));
    }
-    inline Xbyak::Xmm ScratchXmm() noexcept {
-        return HostLocToXmm(ScratchImpl(xmm_order));
+    inline Xbyak::Xmm ScratchXmm(BlockOfCode& code) noexcept {
+        return HostLocToXmm(ScratchImpl(code, xmm_order));
    }
-    inline Xbyak::Xmm ScratchXmm(HostLoc desired_location) noexcept {
-        return HostLocToXmm(ScratchImpl({desired_location}));
+    inline Xbyak::Xmm ScratchXmm(BlockOfCode& code, HostLoc desired_location) noexcept {
+        return HostLocToXmm(ScratchImpl(code, {desired_location}));
    }
 
-    void HostCall(IR::Inst* result_def = nullptr,
+    void HostCall(
+        BlockOfCode& code,
+        IR::Inst* result_def = nullptr,
         const std::optional arg0 = {},
         const std::optional arg1 = {},
         const std::optional arg2 = {},
@@ -202,67 +203,56 @@ public:
     ) noexcept;
     // TODO: Values in host flags
 
-    void AllocStackSpace(const size_t stack_space) noexcept;
-    void ReleaseStackSpace(const size_t stack_space) noexcept;
+    void AllocStackSpace(BlockOfCode& code, const size_t stack_space) noexcept;
+    void ReleaseStackSpace(BlockOfCode& code, const size_t stack_space) noexcept;
 
     inline void EndOfAllocScope() noexcept {
-        for (auto& iter : hostloc_info) {
+        for (auto& iter : hostloc_info)
             iter.ReleaseAll();
-        }
     }
 
     inline void AssertNoMoreUses() noexcept {
         ASSERT(std::all_of(hostloc_info.begin(), hostloc_info.end(), [](const auto& i) noexcept { return i.IsEmpty(); }));
     }
 
-    inline void EmitVerboseDebuggingOutput() noexcept {
-        for (size_t i = 0; i < hostloc_info.size(); i++) {
+    inline void EmitVerboseDebuggingOutput(BlockOfCode& code) noexcept {
+        for (size_t i = 0; i < hostloc_info.size(); i++)
            hostloc_info[i].EmitVerboseDebuggingOutput(code, i);
-        }
    }
 
 private:
     friend struct Argument;
     HostLoc SelectARegister(const boost::container::static_vector& desired_locations) const noexcept;
-    inline std::optional ValueLocation(const IR::Inst* value) const noexcept {
-        for (size_t i = 0; i < hostloc_info.size(); i++) {
-            if (hostloc_info[i].ContainsValue(value)) {
-                return HostLoc(i);
-            }
-        }
-        return std::nullopt;
-    }
+    std::optional ValueLocation(const IR::Inst* value) const noexcept;
+    HostLoc UseImpl(BlockOfCode& code, IR::Value use_value, const boost::container::static_vector& desired_locations) noexcept;
+    HostLoc UseScratchImpl(BlockOfCode& code, IR::Value use_value, const boost::container::static_vector& desired_locations) noexcept;
+    HostLoc ScratchImpl(BlockOfCode& code, const boost::container::static_vector& desired_locations) noexcept;
+    void DefineValueImpl(BlockOfCode& code, IR::Inst* def_inst, HostLoc host_loc) noexcept;
+    void DefineValueImpl(BlockOfCode& code, IR::Inst* def_inst, const IR::Value& use_inst) noexcept;
 
-    HostLoc UseImpl(IR::Value use_value, const boost::container::static_vector& desired_locations) noexcept;
-    HostLoc UseScratchImpl(IR::Value use_value, const boost::container::static_vector& desired_locations) noexcept;
-    HostLoc ScratchImpl(const boost::container::static_vector& desired_locations) noexcept;
-    void DefineValueImpl(IR::Inst* def_inst, HostLoc host_loc) noexcept;
-    void DefineValueImpl(IR::Inst* def_inst, const IR::Value& use_inst) noexcept;
+    HostLoc LoadImmediate(BlockOfCode& code, IR::Value imm, HostLoc host_loc) noexcept;
+    void Move(BlockOfCode& code, HostLoc to, HostLoc from) noexcept;
+    void CopyToScratch(BlockOfCode& code, size_t bit_width, HostLoc to, HostLoc from) noexcept;
+    void Exchange(BlockOfCode& code, HostLoc a, HostLoc b) noexcept;
+    void MoveOutOfTheWay(BlockOfCode& code, HostLoc reg) noexcept;
 
-    HostLoc LoadImmediate(IR::Value imm, HostLoc host_loc) noexcept;
-    void Move(HostLoc to, HostLoc from) noexcept;
-    void CopyToScratch(size_t bit_width, HostLoc to, HostLoc from) noexcept;
-    void Exchange(HostLoc a, HostLoc b) noexcept;
-    void MoveOutOfTheWay(HostLoc reg) noexcept;
-
-    void SpillRegister(HostLoc loc) noexcept;
+    void SpillRegister(BlockOfCode& code, HostLoc loc) noexcept;
     HostLoc FindFreeSpill(bool is_xmm) const noexcept;
 
     inline HostLocInfo& LocInfo(const HostLoc loc) noexcept {
         ASSERT(loc != HostLoc::RSP && loc != ABI_JIT_PTR);
-        return hostloc_info[static_cast(loc)];
+        return hostloc_info[size_t(loc)];
     }
     inline const HostLocInfo& LocInfo(const HostLoc loc) const noexcept {
         ASSERT(loc != HostLoc::RSP && loc != ABI_JIT_PTR);
-        return hostloc_info[static_cast(loc)];
+        return hostloc_info[size_t(loc)];
     }
 
-    void EmitMove(const size_t bit_width, const HostLoc to, const HostLoc from) noexcept;
-    void EmitExchange(const HostLoc a, const HostLoc b) noexcept;
+    void EmitMove(BlockOfCode& code, const size_t bit_width, const HostLoc to, const HostLoc from) noexcept;
+    void EmitExchange(BlockOfCode& code, const HostLoc a, const HostLoc b) noexcept;
 
     //data
     alignas(64) boost::container::static_vector gpr_order;
     alignas(64) boost::container::static_vector xmm_order;
     alignas(64) std::array hostloc_info;
-    BlockOfCode* code = nullptr;
     size_t reserved_stack_space = 0;
 };
 // Ensure a cache line (or less) is used, this is primordial