[dynarmic] remove reg_alloc from all arguments on x86 emitter (#3150)

From my tests this decreases JIT latency twofold, though it may be placebo.
Saving reg_alloc to memory while having it readily available is certainly a very interesting choice... after all, keeping it in %rdi is way cheaper, isn't it? :)
Please test for any performance regressions; I got +20 FPS on Rain World (unlocked) from this change alone.
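
For reviewers skimming the diff: the shape of the change is roughly this — Argument no longer stores a RegAlloc&, RegAlloc no longer stores a BlockOfCode*, and every allocator call now takes the code emitter explicitly, so it can stay in an argument register instead of being saved and reloaded around each helper. A minimal stand-in sketch (the types below are simplified placeholders, not the real dynarmic declarations):

#include <cstdio>

struct BlockOfCode {};  // stand-in for the x64 code emitter

struct RegAlloc {
    int next_gpr = 0;
    // was: int ScratchGpr();              // relied on a stored BlockOfCode*
    int ScratchGpr(BlockOfCode& /*code*/) { return next_gpr++; }
    // was: void DefineValue(inst, reg);   // same implicit dependency
    void DefineValue(BlockOfCode& /*code*/, int inst, int reg) {
        std::printf("inst %d -> gpr %d\n", inst, reg);
    }
};

int main() {
    BlockOfCode code;
    RegAlloc reg_alloc;
    const int result = reg_alloc.ScratchGpr(code);  // code passed explicitly at each call site
    reg_alloc.DefineValue(code, /*inst=*/0, result);
}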

Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3150
Reviewed-by: crueter <crueter@eden-emu.dev>
Reviewed-by: Caio Oliveira <caiooliveirafarias0@gmail.com>
Co-authored-by: lizzie <lizzie@eden-emu.dev>
Co-committed-by: lizzie <lizzie@eden-emu.dev>
lizzie 2025-12-09 03:53:58 +01:00 committed by crueter
parent 5b019a81a7
commit 69a84ee0a6
No known key found for this signature in database
GPG Key ID: 425ACD2D4830EBC6
25 changed files with 2279 additions and 2350 deletions

View File

@ -77,9 +77,9 @@ void EmitX64::EmitPushRSB(IR::Block&, IR::Inst* inst) {
ASSERT(inst->GetArg(0).IsImmediate());
u64 imm64 = inst->GetArg(0).GetU64();
Xbyak::Reg64 code_ptr_reg = reg_alloc.ScratchGpr({HostLoc::RCX});
Xbyak::Reg64 loc_desc_reg = reg_alloc.ScratchGpr();
Xbyak::Reg32 index_reg = reg_alloc.ScratchGpr().cvt32();
Xbyak::Reg64 code_ptr_reg = reg_alloc.ScratchGpr(code, {HostLoc::RCX});
Xbyak::Reg64 loc_desc_reg = reg_alloc.ScratchGpr(code);
Xbyak::Reg32 index_reg = reg_alloc.ScratchGpr(code).cvt32();
u64 code_ptr = unique_hash_to_code_ptr.find(imm64) != unique_hash_to_code_ptr.end()
? u64(unique_hash_to_code_ptr[imm64])
: u64(code->GetReturnFromRunCodeAddress());

View File

@ -175,7 +175,6 @@ if ("x86_64" IN_LIST ARCHITECTURE)
backend/x64/exclusive_monitor.cpp
backend/x64/exclusive_monitor_friend.h
backend/x64/host_feature.h
backend/x64/hostloc.cpp
backend/x64/hostloc.h
backend/x64/jitstate_info.h
backend/x64/oparg.h

View File

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
@ -60,7 +63,7 @@ void EmitIR<IR::Opcode::Pack2x32To1x64>(oaknut::CodeGenerator& code, EmitContext
template<>
void EmitIR<IR::Opcode::Pack2x64To1x128>(oaknut::CodeGenerator& code, EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
bool const args_in_gpr[] = { args[0].IsInGpr(), args[1].IsInGpr() };
bool const args_in_gpr[] = { args[0].IsInGpr(ctx.reg_alloc), args[1].IsInGpr(ctx.reg_alloc) };
if (args_in_gpr[0] && args_in_gpr[1]) {
auto Xlo = ctx.reg_alloc.ReadX(args[0]);
auto Xhi = ctx.reg_alloc.ReadX(args[1]);

View File

@ -84,7 +84,7 @@ IR::AccType Argument::GetImmediateAccType() const {
return value.GetAccType();
}
HostLoc::Kind Argument::CurrentLocationKind() const {
HostLoc::Kind Argument::CurrentLocationKind(RegAlloc& reg_alloc) const {
return reg_alloc.ValueLocation(value.GetInst())->kind;
}
@ -131,7 +131,7 @@ void HostLocInfo::UpdateUses() {
}
RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(IR::Inst* inst) {
ArgumentInfo ret = {Argument{*this}, Argument{*this}, Argument{*this}, Argument{*this}};
ArgumentInfo ret = {Argument{}, Argument{}, Argument{}, Argument{}};
for (size_t i = 0; i < inst->NumArgs(); i++) {
const IR::Value arg = inst->GetArg(i);
ret[i].value = arg;

View File

@ -64,18 +64,18 @@ public:
IR::AccType GetImmediateAccType() const;
// Only valid if not immediate
HostLoc::Kind CurrentLocationKind() const;
bool IsInGpr() const { return !IsImmediate() && CurrentLocationKind() == HostLoc::Kind::Gpr; }
bool IsInFpr() const { return !IsImmediate() && CurrentLocationKind() == HostLoc::Kind::Fpr; }
HostLoc::Kind CurrentLocationKind(RegAlloc& reg_alloc) const;
bool IsInGpr(RegAlloc& reg_alloc) const {
return !IsImmediate() && CurrentLocationKind(reg_alloc) == HostLoc::Kind::Gpr;
}
bool IsInFpr(RegAlloc& reg_alloc) const {
return !IsImmediate() && CurrentLocationKind(reg_alloc) == HostLoc::Kind::Fpr;
}
private:
friend class RegAlloc;
explicit Argument(RegAlloc& reg_alloc)
: reg_alloc{reg_alloc} {}
bool allocated = false;
RegAlloc& reg_alloc;
IR::Value value;
bool allocated = false;
};
struct FlagsTag final {

View File

@ -117,7 +117,7 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) {
return gprs;
}();
new (&this->reg_alloc) RegAlloc(&code, gpr_order, any_xmm);
new (&this->reg_alloc) RegAlloc(gpr_order, any_xmm);
A32EmitContext ctx{conf, reg_alloc, block};
// Start emitting.
@ -283,47 +283,47 @@ void A32EmitX64::GenTerminalHandlers() {
void A32EmitX64::EmitA32SetCheckBit(A32EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt8();
const Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(code, args[0]).cvt8();
code.mov(code.byte[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, check_bit)], to_store);
}
void A32EmitX64::EmitA32GetRegister(A32EmitContext& ctx, IR::Inst* inst) {
const A32::Reg reg = inst->GetArg(0).GetA32RegRef();
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.mov(result, MJitStateReg(reg));
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void A32EmitX64::EmitA32GetExtendedRegister32(A32EmitContext& ctx, IR::Inst* inst) {
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
ASSERT(A32::IsSingleExtReg(reg));
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
code.movss(result, MJitStateExtReg(reg));
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void A32EmitX64::EmitA32GetExtendedRegister64(A32EmitContext& ctx, IR::Inst* inst) {
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
ASSERT(A32::IsDoubleExtReg(reg));
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
code.movsd(result, MJitStateExtReg(reg));
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void A32EmitX64::EmitA32GetVector(A32EmitContext& ctx, IR::Inst* inst) {
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
ASSERT(A32::IsDoubleExtReg(reg) || A32::IsQuadExtReg(reg));
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
if (A32::IsDoubleExtReg(reg)) {
code.movsd(result, MJitStateExtReg(reg));
} else {
code.movaps(result, MJitStateExtReg(reg));
}
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void A32EmitX64::EmitA32SetRegister(A32EmitContext& ctx, IR::Inst* inst) {
@ -332,11 +332,11 @@ void A32EmitX64::EmitA32SetRegister(A32EmitContext& ctx, IR::Inst* inst) {
if (args[1].IsImmediate()) {
code.mov(MJitStateReg(reg), args[1].GetImmediateU32());
} else if (args[1].IsInXmm()) {
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]);
} else if (args[1].IsInXmm(ctx.reg_alloc)) {
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[1]);
code.movd(MJitStateReg(reg), to_store);
} else {
const Xbyak::Reg32 to_store = ctx.reg_alloc.UseGpr(args[1]).cvt32();
const Xbyak::Reg32 to_store = ctx.reg_alloc.UseGpr(code, args[1]).cvt32();
code.mov(MJitStateReg(reg), to_store);
}
}
@ -346,11 +346,11 @@ void A32EmitX64::EmitA32SetExtendedRegister32(A32EmitContext& ctx, IR::Inst* ins
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
ASSERT(A32::IsSingleExtReg(reg));
if (args[1].IsInXmm()) {
Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]);
if (args[1].IsInXmm(ctx.reg_alloc)) {
Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[1]);
code.movss(MJitStateExtReg(reg), to_store);
} else {
Xbyak::Reg32 to_store = ctx.reg_alloc.UseGpr(args[1]).cvt32();
Xbyak::Reg32 to_store = ctx.reg_alloc.UseGpr(code, args[1]).cvt32();
code.mov(MJitStateExtReg(reg), to_store);
}
}
@ -360,11 +360,11 @@ void A32EmitX64::EmitA32SetExtendedRegister64(A32EmitContext& ctx, IR::Inst* ins
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
ASSERT(A32::IsDoubleExtReg(reg));
if (args[1].IsInXmm()) {
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]);
if (args[1].IsInXmm(ctx.reg_alloc)) {
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[1]);
code.movsd(MJitStateExtReg(reg), to_store);
} else {
const Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(args[1]);
const Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(code, args[1]);
code.mov(MJitStateExtReg(reg), to_store);
}
}
@ -374,7 +374,7 @@ void A32EmitX64::EmitA32SetVector(A32EmitContext& ctx, IR::Inst* inst) {
const A32::ExtReg reg = inst->GetArg(0).GetA32ExtRegRef();
ASSERT(A32::IsDoubleExtReg(reg) || A32::IsQuadExtReg(reg));
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[1]);
if (A32::IsDoubleExtReg(reg)) {
code.movsd(MJitStateExtReg(reg), to_store);
} else {
@ -383,9 +383,9 @@ void A32EmitX64::EmitA32SetVector(A32EmitContext& ctx, IR::Inst* inst) {
}
void A32EmitX64::EmitA32GetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 tmp2 = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32();
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32();
const Xbyak::Reg32 tmp2 = ctx.reg_alloc.ScratchGpr(code).cvt32();
if (code.HasHostFeature(HostFeature::FastBMI2)) {
// Here we observe that cpsr_et and cpsr_ge are right next to each other in memory,
@ -428,15 +428,15 @@ void A32EmitX64::EmitA32GetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
code.or_(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_jaifm)]);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Reg32 cpsr = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 tmp2 = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 cpsr = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32();
const Xbyak::Reg32 tmp2 = ctx.reg_alloc.ScratchGpr(code).cvt32();
if (conf.always_little_endian) {
code.and_(cpsr, 0xFFFFFDFF);
@ -501,7 +501,7 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
void A32EmitX64::EmitA32SetCpsrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], to_store);
}
@ -512,15 +512,15 @@ void A32EmitX64::EmitA32SetCpsrNZCVRaw(A32EmitContext& ctx, IR::Inst* inst) {
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], NZCV::ToX64(imm));
} else if (code.HasHostFeature(HostFeature::FastBMI2)) {
const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.shr(a, 28);
code.mov(b, NZCV::x64_mask);
code.pdep(a, a, b);
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], a);
} else {
const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
code.shr(a, 28);
code.imul(a, a, NZCV::to_x64_multiplier);
@ -537,8 +537,8 @@ void A32EmitX64::EmitA32SetCpsrNZCVQ(A32EmitContext& ctx, IR::Inst* inst) {
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], NZCV::ToX64(imm));
code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)], u8((imm & 0x08000000) != 0 ? 1 : 0));
} else if (code.HasHostFeature(HostFeature::FastBMI2)) {
const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.shr(a, 28);
code.setc(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)]);
@ -546,7 +546,7 @@ void A32EmitX64::EmitA32SetCpsrNZCVQ(A32EmitContext& ctx, IR::Inst* inst) {
code.pdep(a, a, b);
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], a);
} else {
const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
code.shr(a, 28);
code.setc(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)]);
@ -559,8 +559,8 @@ void A32EmitX64::EmitA32SetCpsrNZCVQ(A32EmitContext& ctx, IR::Inst* inst) {
void A32EmitX64::EmitA32SetCpsrNZ(A32EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Reg32 nz = ctx.reg_alloc.UseGpr(args[0]).cvt32();
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 nz = ctx.reg_alloc.UseGpr(code, args[0]).cvt32();
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.movzx(tmp, code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1]);
code.and_(tmp, 1);
@ -577,12 +577,12 @@ void A32EmitX64::EmitA32SetCpsrNZC(A32EmitContext& ctx, IR::Inst* inst) {
code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], c);
} else {
const Xbyak::Reg8 c = ctx.reg_alloc.UseGpr(args[1]).cvt8();
const Xbyak::Reg8 c = ctx.reg_alloc.UseGpr(code, args[1]).cvt8();
code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], c);
}
} else {
const Xbyak::Reg32 nz = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 nz = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
if (args[1].IsImmediate()) {
const bool c = args[1].GetImmediateU1();
@ -590,7 +590,7 @@ void A32EmitX64::EmitA32SetCpsrNZC(A32EmitContext& ctx, IR::Inst* inst) {
code.or_(nz, c);
code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], nz.cvt8());
} else {
const Xbyak::Reg32 c = ctx.reg_alloc.UseGpr(args[1]).cvt32();
const Xbyak::Reg32 c = ctx.reg_alloc.UseGpr(code, args[1]).cvt32();
code.or_(nz, c);
code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], nz.cvt8());
@ -599,13 +599,13 @@ void A32EmitX64::EmitA32SetCpsrNZC(A32EmitContext& ctx, IR::Inst* inst) {
}
static void EmitGetFlag(BlockOfCode& code, A32EmitContext& ctx, IR::Inst* inst, size_t flag_bit) {
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)]);
if (flag_bit != 0) {
code.shr(result, static_cast<int>(flag_bit));
}
code.and_(result, 1);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void A32EmitX64::EmitA32GetCFlag(A32EmitContext& ctx, IR::Inst* inst) {
@ -619,27 +619,27 @@ void A32EmitX64::EmitA32OrQFlag(A32EmitContext& ctx, IR::Inst* inst) {
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)], 1);
}
} else {
const Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt8();
const Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(code, args[0]).cvt8();
code.or_(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)], to_store);
}
}
void A32EmitX64::EmitA32GetGEFlags(A32EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
code.movd(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)]);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void A32EmitX64::EmitA32SetGEFlags(A32EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ASSERT(!args[0].IsImmediate());
if (args[0].IsInXmm()) {
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[0]);
if (args[0].IsInXmm(ctx.reg_alloc)) {
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[0]);
code.movd(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], to_store);
} else {
const Xbyak::Reg32 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt32();
const Xbyak::Reg32 to_store = ctx.reg_alloc.UseGpr(code, args[0]).cvt32();
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], to_store);
}
}
@ -656,8 +656,8 @@ void A32EmitX64::EmitA32SetGEFlagsCompressed(A32EmitContext& ctx, IR::Inst* inst
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], ge);
} else if (code.HasHostFeature(HostFeature::FastBMI2)) {
const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.mov(b, 0x01010101);
code.shr(a, 16);
@ -665,7 +665,7 @@ void A32EmitX64::EmitA32SetGEFlagsCompressed(A32EmitContext& ctx, IR::Inst* inst
code.imul(a, a, 0xFF);
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], a);
} else {
const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
code.shr(a, 16);
code.and_(a, 0xF);
@ -690,7 +690,7 @@ void A32EmitX64::EmitA32InstructionSynchronizationBarrier(A32EmitContext& ctx, I
return;
}
ctx.reg_alloc.HostCall(nullptr);
ctx.reg_alloc.HostCall(code, nullptr);
Devirtualize<&A32::UserCallbacks::InstructionSynchronizationBarrierRaised>(conf.callbacks).EmitCall(code);
}
@ -718,9 +718,9 @@ void A32EmitX64::EmitA32BXWritePC(A32EmitContext& ctx, IR::Inst* inst) {
code.mov(MJitStateReg(A32::Reg::PC), new_pc & mask);
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], new_upper);
} else {
const Xbyak::Reg32 new_pc = ctx.reg_alloc.UseScratchGpr(arg).cvt32();
const Xbyak::Reg32 mask = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 new_upper = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 new_pc = ctx.reg_alloc.UseScratchGpr(code, arg).cvt32();
const Xbyak::Reg32 mask = ctx.reg_alloc.ScratchGpr(code).cvt32();
const Xbyak::Reg32 new_upper = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.mov(mask, new_pc);
code.and_(mask, 1);
@ -745,7 +745,7 @@ void A32EmitX64::EmitA32CallSupervisor(A32EmitContext& ctx, IR::Inst* inst) {
code.SwitchMxcsrOnExit();
if (conf.enable_cycle_counting) {
ctx.reg_alloc.HostCall(nullptr);
ctx.reg_alloc.HostCall(code, nullptr);
code.mov(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)]);
code.sub(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)]);
Devirtualize<&A32::UserCallbacks::AddTicks>(conf.callbacks).EmitCall(code);
@ -753,7 +753,7 @@ void A32EmitX64::EmitA32CallSupervisor(A32EmitContext& ctx, IR::Inst* inst) {
}
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(nullptr, {}, args[0]);
ctx.reg_alloc.HostCall(code, nullptr, {}, args[0]);
Devirtualize<&A32::UserCallbacks::CallSVC>(conf.callbacks).EmitCall(code);
if (conf.enable_cycle_counting) {
@ -767,7 +767,7 @@ void A32EmitX64::EmitA32CallSupervisor(A32EmitContext& ctx, IR::Inst* inst) {
void A32EmitX64::EmitA32ExceptionRaised(A32EmitContext& ctx, IR::Inst* inst) {
code.SwitchMxcsrOnExit();
ctx.reg_alloc.HostCall(nullptr);
ctx.reg_alloc.HostCall(code, nullptr);
if (conf.enable_cycle_counting) {
code.mov(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)]);
code.sub(code.ABI_PARAM2, qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)]);
@ -797,7 +797,7 @@ static u32 GetFpscrImpl(A32JitState* jit_state) {
}
void A32EmitX64::EmitA32GetFpscr(A32EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.HostCall(inst);
ctx.reg_alloc.HostCall(code, inst);
code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR);
code.stmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A32JitState, guest_MXCSR)]);
@ -810,7 +810,7 @@ static void SetFpscrImpl(u32 value, A32JitState* jit_state) {
void A32EmitX64::EmitA32SetFpscr(A32EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(nullptr, args[0]);
ctx.reg_alloc.HostCall(code, nullptr, args[0]);
code.mov(code.ABI_PARAM2, code.ABI_JIT_PTR);
code.CallFunction(&SetFpscrImpl);
@ -818,17 +818,17 @@ void A32EmitX64::EmitA32SetFpscr(A32EmitContext& ctx, IR::Inst* inst) {
}
void A32EmitX64::EmitA32GetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, fpsr_nzcv)]);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void A32EmitX64::EmitA32SetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (code.HasHostFeature(HostFeature::FastBMI2)) {
const Xbyak::Reg32 value = ctx.reg_alloc.UseGpr(args[0]).cvt32();
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 value = ctx.reg_alloc.UseGpr(code, args[0]).cvt32();
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.mov(tmp, NZCV::x64_mask);
code.pext(tmp, value, tmp);
@ -838,7 +838,7 @@ void A32EmitX64::EmitA32SetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
return;
}
const Xbyak::Reg32 value = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 value = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
code.and_(value, NZCV::x64_mask);
code.imul(value, value, NZCV::from_x64_multiplier);
@ -851,7 +851,7 @@ static void EmitCoprocessorException() {
}
static void CallCoprocCallback(BlockOfCode& code, RegAlloc& reg_alloc, A32::Coprocessor::Callback callback, IR::Inst* inst = nullptr, std::optional<Argument::copyable_reference> arg0 = {}, std::optional<Argument::copyable_reference> arg1 = {}) {
reg_alloc.HostCall(inst, {}, arg0, arg1);
reg_alloc.HostCall(code, inst, {}, arg0, arg1);
if (callback.user_arg) {
code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(*callback.user_arg));
@ -914,8 +914,8 @@ void A32EmitX64::EmitA32CoprocSendOneWord(A32EmitContext& ctx, IR::Inst* inst) {
}
if (const auto destination_ptr = std::get_if<u32*>(&action)) {
const Xbyak::Reg32 reg_word = ctx.reg_alloc.UseGpr(args[1]).cvt32();
const Xbyak::Reg64 reg_destination_addr = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg32 reg_word = ctx.reg_alloc.UseGpr(code, args[1]).cvt32();
const Xbyak::Reg64 reg_destination_addr = ctx.reg_alloc.ScratchGpr(code);
code.mov(reg_destination_addr, reinterpret_cast<u64>(*destination_ptr));
code.mov(code.dword[reg_destination_addr], reg_word);
@ -954,9 +954,9 @@ void A32EmitX64::EmitA32CoprocSendTwoWords(A32EmitContext& ctx, IR::Inst* inst)
}
if (const auto destination_ptrs = std::get_if<std::array<u32*, 2>>(&action)) {
const Xbyak::Reg32 reg_word1 = ctx.reg_alloc.UseGpr(args[1]).cvt32();
const Xbyak::Reg32 reg_word2 = ctx.reg_alloc.UseGpr(args[2]).cvt32();
const Xbyak::Reg64 reg_destination_addr = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg32 reg_word1 = ctx.reg_alloc.UseGpr(code, args[1]).cvt32();
const Xbyak::Reg32 reg_word2 = ctx.reg_alloc.UseGpr(code, args[2]).cvt32();
const Xbyak::Reg64 reg_destination_addr = ctx.reg_alloc.ScratchGpr(code);
code.mov(reg_destination_addr, reinterpret_cast<u64>((*destination_ptrs)[0]));
code.mov(code.dword[reg_destination_addr], reg_word1);
@ -998,13 +998,13 @@ void A32EmitX64::EmitA32CoprocGetOneWord(A32EmitContext& ctx, IR::Inst* inst) {
}
if (const auto source_ptr = std::get_if<u32*>(&action)) {
const Xbyak::Reg32 reg_word = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg64 reg_source_addr = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg32 reg_word = ctx.reg_alloc.ScratchGpr(code).cvt32();
const Xbyak::Reg64 reg_source_addr = ctx.reg_alloc.ScratchGpr(code);
code.mov(reg_source_addr, reinterpret_cast<u64>(*source_ptr));
code.mov(reg_word, code.dword[reg_source_addr]);
ctx.reg_alloc.DefineValue(inst, reg_word);
ctx.reg_alloc.DefineValue(code, inst, reg_word);
return;
}
@ -1038,9 +1038,9 @@ void A32EmitX64::EmitA32CoprocGetTwoWords(A32EmitContext& ctx, IR::Inst* inst) {
}
if (const auto source_ptrs = std::get_if<std::array<u32*, 2>>(&action)) {
const Xbyak::Reg64 reg_result = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 reg_destination_addr = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 reg_tmp = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 reg_result = ctx.reg_alloc.ScratchGpr(code);
const Xbyak::Reg64 reg_destination_addr = ctx.reg_alloc.ScratchGpr(code);
const Xbyak::Reg64 reg_tmp = ctx.reg_alloc.ScratchGpr(code);
code.mov(reg_destination_addr, reinterpret_cast<u64>((*source_ptrs)[1]));
code.mov(reg_result.cvt32(), code.dword[reg_destination_addr]);
@ -1049,7 +1049,7 @@ void A32EmitX64::EmitA32CoprocGetTwoWords(A32EmitContext& ctx, IR::Inst* inst) {
code.mov(reg_tmp.cvt32(), code.dword[reg_destination_addr]);
code.or_(reg_result, reg_tmp);
ctx.reg_alloc.DefineValue(inst, reg_result);
ctx.reg_alloc.DefineValue(code, inst, reg_result);
return;
}

View File

@ -91,7 +91,7 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) noexcept {
return gprs;
}();
new (&this->reg_alloc) RegAlloc{&code, gpr_order, any_xmm};
new (&this->reg_alloc) RegAlloc{gpr_order, any_xmm};
A64EmitContext ctx{conf, reg_alloc, block};
// Start emitting.
@ -159,7 +159,7 @@ finish_this_inst:
}
code.int3();
const size_t size = static_cast<size_t>(code.getCurr() - entrypoint);
const size_t size = size_t(code.getCurr() - entrypoint);
const A64::LocationDescriptor descriptor{block.Location()};
const A64::LocationDescriptor end_location{block.EndLocation()};
@ -266,25 +266,25 @@ void A64EmitX64::EmitPushRSB(EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64SetCheckBit(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt8();
const Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(code, args[0]).cvt8();
code.mov(code.byte[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, check_bit)], to_store);
}
void A64EmitX64::EmitA64GetCFlag(A64EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A64JitState, cpsr_nzcv)]);
code.shr(result, NZCV::x64_c_flag_bit);
code.and_(result, 1);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void A64EmitX64::EmitA64GetNZCVRaw(A64EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg32 nzcv_raw = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 nzcv_raw = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.mov(nzcv_raw, dword[code.ABI_JIT_PTR + offsetof(A64JitState, cpsr_nzcv)]);
if (code.HasHostFeature(HostFeature::FastBMI2)) {
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.mov(tmp, NZCV::x64_mask);
code.pext(nzcv_raw, nzcv_raw, tmp);
code.shl(nzcv_raw, 28);
@ -294,16 +294,16 @@ void A64EmitX64::EmitA64GetNZCVRaw(A64EmitContext& ctx, IR::Inst* inst) {
code.and_(nzcv_raw, NZCV::arm_mask);
}
ctx.reg_alloc.DefineValue(inst, nzcv_raw);
ctx.reg_alloc.DefineValue(code, inst, nzcv_raw);
}
void A64EmitX64::EmitA64SetNZCVRaw(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Reg32 nzcv_raw = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 nzcv_raw = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
code.shr(nzcv_raw, 28);
if (code.HasHostFeature(HostFeature::FastBMI2)) {
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.mov(tmp, NZCV::x64_mask);
code.pdep(nzcv_raw, nzcv_raw, tmp);
} else {
@ -315,63 +315,63 @@ void A64EmitX64::EmitA64SetNZCVRaw(A64EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64SetNZCV(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
code.mov(dword[code.ABI_JIT_PTR + offsetof(A64JitState, cpsr_nzcv)], to_store);
}
void A64EmitX64::EmitA64GetW(A64EmitContext& ctx, IR::Inst* inst) {
const A64::Reg reg = inst->GetArg(0).GetA64RegRef();
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)]);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void A64EmitX64::EmitA64GetX(A64EmitContext& ctx, IR::Inst* inst) {
const A64::Reg reg = inst->GetArg(0).GetA64RegRef();
const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(code);
code.mov(result, qword[code.ABI_JIT_PTR + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)]);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void A64EmitX64::EmitA64GetS(A64EmitContext& ctx, IR::Inst* inst) {
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
code.movd(result, addr);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void A64EmitX64::EmitA64GetD(A64EmitContext& ctx, IR::Inst* inst) {
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
code.movq(result, addr);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void A64EmitX64::EmitA64GetQ(A64EmitContext& ctx, IR::Inst* inst) {
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
const auto addr = xword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
code.movaps(result, addr);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void A64EmitX64::EmitA64GetSP(A64EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(code);
code.mov(result, qword[code.ABI_JIT_PTR + offsetof(A64JitState, sp)]);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void A64EmitX64::EmitA64GetFPCR(A64EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A64JitState, fpcr)]);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
static u32 GetFPSRImpl(A64JitState* jit_state) {
@ -379,7 +379,7 @@ static u32 GetFPSRImpl(A64JitState* jit_state) {
}
void A64EmitX64::EmitA64GetFPSR(A64EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.HostCall(inst);
ctx.reg_alloc.HostCall(code, inst);
code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR);
code.stmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A64JitState, guest_MXCSR)]);
code.CallFunction(GetFPSRImpl);
@ -393,7 +393,7 @@ void A64EmitX64::EmitA64SetW(A64EmitContext& ctx, IR::Inst* inst) {
code.mov(addr, args[1].GetImmediateS32());
} else {
// TODO: zext tracking, xmm variant
const Xbyak::Reg64 to_store = ctx.reg_alloc.UseScratchGpr(args[1]);
const Xbyak::Reg64 to_store = ctx.reg_alloc.UseScratchGpr(code, args[1]);
code.mov(to_store.cvt32(), to_store.cvt32());
code.mov(addr, to_store);
}
@ -405,11 +405,11 @@ void A64EmitX64::EmitA64SetX(A64EmitContext& ctx, IR::Inst* inst) {
const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)];
if (args[1].FitsInImmediateS32()) {
code.mov(addr, args[1].GetImmediateS32());
} else if (args[1].IsInXmm()) {
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]);
} else if (args[1].IsInXmm(ctx.reg_alloc)) {
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[1]);
code.movq(addr, to_store);
} else {
const Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(args[1]);
const Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(code, args[1]);
code.mov(addr, to_store);
}
}
@ -419,8 +419,8 @@ void A64EmitX64::EmitA64SetS(A64EmitContext& ctx, IR::Inst* inst) {
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
const auto addr = xword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
// TODO: Optimize
code.pxor(tmp, tmp);
code.movss(tmp, to_store);
@ -432,7 +432,7 @@ void A64EmitX64::EmitA64SetD(A64EmitContext& ctx, IR::Inst* inst) {
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
const auto addr = xword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
const Xbyak::Xmm to_store = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm to_store = ctx.reg_alloc.UseScratchXmm(code, args[1]);
code.movq(to_store, to_store); // TODO: Remove when able
code.movaps(addr, to_store);
}
@ -442,7 +442,7 @@ void A64EmitX64::EmitA64SetQ(A64EmitContext& ctx, IR::Inst* inst) {
const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
const auto addr = xword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[1]);
code.movaps(addr, to_store);
}
@ -451,11 +451,11 @@ void A64EmitX64::EmitA64SetSP(A64EmitContext& ctx, IR::Inst* inst) {
const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, sp)];
if (args[0].FitsInImmediateS32()) {
code.mov(addr, args[0].GetImmediateS32());
} else if (args[0].IsInXmm()) {
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[0]);
} else if (args[0].IsInXmm(ctx.reg_alloc)) {
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[0]);
code.movq(addr, to_store);
} else {
const Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(args[0]);
const Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(code, args[0]);
code.mov(addr, to_store);
}
}
@ -466,7 +466,7 @@ static void SetFPCRImpl(A64JitState* jit_state, u32 value) {
void A64EmitX64::EmitA64SetFPCR(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(nullptr, {}, args[0]);
ctx.reg_alloc.HostCall(code, nullptr, {}, args[0]);
code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR);
code.CallFunction(SetFPCRImpl);
code.ldmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A64JitState, guest_MXCSR)]);
@ -478,7 +478,7 @@ static void SetFPSRImpl(A64JitState* jit_state, u32 value) {
void A64EmitX64::EmitA64SetFPSR(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(nullptr, {}, args[0]);
ctx.reg_alloc.HostCall(code, nullptr, {}, args[0]);
code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR);
code.CallFunction(SetFPSRImpl);
code.ldmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A64JitState, guest_MXCSR)]);
@ -489,17 +489,17 @@ void A64EmitX64::EmitA64SetPC(A64EmitContext& ctx, IR::Inst* inst) {
const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)];
if (args[0].FitsInImmediateS32()) {
code.mov(addr, args[0].GetImmediateS32());
} else if (args[0].IsInXmm()) {
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[0]);
} else if (args[0].IsInXmm(ctx.reg_alloc)) {
const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(code, args[0]);
code.movq(addr, to_store);
} else {
const Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(args[0]);
const Xbyak::Reg64 to_store = ctx.reg_alloc.UseGpr(code, args[0]);
code.mov(addr, to_store);
}
}
void A64EmitX64::EmitA64CallSupervisor(A64EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.HostCall(nullptr);
ctx.reg_alloc.HostCall(code, nullptr);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ASSERT(args[0].IsImmediate());
const u32 imm = args[0].GetImmediateU32();
@ -511,7 +511,7 @@ void A64EmitX64::EmitA64CallSupervisor(A64EmitContext& ctx, IR::Inst* inst) {
}
void A64EmitX64::EmitA64ExceptionRaised(A64EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.HostCall(nullptr);
ctx.reg_alloc.HostCall(code, nullptr);
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ASSERT(args[0].IsImmediate() && args[1].IsImmediate());
const u64 pc = args[0].GetImmediateU64();
@ -524,13 +524,13 @@ void A64EmitX64::EmitA64ExceptionRaised(A64EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64DataCacheOperationRaised(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(nullptr, {}, args[1], args[2]);
ctx.reg_alloc.HostCall(code, nullptr, {}, args[1], args[2]);
Devirtualize<&A64::UserCallbacks::DataCacheOperationRaised>(conf.callbacks).EmitCall(code);
}
void A64EmitX64::EmitA64InstructionCacheOperationRaised(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(nullptr, {}, args[0], args[1]);
ctx.reg_alloc.HostCall(code, nullptr, {}, args[0], args[1]);
Devirtualize<&A64::UserCallbacks::InstructionCacheOperationRaised>(conf.callbacks).EmitCall(code);
}
@ -548,18 +548,18 @@ void A64EmitX64::EmitA64InstructionSynchronizationBarrier(A64EmitContext& ctx, I
return;
}
ctx.reg_alloc.HostCall(nullptr);
ctx.reg_alloc.HostCall(code, nullptr);
Devirtualize<&A64::UserCallbacks::InstructionSynchronizationBarrierRaised>(conf.callbacks).EmitCall(code);
}
void A64EmitX64::EmitA64GetCNTFRQ(A64EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.mov(result, conf.cntfrq_el0);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void A64EmitX64::EmitA64GetCNTPCT(A64EmitContext& ctx, IR::Inst* inst) {
ctx.reg_alloc.HostCall(inst);
ctx.reg_alloc.HostCall(code, inst);
if (!conf.wall_clock_cntpct) {
code.UpdateTicks();
}
@ -567,43 +567,43 @@ void A64EmitX64::EmitA64GetCNTPCT(A64EmitContext& ctx, IR::Inst* inst) {
}
void A64EmitX64::EmitA64GetCTR(A64EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.mov(result, conf.ctr_el0);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void A64EmitX64::EmitA64GetDCZID(A64EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.mov(result, conf.dczid_el0);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void A64EmitX64::EmitA64GetTPIDR(A64EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(code);
if (conf.tpidr_el0) {
code.mov(result, u64(conf.tpidr_el0));
code.mov(result, qword[result]);
} else {
code.xor_(result.cvt32(), result.cvt32());
}
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void A64EmitX64::EmitA64GetTPIDRRO(A64EmitContext& ctx, IR::Inst* inst) {
const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(code);
if (conf.tpidrro_el0) {
code.mov(result, u64(conf.tpidrro_el0));
code.mov(result, qword[result]);
} else {
code.xor_(result.cvt32(), result.cvt32());
}
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void A64EmitX64::EmitA64SetTPIDR(A64EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[0]);
const Xbyak::Reg64 addr = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(code, args[0]);
const Xbyak::Reg64 addr = ctx.reg_alloc.ScratchGpr(code);
if (conf.tpidr_el0) {
code.mov(addr, u64(conf.tpidr_el0));
code.mov(qword[addr], value);

View File

@ -68,7 +68,7 @@ void EmitX64::EmitVoid(EmitContext&, IR::Inst*) {
void EmitX64::EmitIdentity(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (!args[0].IsImmediate()) {
ctx.reg_alloc.DefineValue(inst, args[0]);
ctx.reg_alloc.DefineValue(code, inst, args[0]);
}
}
@ -78,7 +78,7 @@ void EmitX64::EmitBreakpoint(EmitContext&, IR::Inst*) {
void EmitX64::EmitCallHostFunction(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(nullptr, args[1], args[2], args[3]);
ctx.reg_alloc.HostCall(code, nullptr, args[1], args[2], args[3]);
code.mov(rax, args[0].GetImmediateU64());
code.call(rax);
}
@ -120,7 +120,7 @@ void EmitX64::EmitVerboseDebuggingOutput(RegAlloc& reg_alloc) {
code.lea(rax, ptr[rsp + sizeof(RegisterData) + offsetof(StackLayout, spill)]);
code.mov(qword[rsp + offsetof(RegisterData, spill)], rax);
reg_alloc.EmitVerboseDebuggingOutput();
reg_alloc.EmitVerboseDebuggingOutput(code);
for (int i = 0; i < 16; i++) {
if (rsp.getIdx() == i) {
@ -140,9 +140,9 @@ void EmitX64::EmitPushRSB(EmitContext& ctx, IR::Inst* inst) {
ASSERT(args[0].IsImmediate());
const u64 unique_hash_of_target = args[0].GetImmediateU64();
ctx.reg_alloc.ScratchGpr(HostLoc::RCX);
const Xbyak::Reg64 loc_desc_reg = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 index_reg = ctx.reg_alloc.ScratchGpr();
ctx.reg_alloc.ScratchGpr(code, HostLoc::RCX);
const Xbyak::Reg64 loc_desc_reg = ctx.reg_alloc.ScratchGpr(code);
const Xbyak::Reg64 index_reg = ctx.reg_alloc.ScratchGpr(code);
PushRSBHelper(loc_desc_reg, index_reg, IR::LocationDescriptor{unique_hash_of_target});
}
@ -190,12 +190,12 @@ void EmitX64::EmitGetNZFromOp(EmitContext& ctx, IR::Inst* inst) {
}
}();
const Xbyak::Reg64 nz = ctx.reg_alloc.ScratchGpr(HostLoc::RAX);
const Xbyak::Reg value = ctx.reg_alloc.UseGpr(args[0]).changeBit(bitsize);
const Xbyak::Reg64 nz = ctx.reg_alloc.ScratchGpr(code, HostLoc::RAX);
const Xbyak::Reg value = ctx.reg_alloc.UseGpr(code, args[0]).changeBit(bitsize);
code.test(value, value);
code.lahf();
code.movzx(eax, ah);
ctx.reg_alloc.DefineValue(inst, nz);
ctx.reg_alloc.DefineValue(code, inst, nz);
}
void EmitX64::EmitGetNZCVFromOp(EmitContext& ctx, IR::Inst* inst) {
@ -221,27 +221,27 @@ void EmitX64::EmitGetNZCVFromOp(EmitContext& ctx, IR::Inst* inst) {
}
}();
const Xbyak::Reg64 nzcv = ctx.reg_alloc.ScratchGpr(HostLoc::RAX);
const Xbyak::Reg value = ctx.reg_alloc.UseGpr(args[0]).changeBit(bitsize);
const Xbyak::Reg64 nzcv = ctx.reg_alloc.ScratchGpr(code, HostLoc::RAX);
const Xbyak::Reg value = ctx.reg_alloc.UseGpr(code, args[0]).changeBit(bitsize);
code.test(value, value);
code.lahf();
code.xor_(al, al);
ctx.reg_alloc.DefineValue(inst, nzcv);
ctx.reg_alloc.DefineValue(code, inst, nzcv);
}
void EmitX64::EmitGetCFlagFromNZCV(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (args[0].IsImmediate()) {
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32();
const u32 value = (args[0].GetImmediateU32() >> 8) & 1;
code.mov(result, value);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
} else {
const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 result = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
code.shr(result, 8);
code.and_(result, 1);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
}
@ -249,30 +249,30 @@ void EmitX64::EmitNZCVFromPackedFlags(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (args[0].IsImmediate()) {
const Xbyak::Reg32 nzcv = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 nzcv = ctx.reg_alloc.ScratchGpr(code).cvt32();
u32 value = 0;
value |= mcl::bit::get_bit<31>(args[0].GetImmediateU32()) ? (1 << 15) : 0;
value |= mcl::bit::get_bit<30>(args[0].GetImmediateU32()) ? (1 << 14) : 0;
value |= mcl::bit::get_bit<29>(args[0].GetImmediateU32()) ? (1 << 8) : 0;
value |= mcl::bit::get_bit<28>(args[0].GetImmediateU32()) ? (1 << 0) : 0;
code.mov(nzcv, value);
ctx.reg_alloc.DefineValue(inst, nzcv);
ctx.reg_alloc.DefineValue(code, inst, nzcv);
} else if (code.HasHostFeature(HostFeature::FastBMI2)) {
const Xbyak::Reg32 nzcv = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 nzcv = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.shr(nzcv, 28);
code.mov(tmp, NZCV::x64_mask);
code.pdep(nzcv, nzcv, tmp);
ctx.reg_alloc.DefineValue(inst, nzcv);
ctx.reg_alloc.DefineValue(code, inst, nzcv);
} else {
const Xbyak::Reg32 nzcv = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 nzcv = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
code.shr(nzcv, 28);
code.imul(nzcv, nzcv, NZCV::to_x64_multiplier);
code.and_(nzcv, NZCV::x64_mask);
ctx.reg_alloc.DefineValue(inst, nzcv);
ctx.reg_alloc.DefineValue(code, inst, nzcv);
}
}

View File

@ -23,13 +23,13 @@ using AESFn = void(AES::State&, const AES::State&);
static void EmitAESFunction(RegAlloc::ArgumentInfo args, EmitContext& ctx, BlockOfCode& code, IR::Inst* inst, AESFn fn) {
constexpr u32 stack_space = static_cast<u32>(sizeof(AES::State)) * 2;
const Xbyak::Xmm input = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm input = ctx.reg_alloc.UseXmm(code, args[0]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
ctx.reg_alloc.EndOfAllocScope();
ctx.reg_alloc.HostCall(nullptr);
ctx.reg_alloc.HostCall(code, nullptr);
ctx.reg_alloc.AllocStackSpace(stack_space + ABI_SHADOW_SPACE);
ctx.reg_alloc.AllocStackSpace(code, stack_space + ABI_SHADOW_SPACE);
code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE]);
code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + sizeof(AES::State)]);
@ -37,22 +37,22 @@ static void EmitAESFunction(RegAlloc::ArgumentInfo args, EmitContext& ctx, Block
code.CallFunction(fn);
code.movaps(result, xword[rsp + ABI_SHADOW_SPACE]);
ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE);
ctx.reg_alloc.ReleaseStackSpace(code, stack_space + ABI_SHADOW_SPACE);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void EmitX64::EmitAESDecryptSingleRound(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (code.HasHostFeature(HostFeature::AES)) {
const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm(code);
code.pxor(zero, zero);
code.aesdeclast(data, zero);
ctx.reg_alloc.DefineValue(inst, data);
ctx.reg_alloc.DefineValue(code, inst, data);
return;
}
@ -63,13 +63,13 @@ void EmitX64::EmitAESEncryptSingleRound(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (code.HasHostFeature(HostFeature::AES)) {
const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm(code);
code.pxor(zero, zero);
code.aesenclast(data, zero);
ctx.reg_alloc.DefineValue(inst, data);
ctx.reg_alloc.DefineValue(code, inst, data);
return;
}
@ -80,11 +80,11 @@ void EmitX64::EmitAESInverseMixColumns(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (code.HasHostFeature(HostFeature::AES)) {
const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]);
code.aesimc(data, data);
ctx.reg_alloc.DefineValue(inst, data);
ctx.reg_alloc.DefineValue(code, inst, data);
return;
}
@ -95,14 +95,14 @@ void EmitX64::EmitAESMixColumns(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (code.HasHostFeature(HostFeature::AES)) {
const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm(code);
code.pxor(zero, zero);
code.aesdeclast(data, zero);
code.aesenc(data, zero);
ctx.reg_alloc.DefineValue(inst, data);
ctx.reg_alloc.DefineValue(code, inst, data);
return;
}

View File

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* SPDX-License-Identifier: 0BSD
@ -19,16 +22,16 @@ namespace CRC32 = Common::Crypto::CRC32;
static void EmitCRC32Castagnoli(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, const int data_size) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (code.HasHostFeature(HostFeature::SSE42)) {
const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg value = ctx.reg_alloc.UseGpr(args[1]).changeBit(data_size);
const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
const Xbyak::Reg value = ctx.reg_alloc.UseGpr(code, args[1]).changeBit(data_size);
if (data_size != 64) {
code.crc32(crc, value);
} else {
code.crc32(crc.cvt64(), value);
}
ctx.reg_alloc.DefineValue(inst, crc);
ctx.reg_alloc.DefineValue(code, inst, crc);
} else {
ctx.reg_alloc.HostCall(inst, args[0], args[1], {});
ctx.reg_alloc.HostCall(code, inst, args[0], args[1], {});
code.mov(code.ABI_PARAM3.cvt32(), data_size / CHAR_BIT); //zext
code.CallFunction(&CRC32::ComputeCRC32Castagnoli);
}
@ -38,11 +41,11 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size < 32) {
const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg64 value = ctx.reg_alloc.UseScratchGpr(args[1]);
const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_const = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_tmp = ctx.reg_alloc.ScratchXmm();
const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
const Xbyak::Reg64 value = ctx.reg_alloc.UseScratchGpr(code, args[1]);
const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm xmm_const = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm xmm_tmp = ctx.reg_alloc.ScratchXmm(code);
code.movdqa(xmm_const, code.Const(xword, 0xb4e5b025'f7011641, 0x00000001'DB710641));
@ -64,12 +67,12 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co
code.pextrd(crc, xmm_value, 2);
ctx.reg_alloc.DefineValue(inst, crc);
ctx.reg_alloc.DefineValue(code, inst, crc);
} else if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 32) {
const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 value = ctx.reg_alloc.UseGpr(args[1]).cvt32();
const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_const = ctx.reg_alloc.ScratchXmm();
const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
const Xbyak::Reg32 value = ctx.reg_alloc.UseGpr(code, args[1]).cvt32();
const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm xmm_const = ctx.reg_alloc.ScratchXmm(code);
code.movdqa(xmm_const, code.Const(xword, 0xb4e5b025'f7011641, 0x00000001'DB710641));
@ -82,12 +85,12 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co
code.pextrd(crc, xmm_value, 2);
ctx.reg_alloc.DefineValue(inst, crc);
ctx.reg_alloc.DefineValue(code, inst, crc);
} else if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 64) {
const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[1]);
const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_const = ctx.reg_alloc.ScratchXmm();
const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
const Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(code, args[1]);
const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm xmm_const = ctx.reg_alloc.ScratchXmm(code);
code.movdqa(xmm_const, code.Const(xword, 0xb4e5b025'f7011641, 0x00000001'DB710641));
@ -100,9 +103,9 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co
code.pextrd(crc, xmm_value, 2);
ctx.reg_alloc.DefineValue(inst, crc);
ctx.reg_alloc.DefineValue(code, inst, crc);
} else {
ctx.reg_alloc.HostCall(inst, args[0], args[1], {});
ctx.reg_alloc.HostCall(code, inst, args[0], args[1], {});
code.mov(code.ABI_PARAM3, data_size / CHAR_BIT);
code.CallFunction(&CRC32::ComputeCRC32ISO);
}

View File

@ -54,14 +54,14 @@ void AxxEmitX64::EmitMemoryRead(AxxEmitContext& ctx, IR::Inst* inst) {
if (!conf.page_table && !fastmem_marker) {
// Neither fastmem nor page table: Use callbacks
if constexpr (bitsize == 128) {
ctx.reg_alloc.HostCall(nullptr, {}, args[1]);
ctx.reg_alloc.HostCall(code, nullptr, {}, args[1]);
if (ordered) {
code.mfence();
}
code.CallFunction(memory_read_128);
ctx.reg_alloc.DefineValue(inst, xmm1);
ctx.reg_alloc.DefineValue(code, inst, xmm1);
} else {
ctx.reg_alloc.HostCall(inst, {}, args[1]);
ctx.reg_alloc.HostCall(code, inst, {}, args[1]);
if (ordered) {
code.mfence();
}
@ -74,14 +74,14 @@ void AxxEmitX64::EmitMemoryRead(AxxEmitContext& ctx, IR::Inst* inst) {
if (ordered && bitsize == 128) {
// Required for atomic 128-bit loads/stores
ctx.reg_alloc.ScratchGpr(HostLoc::RAX);
ctx.reg_alloc.ScratchGpr(HostLoc::RBX);
ctx.reg_alloc.ScratchGpr(HostLoc::RCX);
ctx.reg_alloc.ScratchGpr(HostLoc::RDX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RAX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RBX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RCX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RDX);
}
const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[1]);
const int value_idx = bitsize == 128 ? ctx.reg_alloc.ScratchXmm().getIdx() : ctx.reg_alloc.ScratchGpr().getIdx();
const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(code, args[1]);
const int value_idx = bitsize == 128 ? ctx.reg_alloc.ScratchXmm(code).getIdx() : ctx.reg_alloc.ScratchGpr(code).getIdx();
const auto wrapped_fn = read_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value_idx)];
@ -126,9 +126,9 @@ void AxxEmitX64::EmitMemoryRead(AxxEmitContext& ctx, IR::Inst* inst) {
code.L(*end);
if constexpr (bitsize == 128) {
ctx.reg_alloc.DefineValue(inst, Xbyak::Xmm{value_idx});
ctx.reg_alloc.DefineValue(code, inst, Xbyak::Xmm{value_idx});
} else {
ctx.reg_alloc.DefineValue(inst, Xbyak::Reg64{value_idx});
ctx.reg_alloc.DefineValue(code, inst, Xbyak::Reg64{value_idx});
}
}
@ -141,13 +141,13 @@ void AxxEmitX64::EmitMemoryWrite(AxxEmitContext& ctx, IR::Inst* inst) {
if (!conf.page_table && !fastmem_marker) {
// Neither fastmem nor page table: Use callbacks
if constexpr (bitsize == 128) {
ctx.reg_alloc.Use(args[1], ABI_PARAM2);
ctx.reg_alloc.Use(args[2], HostLoc::XMM1);
ctx.reg_alloc.Use(code, args[1], ABI_PARAM2);
ctx.reg_alloc.Use(code, args[2], HostLoc::XMM1);
ctx.reg_alloc.EndOfAllocScope();
ctx.reg_alloc.HostCall(nullptr);
ctx.reg_alloc.HostCall(code, nullptr);
code.CallFunction(memory_write_128);
} else {
ctx.reg_alloc.HostCall(nullptr, {}, args[1], args[2]);
ctx.reg_alloc.HostCall(code, nullptr, {}, args[1], args[2]);
Devirtualize<callback>(conf.callbacks).EmitCall(code);
}
if (ordered) {
@ -159,16 +159,16 @@ void AxxEmitX64::EmitMemoryWrite(AxxEmitContext& ctx, IR::Inst* inst) {
if (ordered && bitsize == 128) {
// Required for atomic 128-bit loads/stores
ctx.reg_alloc.ScratchGpr(HostLoc::RAX);
ctx.reg_alloc.ScratchGpr(HostLoc::RBX);
ctx.reg_alloc.ScratchGpr(HostLoc::RCX);
ctx.reg_alloc.ScratchGpr(HostLoc::RDX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RAX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RBX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RCX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RDX);
}
const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[1]);
const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(code, args[1]);
const int value_idx = bitsize == 128
? ctx.reg_alloc.UseXmm(args[2]).getIdx()
: (ordered ? ctx.reg_alloc.UseScratchGpr(args[2]).getIdx() : ctx.reg_alloc.UseGpr(args[2]).getIdx());
? ctx.reg_alloc.UseXmm(code, args[2]).getIdx()
: (ordered ? ctx.reg_alloc.UseScratchGpr(code, args[2]).getIdx() : ctx.reg_alloc.UseGpr(code, args[2]).getIdx());
const auto wrapped_fn = write_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value_idx)];
@ -222,7 +222,7 @@ void AxxEmitX64::EmitExclusiveReadMemory(AxxEmitContext& ctx, IR::Inst* inst) {
if constexpr (bitsize != 128) {
using T = mcl::unsigned_integer_of_size<bitsize>;
ctx.reg_alloc.HostCall(inst, {}, args[1]);
ctx.reg_alloc.HostCall(code, inst, {}, args[1]);
code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(1));
code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf));
@ -237,14 +237,14 @@ void AxxEmitX64::EmitExclusiveReadMemory(AxxEmitContext& ctx, IR::Inst* inst) {
});
code.ZeroExtendFrom(bitsize, code.ABI_RETURN);
} else {
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
ctx.reg_alloc.Use(args[1], ABI_PARAM2);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
ctx.reg_alloc.Use(code, args[1], ABI_PARAM2);
ctx.reg_alloc.EndOfAllocScope();
ctx.reg_alloc.HostCall(nullptr);
ctx.reg_alloc.HostCall(code, nullptr);
code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(1));
code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf));
ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE);
ctx.reg_alloc.AllocStackSpace(code, 16 + ABI_SHADOW_SPACE);
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]);
if (ordered) {
code.mfence();
@ -256,9 +256,9 @@ void AxxEmitX64::EmitExclusiveReadMemory(AxxEmitContext& ctx, IR::Inst* inst) {
});
});
code.movups(result, xword[rsp + ABI_SHADOW_SPACE]);
ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE);
ctx.reg_alloc.ReleaseStackSpace(code, 16 + ABI_SHADOW_SPACE);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
EmitCheckMemoryAbort(ctx, inst);
@ -271,15 +271,15 @@ void AxxEmitX64::EmitExclusiveWriteMemory(AxxEmitContext& ctx, IR::Inst* inst) {
const bool ordered = IsOrdered(args[3].GetImmediateAccType());
if constexpr (bitsize == 128) {
ctx.reg_alloc.Use(args[1], ABI_PARAM2);
ctx.reg_alloc.Use(args[2], HostLoc::XMM1);
ctx.reg_alloc.Use(code, args[1], ABI_PARAM2);
ctx.reg_alloc.Use(code, args[2], HostLoc::XMM1);
ctx.reg_alloc.EndOfAllocScope();
ctx.reg_alloc.HostCall(inst);
ctx.reg_alloc.HostCall(code, inst);
} else {
ctx.reg_alloc.HostCall(inst, {}, args[1], args[2]);
ctx.reg_alloc.HostCall(code, inst, {}, args[1], args[2]);
}
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(code);
Xbyak::Label end;
code.mov(code.ABI_RETURN, u32(1));
code.movzx(tmp.cvt32(), code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)]);
@ -299,7 +299,7 @@ void AxxEmitX64::EmitExclusiveWriteMemory(AxxEmitContext& ctx, IR::Inst* inst) {
code.mfence();
}
} else {
ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE);
ctx.reg_alloc.AllocStackSpace(code, 16 + ABI_SHADOW_SPACE);
code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]);
code.movaps(xword[code.ABI_PARAM3], xmm1);
code.CallLambda([](AxxUserConfig& conf, Axx::VAddr vaddr, Vector& value) -> u32 {
@ -310,7 +310,7 @@ void AxxEmitX64::EmitExclusiveWriteMemory(AxxEmitContext& ctx, IR::Inst* inst) {
if (ordered) {
code.mfence();
}
ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE);
ctx.reg_alloc.ReleaseStackSpace(code, 16 + ABI_SHADOW_SPACE);
}
code.L(end);
@ -330,16 +330,16 @@ void AxxEmitX64::EmitExclusiveReadMemoryInline(AxxEmitContext& ctx, IR::Inst* in
if constexpr (ordered && bitsize == 128) {
// Required for atomic 128-bit loads/stores
ctx.reg_alloc.ScratchGpr(HostLoc::RAX);
ctx.reg_alloc.ScratchGpr(HostLoc::RBX);
ctx.reg_alloc.ScratchGpr(HostLoc::RCX);
ctx.reg_alloc.ScratchGpr(HostLoc::RDX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RAX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RBX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RCX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RDX);
}
const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[1]);
const int value_idx = bitsize == 128 ? ctx.reg_alloc.ScratchXmm().getIdx() : ctx.reg_alloc.ScratchGpr().getIdx();
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 tmp2 = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(code, args[1]);
const int value_idx = bitsize == 128 ? ctx.reg_alloc.ScratchXmm(code).getIdx() : ctx.reg_alloc.ScratchGpr(code).getIdx();
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(code);
const Xbyak::Reg64 tmp2 = ctx.reg_alloc.ScratchGpr(code);
const auto wrapped_fn = read_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value_idx)];
@ -386,9 +386,9 @@ void AxxEmitX64::EmitExclusiveReadMemoryInline(AxxEmitContext& ctx, IR::Inst* in
EmitExclusiveUnlock(code, conf, tmp, tmp2.cvt32());
if constexpr (bitsize == 128) {
ctx.reg_alloc.DefineValue(inst, Xbyak::Xmm{value_idx});
ctx.reg_alloc.DefineValue(code, inst, Xbyak::Xmm{value_idx});
} else {
ctx.reg_alloc.DefineValue(inst, Xbyak::Reg64{value_idx});
ctx.reg_alloc.DefineValue(code, inst, Xbyak::Reg64{value_idx});
}
EmitCheckMemoryAbort(ctx, inst);
@ -407,19 +407,19 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
const auto value = [&] {
if constexpr (bitsize == 128) {
ctx.reg_alloc.ScratchGpr(HostLoc::RAX);
ctx.reg_alloc.ScratchGpr(HostLoc::RBX);
ctx.reg_alloc.ScratchGpr(HostLoc::RCX);
ctx.reg_alloc.ScratchGpr(HostLoc::RDX);
return ctx.reg_alloc.UseXmm(args[2]);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RAX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RBX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RCX);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RDX);
return ctx.reg_alloc.UseXmm(code, args[2]);
} else {
ctx.reg_alloc.ScratchGpr(HostLoc::RAX);
return ctx.reg_alloc.UseGpr(args[2]);
ctx.reg_alloc.ScratchGpr(code, HostLoc::RAX);
return ctx.reg_alloc.UseGpr(code, args[2]);
}
}();
const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[1]);
const Xbyak::Reg32 status = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(code, args[1]);
const Xbyak::Reg32 status = ctx.reg_alloc.ScratchGpr(code).cvt32();
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(code);
const auto wrapped_fn = exclusive_write_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value.getIdx())];
@ -518,7 +518,7 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
code.L(*end);
EmitExclusiveUnlock(code, conf, tmp, eax);
ctx.reg_alloc.DefineValue(inst, status);
ctx.reg_alloc.DefineValue(code, inst, status);
EmitCheckMemoryAbort(ctx, inst);
}

View File

@ -75,8 +75,8 @@ Xbyak::RegExp EmitVAddrLookup(BlockOfCode& code, EmitContext& ctx, size_t bitsiz
template<>
[[maybe_unused]] Xbyak::RegExp EmitVAddrLookup<A32EmitContext>(BlockOfCode& code, A32EmitContext& ctx, size_t bitsize, Xbyak::Label& abort, Xbyak::Reg64 vaddr) {
const Xbyak::Reg64 page = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg32 tmp = ctx.conf.absolute_offset_page_table ? page.cvt32() : ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg64 page = ctx.reg_alloc.ScratchGpr(code);
const Xbyak::Reg32 tmp = ctx.conf.absolute_offset_page_table ? page.cvt32() : ctx.reg_alloc.ScratchGpr(code).cvt32();
EmitDetectMisalignedVAddr(code, ctx, bitsize, abort, vaddr, tmp.cvt64());
@ -105,8 +105,8 @@ template<>
const size_t valid_page_index_bits = ctx.conf.page_table_address_space_bits - page_bits;
const size_t unused_top_bits = 64 - ctx.conf.page_table_address_space_bits;
const Xbyak::Reg64 page = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 tmp = ctx.conf.absolute_offset_page_table ? page : ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 page = ctx.reg_alloc.ScratchGpr(code);
const Xbyak::Reg64 tmp = ctx.conf.absolute_offset_page_table ? page : ctx.reg_alloc.ScratchGpr(code);
EmitDetectMisalignedVAddr(code, ctx, bitsize, abort, vaddr, tmp);
@ -116,7 +116,7 @@ template<>
} else if (ctx.conf.silently_mirror_page_table) {
if (valid_page_index_bits >= 32) {
if (code.HasHostFeature(HostFeature::BMI2)) {
const Xbyak::Reg64 bit_count = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 bit_count = ctx.reg_alloc.ScratchGpr(code);
code.mov(bit_count, unused_top_bits);
code.bzhi(tmp, vaddr, bit_count);
code.shr(tmp, int(page_bits));
@ -168,7 +168,7 @@ template<>
return r13 + vaddr;
} else if (ctx.conf.silently_mirror_fastmem) {
if (!tmp) {
tmp = ctx.reg_alloc.ScratchGpr();
tmp = ctx.reg_alloc.ScratchGpr(code);
}
if (unused_top_bits < 32) {
code.mov(*tmp, vaddr);
@ -189,7 +189,7 @@ template<>
} else {
// TODO: Consider having TEST as above but coalesce 64-bit constant in register allocator
if (!tmp) {
tmp = ctx.reg_alloc.ScratchGpr();
tmp = ctx.reg_alloc.ScratchGpr(code);
}
code.mov(*tmp, vaddr);
code.shr(*tmp, int(ctx.conf.fastmem_address_space_bits));
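
A rough, hedged sketch of the address translation the lookups above JIT-compile (all names are illustrative; the real code also folds in the misalignment check, the absolute_offset_page_table variant, and the silently_mirror options, and assumes page_bits and address_space_bits below 64):

    #include <cstdint>

    std::uint8_t* Lookup(std::uint8_t* const* page_table, std::uint64_t vaddr,
                         unsigned page_bits, unsigned address_space_bits) {
        const std::uint64_t space_mask  = (std::uint64_t(1) << address_space_bits) - 1;
        const std::uint64_t page_offset = vaddr & ((std::uint64_t(1) << page_bits) - 1);
        const std::uint64_t page_index  = (vaddr & space_mask) >> page_bits;  // mirror away high bits
        std::uint8_t* const page = page_table[page_index];
        return page ? page + page_offset : nullptr;  // null entry -> fall through to the abort label
    }
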

View File

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* SPDX-License-Identifier: 0BSD
@ -16,14 +19,14 @@ void EmitX64::EmitPackedAddU8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
code.paddb(xmm_a, xmm_b);
if (ge_inst) {
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm(code);
code.pcmpeqb(ones, ones);
@ -32,21 +35,21 @@ void EmitX64::EmitPackedAddU8(EmitContext& ctx, IR::Inst* inst) {
code.pcmpeqb(xmm_ge, xmm_b);
code.pxor(xmm_ge, ones);
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
ctx.reg_alloc.DefineValue(code, ge_inst, xmm_ge);
}
ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
}
void EmitX64::EmitPackedAddS8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
if (ge_inst) {
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(code);
code.pcmpeqb(xmm0, xmm0);
@ -54,27 +57,27 @@ void EmitX64::EmitPackedAddS8(EmitContext& ctx, IR::Inst* inst) {
code.paddsb(xmm_ge, xmm_b);
code.pcmpgtb(xmm_ge, xmm0);
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
ctx.reg_alloc.DefineValue(code, ge_inst, xmm_ge);
}
code.paddb(xmm_a, xmm_b);
ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
}
void EmitX64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
code.paddw(xmm_a, xmm_b);
if (ge_inst) {
if (code.HasHostFeature(HostFeature::SSE41)) {
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm(code);
code.pcmpeqb(ones, ones);
@ -83,10 +86,10 @@ void EmitX64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) {
code.pcmpeqw(xmm_ge, xmm_b);
code.pxor(xmm_ge, ones);
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
ctx.reg_alloc.DefineValue(code, ge_inst, xmm_ge);
} else {
const Xbyak::Xmm tmp_a = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm tmp_b = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm tmp_a = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm tmp_b = ctx.reg_alloc.ScratchXmm(code);
// !(b <= a+b) == b > a+b
code.movdqa(tmp_a, xmm_a);
@ -95,22 +98,22 @@ void EmitX64::EmitPackedAddU16(EmitContext& ctx, IR::Inst* inst) {
code.paddw(tmp_b, code.Const(xword, 0x80008000));
code.pcmpgtw(tmp_b, tmp_a); // *Signed* comparison!
ctx.reg_alloc.DefineValue(ge_inst, tmp_b);
ctx.reg_alloc.DefineValue(code, ge_inst, tmp_b);
}
}
ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
}
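
The 0x80008000 bias in the pre-SSE4.1 path works because SSE only provides a signed 16-bit compare: adding (equivalently, XORing) 0x8000 to both operands flips the top bit and maps unsigned order onto signed order. A minimal standalone check of that identity:

    #include <cassert>
    #include <cstdint>

    int main() {
        for (std::uint32_t a = 0; a < 0x10000; a += 251) {
            for (std::uint32_t b = 0; b < 0x10000; b += 257) {
                const auto biased = [](std::uint32_t x) {
                    return static_cast<std::int16_t>(static_cast<std::uint16_t>(x + 0x8000));
                };
                assert((biased(b) > biased(a)) == (b > a));  // what pcmpgtw sees after the paddw bias
            }
        }
    }
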
void EmitX64::EmitPackedAddS16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
if (ge_inst) {
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(code);
code.pcmpeqw(xmm0, xmm0);
@ -118,45 +121,45 @@ void EmitX64::EmitPackedAddS16(EmitContext& ctx, IR::Inst* inst) {
code.paddsw(xmm_ge, xmm_b);
code.pcmpgtw(xmm_ge, xmm0);
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
ctx.reg_alloc.DefineValue(code, ge_inst, xmm_ge);
}
code.paddw(xmm_a, xmm_b);
ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
}
void EmitX64::EmitPackedSubU8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
if (ge_inst) {
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(code);
code.movdqa(xmm_ge, xmm_a);
code.pmaxub(xmm_ge, xmm_b);
code.pcmpeqb(xmm_ge, xmm_a);
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
ctx.reg_alloc.DefineValue(code, ge_inst, xmm_ge);
}
code.psubb(xmm_a, xmm_b);
ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
}
void EmitX64::EmitPackedSubS8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
if (ge_inst) {
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(code);
code.pcmpeqb(xmm0, xmm0);
@ -164,12 +167,12 @@ void EmitX64::EmitPackedSubS8(EmitContext& ctx, IR::Inst* inst) {
code.psubsb(xmm_ge, xmm_b);
code.pcmpgtb(xmm_ge, xmm0);
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
ctx.reg_alloc.DefineValue(code, ge_inst, xmm_ge);
}
code.psubb(xmm_a, xmm_b);
ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
}
void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
@ -177,19 +180,19 @@ void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
if (!ge_inst) {
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
code.psubw(xmm_a, xmm_b);
ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
return;
}
if (code.HasHostFeature(HostFeature::SSE41)) {
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(code);
code.movdqa(xmm_ge, xmm_a);
code.pmaxuw(xmm_ge, xmm_b); // Requires SSE 4.1
@ -197,15 +200,15 @@ void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
code.psubw(xmm_a, xmm_b);
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, ge_inst, xmm_ge);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
return;
}
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]);
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm(code);
// (a >= b) == !(b > a)
code.pcmpeqb(ones, ones);
@ -217,19 +220,19 @@ void EmitX64::EmitPackedSubU16(EmitContext& ctx, IR::Inst* inst) {
code.psubw(xmm_a, xmm_b);
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, ge_inst, xmm_ge);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
}
void EmitX64::EmitPackedSubS16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
if (ge_inst) {
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm(code);
code.pcmpeqw(xmm0, xmm0);
@ -237,21 +240,21 @@ void EmitX64::EmitPackedSubS16(EmitContext& ctx, IR::Inst* inst) {
code.psubsw(xmm_ge, xmm_b);
code.pcmpgtw(xmm_ge, xmm0);
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
ctx.reg_alloc.DefineValue(code, ge_inst, xmm_ge);
}
code.psubw(xmm_a, xmm_b);
ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
}
void EmitX64::EmitPackedHalvingAddU8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (args[0].IsInXmm() || args[1].IsInXmm()) {
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm();
if (args[0].IsInXmm(ctx.reg_alloc) || args[1].IsInXmm(ctx.reg_alloc)) {
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]);
const Xbyak::Xmm ones = ctx.reg_alloc.ScratchXmm(code);
// Since,
// pavg(a, b) == (a + b + 1) >> 1
@ -264,11 +267,11 @@ void EmitX64::EmitPackedHalvingAddU8(EmitContext& ctx, IR::Inst* inst) {
code.pavgb(xmm_a, xmm_b);
code.pxor(xmm_a, ones);
ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
} else {
const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 reg_b = ctx.reg_alloc.UseGpr(args[1]).cvt32();
const Xbyak::Reg32 xor_a_b = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
const Xbyak::Reg32 reg_b = ctx.reg_alloc.UseGpr(code, args[1]).cvt32();
const Xbyak::Reg32 xor_a_b = ctx.reg_alloc.ScratchGpr(code).cvt32();
const Xbyak::Reg32 and_a_b = reg_a;
const Xbyak::Reg32 result = reg_a;
@ -284,17 +287,17 @@ void EmitX64::EmitPackedHalvingAddU8(EmitContext& ctx, IR::Inst* inst) {
code.and_(xor_a_b, 0x7F7F7F7F);
code.add(result, xor_a_b);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
}
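
pavgb computes the rounded average (a + b + 1) >> 1, while UHADD8 needs the truncating (a + b) >> 1; complementing the inputs and the output converts one into the other, which is what the pxor-with-ones around pavgb achieves. An exhaustive, standalone check of that identity:

    #include <cassert>

    int main() {
        for (unsigned a = 0; a < 256; ++a) {
            for (unsigned b = 0; b < 256; ++b) {
                const unsigned na = ~a & 0xFF, nb = ~b & 0xFF;  // complemented inputs
                const unsigned pavg = (na + nb + 1) >> 1;       // what pavgb computes
                const unsigned uhadd = ~pavg & 0xFF;            // complement the result back
                assert(uhadd == ((a + b) >> 1));
            }
        }
    }
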
void EmitX64::EmitPackedHalvingAddU16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (args[0].IsInXmm() || args[1].IsInXmm()) {
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
if (args[0].IsInXmm(ctx.reg_alloc) || args[1].IsInXmm(ctx.reg_alloc)) {
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
code.movdqa(tmp, xmm_a);
code.pand(xmm_a, xmm_b);
@ -302,11 +305,11 @@ void EmitX64::EmitPackedHalvingAddU16(EmitContext& ctx, IR::Inst* inst) {
code.psrlw(tmp, 1);
code.paddw(xmm_a, tmp);
ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
} else {
const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 reg_b = ctx.reg_alloc.UseGpr(args[1]).cvt32();
const Xbyak::Reg32 xor_a_b = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
const Xbyak::Reg32 reg_b = ctx.reg_alloc.UseGpr(code, args[1]).cvt32();
const Xbyak::Reg32 xor_a_b = ctx.reg_alloc.ScratchGpr(code).cvt32();
const Xbyak::Reg32 and_a_b = reg_a;
const Xbyak::Reg32 result = reg_a;
@ -322,19 +325,19 @@ void EmitX64::EmitPackedHalvingAddU16(EmitContext& ctx, IR::Inst* inst) {
code.and_(xor_a_b, 0x7FFF7FFF);
code.add(result, xor_a_b);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
}
void EmitX64::EmitPackedHalvingAddS8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 reg_b = ctx.reg_alloc.UseGpr(args[1]).cvt32();
const Xbyak::Reg32 xor_a_b = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
const Xbyak::Reg32 reg_b = ctx.reg_alloc.UseGpr(code, args[1]).cvt32();
const Xbyak::Reg32 xor_a_b = ctx.reg_alloc.ScratchGpr(code).cvt32();
const Xbyak::Reg32 and_a_b = reg_a;
const Xbyak::Reg32 result = reg_a;
const Xbyak::Reg32 carry = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 carry = ctx.reg_alloc.ScratchGpr(code).cvt32();
// This relies on the equality x+y == ((x&y) << 1) + (x^y).
// Note that x^y always contains the LSB of the result.
@ -352,15 +355,15 @@ void EmitX64::EmitPackedHalvingAddS8(EmitContext& ctx, IR::Inst* inst) {
code.add(result, xor_a_b);
code.xor_(result, carry);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
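
The GPR paths lean on x + y == ((x & y) << 1) + (x ^ y), so the halved sum is (x & y) + ((x ^ y) >> 1) and never needs a ninth bit per lane (the 0x7F7F7F7F mask and the carry fix-up keep byte lanes from bleeding into each other). A standalone check over all signed 8-bit pairs:

    #include <cassert>

    int main() {
        for (int x = -128; x < 128; ++x) {
            for (int y = -128; y < 128; ++y) {
                // >> on a negative int is an arithmetic shift on the targets dynarmic supports
                assert(((x & y) + ((x ^ y) >> 1)) == ((x + y) >> 1));
            }
        }
    }
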
void EmitX64::EmitPackedHalvingAddS16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
// This relies on the equality x+y == ((x&y) << 1) + (x^y).
// Note that x^y always contains the LSB of the result.
@ -373,14 +376,14 @@ void EmitX64::EmitPackedHalvingAddS16(EmitContext& ctx, IR::Inst* inst) {
code.psraw(tmp, 1);
code.paddw(xmm_a, tmp);
ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
}
void EmitX64::EmitPackedHalvingSubU8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Reg32 minuend = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 subtrahend = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
const Xbyak::Reg32 minuend = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
const Xbyak::Reg32 subtrahend = ctx.reg_alloc.UseScratchGpr(code, args[1]).cvt32();
// This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
// Note that x^y always contains the LSB of the result.
@ -403,16 +406,16 @@ void EmitX64::EmitPackedHalvingSubU8(EmitContext& ctx, IR::Inst* inst) {
code.xor_(minuend, 0x80808080);
// minuend now contains the desired result.
ctx.reg_alloc.DefineValue(inst, minuend);
ctx.reg_alloc.DefineValue(code, inst, minuend);
}
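
The subtraction counterpart of the identity checked above, x - y == (x ^ y) - (((x ^ y) & y) << 1), can be verified the same way:

    #include <cassert>

    int main() {
        for (int x = 0; x < 256; ++x)
            for (int y = 0; y < 256; ++y)
                assert(x - y == (x ^ y) - (((x ^ y) & y) << 1));
    }
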
void EmitX64::EmitPackedHalvingSubS8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Reg32 minuend = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 subtrahend = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
const Xbyak::Reg32 minuend = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
const Xbyak::Reg32 subtrahend = ctx.reg_alloc.UseScratchGpr(code, args[1]).cvt32();
const Xbyak::Reg32 carry = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 carry = ctx.reg_alloc.ScratchGpr(code).cvt32();
// This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
// Note that x^y always contains the LSB of the result.
@ -439,14 +442,14 @@ void EmitX64::EmitPackedHalvingSubS8(EmitContext& ctx, IR::Inst* inst) {
code.xor_(minuend, 0x80808080);
code.xor_(minuend, carry);
ctx.reg_alloc.DefineValue(inst, minuend);
ctx.reg_alloc.DefineValue(code, inst, minuend);
}
void EmitX64::EmitPackedHalvingSubU16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm minuend = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm subtrahend = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm minuend = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm subtrahend = ctx.reg_alloc.UseScratchXmm(code, args[1]);
// This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
// Note that x^y always contains the LSB of the result.
@ -462,14 +465,14 @@ void EmitX64::EmitPackedHalvingSubU16(EmitContext& ctx, IR::Inst* inst) {
code.psubw(minuend, subtrahend);
ctx.reg_alloc.DefineValue(inst, minuend);
ctx.reg_alloc.DefineValue(code, inst, minuend);
}
void EmitX64::EmitPackedHalvingSubS16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm minuend = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm subtrahend = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm minuend = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm subtrahend = ctx.reg_alloc.UseScratchXmm(code, args[1]);
// This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
// Note that x^y always contains the LSB of the result.
@ -485,17 +488,17 @@ void EmitX64::EmitPackedHalvingSubS16(EmitContext& ctx, IR::Inst* inst) {
code.psubw(minuend, subtrahend);
ctx.reg_alloc.DefineValue(inst, minuend);
ctx.reg_alloc.DefineValue(code, inst, minuend);
}
static void EmitPackedSubAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, bool hi_is_sum, bool is_signed, bool is_halving) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto ge_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetGEFromOp);
const Xbyak::Reg32 reg_a_hi = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 reg_b_hi = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
const Xbyak::Reg32 reg_a_lo = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 reg_b_lo = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 reg_a_hi = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
const Xbyak::Reg32 reg_b_hi = ctx.reg_alloc.UseScratchGpr(code, args[1]).cvt32();
const Xbyak::Reg32 reg_a_lo = ctx.reg_alloc.ScratchGpr(code).cvt32();
const Xbyak::Reg32 reg_b_lo = ctx.reg_alloc.ScratchGpr(code).cvt32();
Xbyak::Reg32 reg_sum, reg_diff;
if (is_signed) {
@ -543,7 +546,7 @@ static void EmitPackedSubAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst
code.and_(ge_diff, hi_is_sum ? 0x0000FFFF : 0xFFFF0000);
code.or_(ge_sum, ge_diff);
ctx.reg_alloc.DefineValue(ge_inst, ge_sum);
ctx.reg_alloc.DefineValue(code, ge_inst, ge_sum);
}
if (is_halving) {
@ -557,7 +560,7 @@ static void EmitPackedSubAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst
// Merge them.
code.shld(reg_a_hi, reg_a_lo, 16);
ctx.reg_alloc.DefineValue(inst, reg_a_hi);
ctx.reg_alloc.DefineValue(code, inst, reg_a_hi);
}
void EmitX64::EmitPackedAddSubU16(EmitContext& ctx, IR::Inst* inst) {
@ -595,12 +598,12 @@ void EmitX64::EmitPackedHalvingSubAddS16(EmitContext& ctx, IR::Inst* inst) {
static void EmitPackedOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
(code.*fn)(xmm_a, xmm_b);
ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
}
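
EmitPackedOperation funnels every simple packed op through one body by taking an Xbyak member-function pointer; the saturated add/sub emitters below presumably just pass the matching instruction. The idiom, reduced to a standalone sketch (the Assembler type here is invented for illustration):

    #include <cstdio>

    struct Assembler {
        void paddusb(int dst, int src) { std::printf("paddusb %d, %d\n", dst, src); }
        void psubusb(int dst, int src) { std::printf("psubusb %d, %d\n", dst, src); }
    };

    using PackedFn = void (Assembler::*)(int, int);

    void EmitPacked(Assembler& as, PackedFn fn) {
        (as.*fn)(0, 1);  // one call site, instruction chosen by the pointer
    }

    int main() {
        Assembler as;
        EmitPacked(as, &Assembler::paddusb);
        EmitPacked(as, &Assembler::psubusb);
    }
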
void EmitX64::EmitPackedSaturatedAddU8(EmitContext& ctx, IR::Inst* inst) {
@ -638,9 +641,9 @@ void EmitX64::EmitPackedSaturatedSubS16(EmitContext& ctx, IR::Inst* inst) {
void EmitX64::EmitPackedAbsDiffSumU8(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
// TODO: Optimize with zero-extension detection
code.movaps(tmp, code.Const(xword, 0x0000'0000'ffff'ffff));
@ -648,45 +651,45 @@ void EmitX64::EmitPackedAbsDiffSumU8(EmitContext& ctx, IR::Inst* inst) {
code.pand(xmm_b, tmp);
code.psadbw(xmm_a, xmm_b);
ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
}
void EmitX64::EmitPackedSelect(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const size_t num_args_in_xmm = args[0].IsInXmm() + args[1].IsInXmm() + args[2].IsInXmm();
const size_t num_args_in_xmm = args[0].IsInXmm(ctx.reg_alloc) + args[1].IsInXmm(ctx.reg_alloc) + args[2].IsInXmm(ctx.reg_alloc);
if (num_args_in_xmm >= 2) {
const Xbyak::Xmm ge = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm to = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(args[2]);
const Xbyak::Xmm ge = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm to = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm from = ctx.reg_alloc.UseScratchXmm(code, args[2]);
code.pand(from, ge);
code.pandn(ge, to);
code.por(from, ge);
ctx.reg_alloc.DefineValue(inst, from);
ctx.reg_alloc.DefineValue(code, inst, from);
} else if (code.HasHostFeature(HostFeature::BMI1)) {
const Xbyak::Reg32 ge = ctx.reg_alloc.UseGpr(args[0]).cvt32();
const Xbyak::Reg32 to = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
const Xbyak::Reg32 from = ctx.reg_alloc.UseScratchGpr(args[2]).cvt32();
const Xbyak::Reg32 ge = ctx.reg_alloc.UseGpr(code, args[0]).cvt32();
const Xbyak::Reg32 to = ctx.reg_alloc.UseScratchGpr(code, args[1]).cvt32();
const Xbyak::Reg32 from = ctx.reg_alloc.UseScratchGpr(code, args[2]).cvt32();
code.and_(from, ge);
code.andn(to, ge, to);
code.or_(from, to);
ctx.reg_alloc.DefineValue(inst, from);
ctx.reg_alloc.DefineValue(code, inst, from);
} else {
const Xbyak::Reg32 ge = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 to = ctx.reg_alloc.UseGpr(args[1]).cvt32();
const Xbyak::Reg32 from = ctx.reg_alloc.UseScratchGpr(args[2]).cvt32();
const Xbyak::Reg32 ge = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
const Xbyak::Reg32 to = ctx.reg_alloc.UseGpr(code, args[1]).cvt32();
const Xbyak::Reg32 from = ctx.reg_alloc.UseScratchGpr(code, args[2]).cvt32();
code.and_(from, ge);
code.not_(ge);
code.and_(ge, to);
code.or_(from, ge);
ctx.reg_alloc.DefineValue(inst, from);
ctx.reg_alloc.DefineValue(code, inst, from);
}
}
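
All three PackedSelect paths compute the same bitwise select, result = (from & ge) | (to & ~ge); the BMI1 variant merely folds the NOT into andn, and the GPR fallback spells it out with not_/and_. Scalar reference (illustrative only):

    #include <cassert>
    #include <cstdint>

    std::uint32_t packed_select(std::uint32_t ge, std::uint32_t to, std::uint32_t from) {
        return (from & ge) | (to & ~ge);  // take 'from' where ge bits are set, 'to' elsewhere
    }

    int main() {
        assert(packed_select(0xFF00FF00u, 0x11111111u, 0x22222222u) == 0x22112211u);
    }
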

View File

@ -34,9 +34,9 @@ template<Op op, size_t size, bool has_overflow_inst = false>
void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Reg result = ctx.reg_alloc.UseScratchGpr(args[0]).changeBit(size);
Xbyak::Reg addend = ctx.reg_alloc.UseGpr(args[1]).changeBit(size);
Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr().changeBit(size);
Xbyak::Reg result = ctx.reg_alloc.UseScratchGpr(code, args[0]).changeBit(size);
Xbyak::Reg addend = ctx.reg_alloc.UseGpr(code, args[1]).changeBit(size);
Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr(code).changeBit(size);
constexpr u64 int_max = static_cast<u64>((std::numeric_limits<mcl::signed_integer_of_size<size>>::max)());
if constexpr (size < 64) {
@ -66,21 +66,21 @@ void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst)
code.seto(overflow.cvt8());
if constexpr (has_overflow_inst) {
if (const auto overflow_inst = inst->GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp)) {
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
ctx.reg_alloc.DefineValue(code, overflow_inst, overflow);
}
} else {
code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow.cvt8());
}
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
template<Op op, size_t size>
void EmitUnsignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
Xbyak::Reg op_result = ctx.reg_alloc.UseScratchGpr(args[0]).changeBit(size);
Xbyak::Reg addend = ctx.reg_alloc.UseScratchGpr(args[1]).changeBit(size);
Xbyak::Reg op_result = ctx.reg_alloc.UseScratchGpr(code, args[0]).changeBit(size);
Xbyak::Reg addend = ctx.reg_alloc.UseScratchGpr(code, args[1]).changeBit(size);
constexpr u64 boundary = op == Op::Add ? (std::numeric_limits<mcl::unsigned_integer_of_size<size>>::max)() : 0;
@ -96,11 +96,11 @@ void EmitUnsignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst
code.cmovae(addend, op_result);
}
const Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr(code);
code.setb(overflow.cvt8());
code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow.cvt8());
ctx.reg_alloc.DefineValue(inst, addend);
ctx.reg_alloc.DefineValue(code, inst, addend);
}
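
What the cmovae/setb sequence in EmitUnsignedSaturatedOp boils down to, written out as a hedged scalar sketch: clamp to the boundary on carry/borrow and accumulate the sticky QC flag. Function names are illustrative:

    #include <cassert>
    #include <cstdint>

    std::uint32_t sat_add_u32(std::uint32_t a, std::uint32_t b, bool& qc) {
        const std::uint32_t sum = a + b;
        const bool overflow = sum < a;        // carry out of the addition
        qc |= overflow;
        return overflow ? UINT32_MAX : sum;   // boundary for Add is the maximum
    }

    std::uint32_t sat_sub_u32(std::uint32_t a, std::uint32_t b, bool& qc) {
        const bool underflow = a < b;         // borrow out of the subtraction
        qc |= underflow;
        return underflow ? 0 : a - b;         // boundary for Sub is zero
    }

    int main() {
        bool qc = false;
        assert(sat_add_u32(0xFFFFFFF0u, 0x20u, qc) == UINT32_MAX && qc);
    }
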
} // anonymous namespace
@ -126,10 +126,10 @@ void EmitX64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) {
overflow_inst->ReplaceUsesWith(no_overflow);
}
// TODO: DefineValue directly on Argument
const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 source = ctx.reg_alloc.UseGpr(args[0]);
const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr(code);
const Xbyak::Reg64 source = ctx.reg_alloc.UseGpr(code, args[0]);
code.mov(result.cvt32(), source.cvt32());
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
return;
}
@ -137,9 +137,9 @@ void EmitX64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) {
const u32 positive_saturated_value = (1u << (N - 1)) - 1;
const u32 negative_saturated_value = 1u << (N - 1);
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseGpr(args[0]).cvt32();
const Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32();
const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseGpr(code, args[0]).cvt32();
const Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr(code).cvt32();
// overflow now contains a value between 0 and mask if it was originally between {negative,positive}_saturated_value.
code.lea(overflow, code.ptr[reg_a.cvt64() + negative_saturated_value]);
@ -156,10 +156,10 @@ void EmitX64::EmitSignedSaturation(EmitContext& ctx, IR::Inst* inst) {
if (overflow_inst) {
code.seta(overflow.cvt8());
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
ctx.reg_alloc.DefineValue(code, overflow_inst, overflow);
}
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
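
The lea/cmp/cmova sequence in EmitSignedSaturation is a branchless form of the range test below: x fits in an N-bit signed value exactly when x + 2^(N-1), viewed as unsigned, is at most 2^N - 1. A hedged sketch assuming 0 < N < 32 (the N == 32 case is handled separately above):

    #include <cassert>
    #include <cstdint>

    std::int32_t signed_saturate(std::int32_t x, unsigned N, bool& overflow) {
        const std::int32_t max = (1 << (N - 1)) - 1;
        const std::int32_t min = -max - 1;
        overflow = std::uint32_t(x + (1u << (N - 1))) > ((1u << N) - 1);
        return overflow ? (x < 0 ? min : max) : x;
    }

    int main() {
        bool v;
        assert(signed_saturate(70000, 16, v) == 32767 && v);
        assert(signed_saturate(-129, 8, v) == -128 && v);
    }
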
void EmitX64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {
@ -171,9 +171,9 @@ void EmitX64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {
const u32 saturated_value = (1u << N) - 1;
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseGpr(args[0]).cvt32();
const Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr(code).cvt32();
const Xbyak::Reg32 reg_a = ctx.reg_alloc.UseGpr(code, args[0]).cvt32();
const Xbyak::Reg32 overflow = ctx.reg_alloc.ScratchGpr(code).cvt32();
// Pseudocode: result = clamp(reg_a, 0, saturated_value);
code.xor_(overflow, overflow);
@ -185,10 +185,10 @@ void EmitX64::EmitUnsignedSaturation(EmitContext& ctx, IR::Inst* inst) {
if (overflow_inst) {
code.seta(overflow.cvt8());
ctx.reg_alloc.DefineValue(overflow_inst, overflow);
ctx.reg_alloc.DefineValue(code, overflow_inst, overflow);
}
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void EmitX64::EmitSignedSaturatedAdd8(EmitContext& ctx, IR::Inst* inst) {
@ -210,9 +210,9 @@ void EmitX64::EmitSignedSaturatedAdd64(EmitContext& ctx, IR::Inst* inst) {
void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh16(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Reg32 x = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
const Xbyak::Reg32 y = ctx.reg_alloc.UseScratchGpr(args[1]).cvt32();
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 x = ctx.reg_alloc.UseScratchGpr(code, args[0]).cvt32();
const Xbyak::Reg32 y = ctx.reg_alloc.UseScratchGpr(code, args[1]).cvt32();
const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.movsx(x, x.cvt16());
code.movsx(y, y.cvt16());
@ -228,15 +228,15 @@ void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh16(EmitContext& ctx,
code.sets(tmp.cvt8());
code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], tmp.cvt8());
ctx.reg_alloc.DefineValue(inst, y);
ctx.reg_alloc.DefineValue(code, inst, y);
}
void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Reg64 x = ctx.reg_alloc.UseScratchGpr(args[0]);
const Xbyak::Reg64 y = ctx.reg_alloc.UseScratchGpr(args[1]);
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 x = ctx.reg_alloc.UseScratchGpr(code, args[0]);
const Xbyak::Reg64 y = ctx.reg_alloc.UseScratchGpr(code, args[1]);
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(code);
code.movsxd(x, x.cvt32());
code.movsxd(y, y.cvt32());
@ -252,7 +252,7 @@ void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh32(EmitContext& ctx,
code.sets(tmp.cvt8());
code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], tmp.cvt8());
ctx.reg_alloc.DefineValue(inst, y);
ctx.reg_alloc.DefineValue(code, inst, y);
}
void EmitX64::EmitSignedSaturatedSub8(EmitContext& ctx, IR::Inst* inst) {

View File

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2022 MerryMage
* SPDX-License-Identifier: 0BSD
@ -22,9 +25,9 @@ void EmitX64::EmitSHA256Hash(EmitContext& ctx, IR::Inst* inst) {
// y = h g f e
// w = wk3 wk2 wk1 wk0
const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm w = ctx.reg_alloc.UseXmm(args[2]);
const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(code, args[1]);
const Xbyak::Xmm w = ctx.reg_alloc.UseXmm(code, args[2]);
// x64 expects:
// 3 2 1 0
@ -45,7 +48,7 @@ void EmitX64::EmitSHA256Hash(EmitContext& ctx, IR::Inst* inst) {
code.shufps(y, x, part1 ? 0b10111011 : 0b00010001);
ctx.reg_alloc.DefineValue(inst, y);
ctx.reg_alloc.DefineValue(code, inst, y);
}
void EmitX64::EmitSHA256MessageSchedule0(EmitContext& ctx, IR::Inst* inst) {
@ -53,12 +56,12 @@ void EmitX64::EmitSHA256MessageSchedule0(EmitContext& ctx, IR::Inst* inst) {
ASSERT(code.HasHostFeature(HostFeature::SHA));
const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]);
code.sha256msg1(x, y);
ctx.reg_alloc.DefineValue(inst, x);
ctx.reg_alloc.DefineValue(code, inst, x);
}
void EmitX64::EmitSHA256MessageSchedule1(EmitContext& ctx, IR::Inst* inst) {
@ -66,16 +69,16 @@ void EmitX64::EmitSHA256MessageSchedule1(EmitContext& ctx, IR::Inst* inst) {
ASSERT(code.HasHostFeature(HostFeature::SHA));
const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm z = ctx.reg_alloc.UseXmm(args[2]);
const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm z = ctx.reg_alloc.UseXmm(code, args[2]);
code.movaps(xmm0, z);
code.palignr(xmm0, y, 4);
code.paddd(x, xmm0);
code.sha256msg2(x, z);
ctx.reg_alloc.DefineValue(inst, x);
ctx.reg_alloc.DefineValue(code, inst, x);
}
} // namespace Dynarmic::Backend::X64

View File

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
* Copyright (c) 2018 MerryMage
* SPDX-License-Identifier: 0BSD
@ -13,7 +16,7 @@ namespace Dynarmic::Backend::X64 {
void EmitX64::EmitSM4AccessSubstitutionBox(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
ctx.reg_alloc.HostCall(inst, args[0]);
ctx.reg_alloc.HostCall(code, inst, args[0]);
code.CallFunction(&Common::Crypto::SM4::AccessSubstitutionBox);
code.movzx(code.ABI_RETURN.cvt32(), code.ABI_RETURN.cvt8());
}

File diff suppressed because it is too large

View File

@ -96,7 +96,7 @@ void HandleNaNs(BlockOfCode& code, EmitContext& ctx, bool fpcr_controlled, std::
if (code.HasHostFeature(HostFeature::SSE41)) {
code.ptest(nan_mask, nan_mask);
} else {
const Xbyak::Reg32 bitmask = ctx.reg_alloc.ScratchGpr().cvt32();
const Xbyak::Reg32 bitmask = ctx.reg_alloc.ScratchGpr(code).cvt32();
code.movmskps(bitmask, nan_mask);
code.cmp(bitmask, 0);
}
@ -312,13 +312,13 @@ void EmitTwoOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
Xbyak::Xmm result;
if constexpr (std::is_member_function_pointer_v<Function>) {
result = ctx.reg_alloc.UseScratchXmm(args[0]);
result = ctx.reg_alloc.UseScratchXmm(code, args[0]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
(code.*fn)(result);
});
} else {
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]);
result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(code, args[0]);
result = ctx.reg_alloc.ScratchXmm(code);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
fn(result, xmm_a);
});
@ -328,13 +328,13 @@ void EmitTwoOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
ForceToDefaultNaN<fsize>(code, ctx.FPCR(fpcr_controlled), result);
}
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
return;
}
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(code, args[0]);
const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm(code);
if constexpr (std::is_member_function_pointer_v<Function>) {
code.movaps(result, xmm_a);
@ -352,7 +352,7 @@ void EmitTwoOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
HandleNaNs<fsize, 1>(code, ctx, fpcr_controlled, {result, xmm_a}, nan_mask, nan_handler);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
enum class CheckInputNaN {
@ -368,8 +368,8 @@ void EmitThreeOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
const bool fpcr_controlled = args[2].GetImmediateU1();
if (ctx.FPCR(fpcr_controlled).DN() || ctx.HasOptimization(OptimizationFlag::Unsafe_InaccurateNaN)) {
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
if constexpr (std::is_member_function_pointer_v<Function>) {
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
@ -385,14 +385,14 @@ void EmitThreeOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
ForceToDefaultNaN<fsize>(code, ctx.FPCR(fpcr_controlled), xmm_a);
}
ctx.reg_alloc.DefineValue(inst, xmm_a);
ctx.reg_alloc.DefineValue(code, inst, xmm_a);
return;
}
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm(code);
code.movaps(result, xmm_a);
@ -422,7 +422,7 @@ void EmitThreeOpVectorOperation(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
HandleNaNs<fsize, 2>(code, ctx, fpcr_controlled, {result, xmm_a, xmm_b}, nan_mask, nan_handler);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
template<typename F>
@ -448,16 +448,16 @@ void EmitTwoOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xbyak
template<size_t fpcr_controlled_arg_index = 1, typename F>
void EmitTwoOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, F lambda) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(code, args[0]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
ctx.reg_alloc.EndOfAllocScope();
ctx.reg_alloc.HostCall(nullptr);
ctx.reg_alloc.HostCall(code, nullptr);
const bool fpcr_controlled = args[fpcr_controlled_arg_index].GetImmediateU1();
EmitTwoOpFallbackWithoutRegAlloc(code, ctx, result, arg1, lambda, fpcr_controlled);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
template<typename Lambda>
@ -501,17 +501,17 @@ void EmitThreeOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xby
template<typename Lambda>
void EmitThreeOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm arg2 = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(code, args[0]);
const Xbyak::Xmm arg2 = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
ctx.reg_alloc.EndOfAllocScope();
ctx.reg_alloc.HostCall(nullptr);
ctx.reg_alloc.HostCall(code, nullptr);
const bool fpcr_controlled = args[2].GetImmediateU1();
EmitThreeOpFallbackWithoutRegAlloc(code, ctx, result, arg1, arg2, lambda, fpcr_controlled);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
enum class LoadPreviousResult {
@ -565,16 +565,16 @@ template<typename Lambda>
void EmitFourOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lambda lambda) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const bool fpcr_controlled = args[3].GetImmediateU1();
const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm arg2 = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm arg3 = ctx.reg_alloc.UseXmm(args[2]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm arg1 = ctx.reg_alloc.UseXmm(code, args[0]);
const Xbyak::Xmm arg2 = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm arg3 = ctx.reg_alloc.UseXmm(code, args[2]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
ctx.reg_alloc.EndOfAllocScope();
ctx.reg_alloc.HostCall(nullptr);
ctx.reg_alloc.HostCall(code, nullptr);
EmitFourOpFallbackWithoutRegAlloc(code, ctx, result, arg1, arg2, arg3, lambda, fpcr_controlled);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
} // anonymous namespace
@ -582,9 +582,9 @@ void EmitFourOpFallback(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, Lam
template<size_t fsize>
void FPVectorAbs(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
code.andps(a, GetNonSignMaskVector<fsize>(code));
ctx.reg_alloc.DefineValue(inst, a);
ctx.reg_alloc.DefineValue(code, inst, a);
}
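
FPVectorAbs only has to clear the IEEE sign bit (it must neither quieten nor signal on NaN), which is why a single andps with the non-sign mask suffices. Scalar equivalent for reference (illustrative name):

    #include <bit>
    #include <cstdint>

    float fabs_via_bits(float x) {
        return std::bit_cast<float>(std::bit_cast<std::uint32_t>(x) & 0x7FFFFFFFu);
    }
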
void EmitX64::EmitFPVectorAbs16(EmitContext& ctx, IR::Inst* inst) {
@ -626,29 +626,29 @@ void EmitX64::EmitFPVectorEqual16(EmitContext& ctx, IR::Inst* inst) {
void EmitX64::EmitFPVectorEqual32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const bool fpcr_controlled = args[2].GetImmediateU1();
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm b = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[1]) : ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm b = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(code, args[1]) : ctx.reg_alloc.UseXmm(code, args[1]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
DenormalsAreZero<32>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
code.cmpeqps(a, b);
});
ctx.reg_alloc.DefineValue(inst, a);
ctx.reg_alloc.DefineValue(code, inst, a);
}
void EmitX64::EmitFPVectorEqual64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const bool fpcr_controlled = args[2].GetImmediateU1();
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm b = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[1]) : ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm b = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(code, args[1]) : ctx.reg_alloc.UseXmm(code, args[1]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
DenormalsAreZero<64>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
code.cmpeqpd(a, b);
});
ctx.reg_alloc.DefineValue(inst, a);
ctx.reg_alloc.DefineValue(code, inst, a);
}
template<FP::RoundingMode rounding_mode>
@ -664,13 +664,13 @@ void EmitX64::EmitFPVectorFromHalf32(EmitContext& ctx, IR::Inst* inst) {
if (code.HasHostFeature(HostFeature::F16C) && !ctx.FPCR().AHP() && !ctx.FPCR().FZ16()) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm value = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm value = ctx.reg_alloc.UseXmm(code, args[0]);
code.vcvtph2ps(result, value);
ForceToDefaultNaN<32>(code, ctx.FPCR(fpcr_controlled), result);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
} else {
switch (rounding_mode) {
case FP::RoundingMode::ToNearest_TieEven:
@ -696,7 +696,7 @@ void EmitX64::EmitFPVectorFromHalf32(EmitContext& ctx, IR::Inst* inst) {
void EmitX64::EmitFPVectorFromSignedFixed32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const int fbits = args[1].GetImmediateU8();
const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
const bool fpcr_controlled = args[3].GetImmediateU1();
@ -709,12 +709,12 @@ void EmitX64::EmitFPVectorFromSignedFixed32(EmitContext& ctx, IR::Inst* inst) {
}
});
ctx.reg_alloc.DefineValue(inst, xmm);
ctx.reg_alloc.DefineValue(code, inst, xmm);
}
void EmitX64::EmitFPVectorFromSignedFixed64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const int fbits = args[1].GetImmediateU8();
const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
const bool fpcr_controlled = args[3].GetImmediateU1();
@ -724,8 +724,8 @@ void EmitX64::EmitFPVectorFromSignedFixed64(EmitContext& ctx, IR::Inst* inst) {
if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
code.vcvtqq2pd(xmm, xmm);
} else if (code.HasHostFeature(HostFeature::SSE41)) {
const Xbyak::Xmm xmm_tmp = ctx.reg_alloc.ScratchXmm();
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
const Xbyak::Xmm xmm_tmp = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(code);
// First quadword
code.movq(tmp, xmm);
@ -738,9 +738,9 @@ void EmitX64::EmitFPVectorFromSignedFixed64(EmitContext& ctx, IR::Inst* inst) {
// Combine
code.unpcklpd(xmm, xmm_tmp);
} else {
const Xbyak::Xmm high_xmm = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_tmp = ctx.reg_alloc.ScratchXmm();
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
const Xbyak::Xmm high_xmm = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm xmm_tmp = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(code);
// First quadword
code.movhlps(high_xmm, xmm);
@ -760,12 +760,12 @@ void EmitX64::EmitFPVectorFromSignedFixed64(EmitContext& ctx, IR::Inst* inst) {
}
});
ctx.reg_alloc.DefineValue(inst, xmm);
ctx.reg_alloc.DefineValue(code, inst, xmm);
}
void EmitX64::EmitFPVectorFromUnsignedFixed32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const int fbits = args[1].GetImmediateU8();
const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
const bool fpcr_controlled = args[3].GetImmediateU1();
@ -779,7 +779,7 @@ void EmitX64::EmitFPVectorFromUnsignedFixed32(EmitContext& ctx, IR::Inst* inst)
const Xbyak::Address mem_53000000 = code.BConst<32>(xword, 0x53000000);
const Xbyak::Address mem_D3000080 = code.BConst<32>(xword, 0xD3000080);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
if (code.HasHostFeature(HostFeature::AVX)) {
code.vpblendw(tmp, xmm, mem_4B000000, 0b10101010);
@ -810,12 +810,12 @@ void EmitX64::EmitFPVectorFromUnsignedFixed32(EmitContext& ctx, IR::Inst* inst)
}
});
ctx.reg_alloc.DefineValue(inst, xmm);
ctx.reg_alloc.DefineValue(code, inst, xmm);
}
void EmitX64::EmitFPVectorFromUnsignedFixed64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const int fbits = args[1].GetImmediateU8();
const FP::RoundingMode rounding_mode = static_cast<FP::RoundingMode>(args[2].GetImmediateU8());
const bool fpcr_controlled = args[3].GetImmediateU1();
@ -828,9 +828,9 @@ void EmitX64::EmitFPVectorFromUnsignedFixed64(EmitContext& ctx, IR::Inst* inst)
const Xbyak::Address unpack = code.Const(xword, 0x4530000043300000, 0);
const Xbyak::Address subtrahend = code.Const(xword, 0x4330000000000000, 0x4530000000000000);
const Xbyak::Xmm unpack_reg = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm subtrahend_reg = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm unpack_reg = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm subtrahend_reg = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm tmp1 = ctx.reg_alloc.ScratchXmm(code);
if (code.HasHostFeature(HostFeature::AVX)) {
code.vmovapd(unpack_reg, unpack);
@ -846,7 +846,7 @@ void EmitX64::EmitFPVectorFromUnsignedFixed64(EmitContext& ctx, IR::Inst* inst)
code.vhaddpd(xmm, tmp1, xmm);
} else {
const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(code);
code.movapd(unpack_reg, unpack);
code.movapd(subtrahend_reg, subtrahend);
@ -877,63 +877,63 @@ void EmitX64::EmitFPVectorFromUnsignedFixed64(EmitContext& ctx, IR::Inst* inst)
}
});
ctx.reg_alloc.DefineValue(inst, xmm);
ctx.reg_alloc.DefineValue(code, inst, xmm);
}
void EmitX64::EmitFPVectorGreater32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const bool fpcr_controlled = args[2].GetImmediateU1();
const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[0]) : ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(code, args[0]) : ctx.reg_alloc.UseXmm(code, args[0]);
const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
DenormalsAreZero<32>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
code.cmpltps(b, a);
});
ctx.reg_alloc.DefineValue(inst, b);
ctx.reg_alloc.DefineValue(code, inst, b);
}
void EmitX64::EmitFPVectorGreater64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const bool fpcr_controlled = args[2].GetImmediateU1();
const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[0]) : ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(code, args[0]) : ctx.reg_alloc.UseXmm(code, args[0]);
const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
DenormalsAreZero<64>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
code.cmpltpd(b, a);
});
ctx.reg_alloc.DefineValue(inst, b);
ctx.reg_alloc.DefineValue(code, inst, b);
}
void EmitX64::EmitFPVectorGreaterEqual32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const bool fpcr_controlled = args[2].GetImmediateU1();
const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[0]) : ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(code, args[0]) : ctx.reg_alloc.UseXmm(code, args[0]);
const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
DenormalsAreZero<32>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
code.cmpleps(b, a);
});
ctx.reg_alloc.DefineValue(inst, b);
ctx.reg_alloc.DefineValue(code, inst, b);
}
void EmitX64::EmitFPVectorGreaterEqual64(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const bool fpcr_controlled = args[2].GetImmediateU1();
const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[0]) : ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm a = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(code, args[0]) : ctx.reg_alloc.UseXmm(code, args[0]);
const Xbyak::Xmm b = ctx.reg_alloc.UseScratchXmm(code, args[1]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
DenormalsAreZero<64>(code, ctx.FPCR(fpcr_controlled), {a, b}, xmm0);
code.cmplepd(b, a);
});
ctx.reg_alloc.DefineValue(inst, b);
ctx.reg_alloc.DefineValue(code, inst, b);
}
template<size_t fsize, bool is_max>
@ -942,12 +942,12 @@ static void EmitFPVectorMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
if (ctx.FPCR(fpcr_controlled).DN()) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(args[1]) : ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.FPCR(fpcr_controlled).FZ() ? ctx.reg_alloc.UseScratchXmm(code, args[1]) : ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm mask = xmm0;
const Xbyak::Xmm eq = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm eq = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm(code);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
DenormalsAreZero<fsize>(code, ctx.FPCR(fpcr_controlled), {result, xmm_b}, mask);
@ -994,7 +994,7 @@ static void EmitFPVectorMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
}
});
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
return;
}
@ -1002,11 +1002,11 @@ static void EmitFPVectorMinMax(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
EmitThreeOpVectorOperation<fsize, DefaultIndexer>(
code, ctx, inst, [&](const Xbyak::Xmm& result, Xbyak::Xmm xmm_b) {
const Xbyak::Xmm mask = xmm0;
const Xbyak::Xmm eq = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm eq = ctx.reg_alloc.ScratchXmm(code);
if (ctx.FPCR(fpcr_controlled).FZ()) {
const Xbyak::Xmm prev_xmm_b = xmm_b;
xmm_b = ctx.reg_alloc.ScratchXmm();
xmm_b = ctx.reg_alloc.ScratchXmm(code);
code.movaps(xmm_b, prev_xmm_b);
DenormalsAreZero<fsize>(code, ctx.FPCR(fpcr_controlled), {result, xmm_b}, mask);
}
@ -1053,13 +1053,13 @@ static void EmitFPVectorMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::I
const bool fpcr_controlled = inst->GetArg(2).GetU1();
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm intermediate_result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseScratchXmm(code, args[1]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm intermediate_result = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm tmp1 = xmm0;
const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm tmp2 = ctx.reg_alloc.ScratchXmm(code);
// NaN requirements:
// op1 op2 result
@ -1139,7 +1139,7 @@ static void EmitFPVectorMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::I
}
});
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
return;
}
@ -1230,7 +1230,7 @@ static void EmitFPVectorMinMaxNumeric(BlockOfCode& code, EmitContext& ctx, IR::I
}
});
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void EmitX64::EmitFPVectorMax32(EmitContext& ctx, IR::Inst* inst) {
@ -1316,27 +1316,27 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
if (code.HasHostFeature(HostFeature::FMA) && !needs_rounding_correction && !needs_nan_correction) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm xmm_c = ctx.reg_alloc.UseXmm(args[2]);
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm xmm_c = ctx.reg_alloc.UseXmm(code, args[2]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
FCODE(vfmadd231p)(result, xmm_b, xmm_c);
ForceToDefaultNaN<fsize>(code, ctx.FPCR(fpcr_controlled), result);
});
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
return;
}
if (code.HasHostFeature(HostFeature::FMA | HostFeature::AVX)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm xmm_c = ctx.reg_alloc.UseXmm(args[2]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm xmm_c = ctx.reg_alloc.UseXmm(code, args[2]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel();
@ -1375,21 +1375,21 @@ void EmitFPVectorMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
code.jmp(*end, code.T_NEAR);
});
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
return;
}
if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseScratchXmm(args[1]);
const Xbyak::Xmm operand3 = ctx.reg_alloc.UseXmm(args[2]);
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseScratchXmm(code, args[1]);
const Xbyak::Xmm operand3 = ctx.reg_alloc.UseXmm(code, args[2]);
FCODE(mulp)(operand2, operand3);
FCODE(addp)(operand1, operand2);
ctx.reg_alloc.DefineValue(inst, operand1);
ctx.reg_alloc.DefineValue(code, inst, operand1);
return;
}
}
@ -1417,10 +1417,10 @@ static void EmitFPVectorMulX(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst
const bool fpcr_controlled = args[2].GetImmediateU1();
if (ctx.FPCR(fpcr_controlled).DN() && code.HasHostFeature(HostFeature::AVX)) {
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm twos = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm twos = ctx.reg_alloc.ScratchXmm(code);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
FCODE(vcmpunordp)(xmm0, result, operand);
@ -1434,14 +1434,14 @@ static void EmitFPVectorMulX(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst
FCODE(blendvp)(result, twos);
});
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
return;
}
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm xmm_a = ctx.reg_alloc.UseXmm(code, args[0]);
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm nan_mask = ctx.reg_alloc.ScratchXmm(code);
code.movaps(nan_mask, xmm_b);
code.movaps(result, xmm_a);
@ -1464,7 +1464,7 @@ static void EmitFPVectorMulX(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst
HandleNaNs<fsize, 2>(code, ctx, fpcr_controlled, {result, xmm_a, xmm_b}, nan_mask, nan_handler);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
void EmitX64::EmitFPVectorMulX32(EmitContext& ctx, IR::Inst* inst) {
@ -1482,12 +1482,12 @@ void FPVectorNeg(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm a = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Address mask = code.BConst<fsize>(xword, sign_mask);
code.xorps(a, mask);
ctx.reg_alloc.DefineValue(inst, a);
ctx.reg_alloc.DefineValue(code, inst, a);
}
void EmitX64::EmitFPVectorNeg16(EmitContext& ctx, IR::Inst* inst) {
@ -1512,7 +1512,7 @@ void EmitX64::EmitFPVectorPairedAdd64(EmitContext& ctx, IR::Inst* inst) {
void EmitX64::EmitFPVectorPairedAddLower32(EmitContext& ctx, IR::Inst* inst) {
EmitThreeOpVectorOperation<32, PairedLowerIndexer>(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm xmm_b) {
const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm(code);
code.xorps(zero, zero);
code.punpcklqdq(result, xmm_b);
code.haddps(result, zero);
@ -1521,7 +1521,7 @@ void EmitX64::EmitFPVectorPairedAddLower32(EmitContext& ctx, IR::Inst* inst) {
void EmitX64::EmitFPVectorPairedAddLower64(EmitContext& ctx, IR::Inst* inst) {
EmitThreeOpVectorOperation<64, PairedLowerIndexer>(code, ctx, inst, [&](Xbyak::Xmm result, Xbyak::Xmm xmm_b) {
const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm zero = ctx.reg_alloc.ScratchXmm(code);
code.xorps(zero, zero);
code.punpcklqdq(result, xmm_b);
code.haddpd(result, zero);
@ -1535,8 +1535,8 @@ static void EmitRecipEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
if constexpr (fsize != 16) {
if (ctx.HasOptimization(OptimizationFlag::Unsafe_ReducedErrorFP)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(code, args[0]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
FCODE(vrcp14p)(result, operand);
@ -1550,7 +1550,7 @@ static void EmitRecipEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
}
}
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
return;
}
}
@ -1589,16 +1589,16 @@ static void EmitRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const bool fpcr_controlled = args[2].GetImmediateU1();
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
code.movaps(result, GetVectorOf<fsize, false, 0, 2>(code));
FCODE(vfnmadd231p)(result, operand1, operand2);
});
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
return;
}
@ -1606,10 +1606,10 @@ static void EmitRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const bool fpcr_controlled = args[2].GetImmediateU1();
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel();
@ -1633,22 +1633,22 @@ static void EmitRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
code.jmp(*end, code.T_NEAR);
});
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
return;
}
if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
code.movaps(result, GetVectorOf<fsize, false, 0, 2>(code));
FCODE(mulp)(operand1, operand2);
FCODE(subp)(result, operand1);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
return;
}
}
@ -1757,8 +1757,8 @@ static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
if constexpr (fsize != 16) {
if (ctx.HasOptimization(OptimizationFlag::Unsafe_ReducedErrorFP)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(code, args[0]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
FCODE(vrsqrt14p)(result, operand);
@ -1772,7 +1772,7 @@ static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
}
}
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
return;
}
@ -1780,9 +1780,9 @@ static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const bool fpcr_controlled = args[1].GetImmediateU1();
const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm value = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm operand = ctx.reg_alloc.UseXmm(code, args[0]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm value = ctx.reg_alloc.ScratchXmm(code);
SharedLabel bad_values = GenSharedLabel(), end = GenSharedLabel();
@ -1816,7 +1816,7 @@ static void EmitRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* ins
code.jmp(*end, code.T_NEAR);
});
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
return;
}
}
@ -1851,9 +1851,9 @@ static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const bool fpcr_controlled = args[2].GetImmediateU1();
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
code.vmovaps(result, GetVectorOf<fsize, false, 0, 3>(code));
@ -1861,7 +1861,7 @@ static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
FCODE(vmulp)(result, result, GetVectorOf<fsize, false, -1, 1>(code));
});
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
return;
}
@ -1869,11 +1869,11 @@ static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const bool fpcr_controlled = args[2].GetImmediateU1();
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Xmm mask = ctx.reg_alloc.ScratchXmm(code);
SharedLabel end = GenSharedLabel(), fallback = GenSharedLabel();
@ -1902,23 +1902,23 @@ static void EmitRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
code.jmp(*end, code.T_NEAR);
});
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
return;
}
if (ctx.HasOptimization(OptimizationFlag::Unsafe_UnfuseFMA)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
code.movaps(result, GetVectorOf<fsize, false, 0, 3>(code));
FCODE(mulp)(operand1, operand2);
FCODE(subp)(result, operand1);
FCODE(mulp)(result, GetVectorOf<fsize, false, -1, 1>(code));
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
return;
}
}
@ -1972,12 +1972,12 @@ void EmitX64::EmitFPVectorToHalf32(EmitContext& ctx, IR::Inst* inst) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const auto round_imm = ConvertRoundingModeToX64Immediate(rounding_mode);
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]);
ForceToDefaultNaN<32>(code, ctx.FPCR(fpcr_controlled), result);
code.vcvtps2ph(result, result, u8(*round_imm));
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
} else {
switch (rounding_mode) {
case FP::RoundingMode::ToNearest_TieEven:
@ -2018,7 +2018,7 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
if (code.HasHostFeature(HostFeature::SSE41) && rounding != FP::RoundingMode::ToNearest_TieAwayFromZero) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm src = ctx.reg_alloc.UseScratchXmm(code, args[0]);
MaybeStandardFPSCRValue(code, ctx, fpcr_controlled, [&] {
const int round_imm = [&] {
@ -2045,8 +2045,8 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
if (code.HasHostFeature(HostFeature::AVX512_OrthoFloat)) {
code.vcvttpd2qq(src, src);
} else {
const Xbyak::Reg64 hi = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 lo = ctx.reg_alloc.ScratchGpr();
const Xbyak::Reg64 hi = ctx.reg_alloc.ScratchGpr(code);
const Xbyak::Reg64 lo = ctx.reg_alloc.ScratchGpr(code);
code.cvttsd2si(lo, src);
code.punpckhqdq(src, src);
@ -2093,12 +2093,12 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
FCODE(andp)(src, xmm0);
// Will we exceed unsigned range?
const Xbyak::Xmm exceed_unsigned = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm exceed_unsigned = ctx.reg_alloc.ScratchXmm(code);
code.movaps(exceed_unsigned, GetVectorOf<fsize, float_upper_limit_unsigned>(code));
FCODE(cmplep)(exceed_unsigned, src);
// Will we exceed signed range?
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
code.movaps(tmp, GetVectorOf<fsize, float_upper_limit_signed>(code));
code.movaps(xmm0, tmp);
FCODE(cmplep)(xmm0, src);
@ -2122,7 +2122,7 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
}
});
ctx.reg_alloc.DefineValue(inst, src);
ctx.reg_alloc.DefineValue(code, inst, src);
return;
}
}
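A rough sketch of the call-site shape these hunks converge on, for orientation (EmitExampleOp is a hypothetical opcode handler; EmitContext, IR::Inst and the member `code` are assumed from dynarmic's x64 backend, so treat this as an illustration rather than a buildable unit):

void EmitX64::EmitExampleOp(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    // Allocator helpers now take the code emitter explicitly instead of
    // reading it from a pointer stored inside RegAlloc.
    const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]);
    const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
    code.xorps(tmp, tmp);
    code.addps(result, tmp);
    // ...and so does the final definition of the instruction's result.
    ctx.reg_alloc.DefineValue(code, inst, result);
}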

View File

@ -26,9 +26,9 @@ namespace {
void EmitVectorSaturatedNative(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, void (Xbyak::CodeGenerator::*saturated_fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&), void (Xbyak::CodeGenerator::*unsaturated_fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&), void (Xbyak::CodeGenerator::*sub_fn)(const Xbyak::Mmx& mmx, const Xbyak::Operand&)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm addend = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr().cvt8();
const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm addend = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr(code).cvt8();
code.movaps(xmm0, result);
@ -39,7 +39,7 @@ void EmitVectorSaturatedNative(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
if (code.HasHostFeature(HostFeature::SSE41)) {
code.ptest(xmm0, xmm0);
} else {
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
code.pxor(tmp, tmp);
code.pcmpeqw(xmm0, tmp);
code.pmovmskb(overflow.cvt32(), xmm0);
@ -49,7 +49,7 @@ void EmitVectorSaturatedNative(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
code.setnz(overflow);
code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
}
enum class Op {
@ -65,10 +65,10 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512DQ)) {
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr().cvt8();
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr(code).cvt8();
code.movaps(xmm0, operand1);
@ -91,15 +91,15 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
code.setnz(overflow);
code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
return;
}
const Xbyak::Xmm operand1 = code.HasHostFeature(HostFeature::AVX) ? ctx.reg_alloc.UseXmm(args[0]) : ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm result = code.HasHostFeature(HostFeature::AVX) ? ctx.reg_alloc.ScratchXmm() : operand1;
const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr().cvt8();
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm operand1 = code.HasHostFeature(HostFeature::AVX) ? ctx.reg_alloc.UseXmm(code, args[0]) : ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm result = code.HasHostFeature(HostFeature::AVX) ? ctx.reg_alloc.ScratchXmm(code) : operand1;
const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr(code).cvt8();
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
if (code.HasHostFeature(HostFeature::AVX)) {
if constexpr (op == Op::Add) {
@ -150,7 +150,7 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
if (code.HasHostFeature(HostFeature::SSE41)) {
FCODE(blendvp)(result, tmp);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
} else {
code.psrad(xmm0, 31);
if constexpr (esize == 64) {
@ -161,7 +161,7 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
code.pandn(xmm0, result);
code.por(tmp, xmm0);
ctx.reg_alloc.DefineValue(inst, tmp);
ctx.reg_alloc.DefineValue(code, inst, tmp);
}
}
@ -172,10 +172,10 @@ void EmitVectorUnsignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst*
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512DQ)) {
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr().cvt8();
const Xbyak::Xmm operand1 = ctx.reg_alloc.UseXmm(code, args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm(code);
const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr(code).cvt8();
if constexpr (op == Op::Add) {
ICODE(vpadd)(result, operand1, operand2);
@ -191,15 +191,15 @@ void EmitVectorUnsignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst*
code.setnz(overflow);
code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
return;
}
const Xbyak::Xmm operand1 = code.HasHostFeature(HostFeature::AVX) ? ctx.reg_alloc.UseXmm(args[0]) : ctx.reg_alloc.UseScratchXmm(args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(args[1]);
const Xbyak::Xmm result = code.HasHostFeature(HostFeature::AVX) ? ctx.reg_alloc.ScratchXmm() : operand1;
const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr().cvt8();
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
const Xbyak::Xmm operand1 = code.HasHostFeature(HostFeature::AVX) ? ctx.reg_alloc.UseXmm(code, args[0]) : ctx.reg_alloc.UseScratchXmm(code, args[0]);
const Xbyak::Xmm operand2 = ctx.reg_alloc.UseXmm(code, args[1]);
const Xbyak::Xmm result = code.HasHostFeature(HostFeature::AVX) ? ctx.reg_alloc.ScratchXmm(code) : operand1;
const Xbyak::Reg8 overflow = ctx.reg_alloc.ScratchGpr(code).cvt8();
const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(code);
if constexpr (op == Op::Add) {
if (code.HasHostFeature(HostFeature::AVX)) {
@ -252,10 +252,10 @@ void EmitVectorUnsignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst*
if constexpr (op == Op::Add) {
code.por(result, tmp);
ctx.reg_alloc.DefineValue(inst, result);
ctx.reg_alloc.DefineValue(code, inst, result);
} else {
code.pandn(tmp, result);
ctx.reg_alloc.DefineValue(inst, tmp);
ctx.reg_alloc.DefineValue(code, inst, tmp);
}
}
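For reference, the per-lane property the sequence above checks, written as plain C++ (a u16 unsigned-add lane is just an example; the emitted code performs the same comparison across the whole vector and ORs the outcome into the sticky fpsr_qc byte):

#include <cstdint>

// Returns true when the lane saturated, i.e. when the QC flag should be set.
static bool SaturatedAddU16(std::uint16_t a, std::uint16_t b, std::uint16_t& out) {
    const std::uint32_t wide = std::uint32_t(a) + std::uint32_t(b);
    out = wide > 0xFFFF ? std::uint16_t(0xFFFF) : std::uint16_t(wide);  // paddusw per lane
    return out != std::uint16_t(wide);  // differs from the wrapping paddw result
}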

View File

@ -1,25 +0,0 @@
/* This file is part of the dynarmic project.
* Copyright (c) 2016 MerryMage
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/backend/x64/hostloc.h"
#include <xbyak/xbyak.h>
#include "dynarmic/backend/x64/abi.h"
#include "dynarmic/backend/x64/stack_layout.h"
namespace Dynarmic::Backend::X64 {
Xbyak::Reg64 HostLocToReg64(HostLoc loc) {
ASSERT(HostLocIsGPR(loc));
return Xbyak::Reg64(static_cast<int>(loc));
}
Xbyak::Xmm HostLocToXmm(HostLoc loc) {
ASSERT(HostLocIsXMM(loc));
return Xbyak::Xmm(static_cast<int>(loc) - static_cast<int>(HostLoc::XMM0));
}
} // namespace Dynarmic::Backend::X64

View File

@ -152,7 +152,14 @@ const HostLocList any_xmm = {
HostLoc::XMM15,
};
Xbyak::Reg64 HostLocToReg64(HostLoc loc);
Xbyak::Xmm HostLocToXmm(HostLoc loc);
inline Xbyak::Reg64 HostLocToReg64(HostLoc loc) noexcept {
ASSERT(HostLocIsGPR(loc));
return Xbyak::Reg64(int(loc));
}
inline Xbyak::Xmm HostLocToXmm(HostLoc loc) noexcept {
ASSERT(HostLocIsXMM(loc));
return Xbyak::Xmm(int(loc) - int(HostLoc::XMM0));
}
} // namespace Dynarmic::Backend::X64
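Usage stays the same as with the old out-of-line definitions; for instance (assuming the usual HostLoc layout, where the GPR enumerators mirror x64 register encodings and XMM0 follows them):

const Xbyak::Reg64 gpr = HostLocToReg64(HostLoc::RCX);  // rcx
const Xbyak::Xmm   xmm = HostLocToXmm(HostLoc::XMM3);   // xmm3 (index rebased on HostLoc::XMM0)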

View File

@ -24,15 +24,6 @@
namespace Dynarmic::Backend::X64 {
#define MAYBE_AVX(OPCODE, ...) \
[&] { \
if (code->HasHostFeature(HostFeature::AVX)) { \
code->v##OPCODE(__VA_ARGS__); \
} else { \
code->OPCODE(__VA_ARGS__); \
} \
}()
static inline bool CanExchange(const HostLoc a, const HostLoc b) noexcept {
return HostLocIsGPR(a) && HostLocIsGPR(b);
}
@ -107,14 +98,14 @@ void HostLocInfo::AddValue(IR::Inst* inst) noexcept {
max_bit_width = std::max<uint8_t>(max_bit_width, std::countr_zero(GetBitWidth(inst->GetType())));
}
void HostLocInfo::EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept {
void HostLocInfo::EmitVerboseDebuggingOutput(BlockOfCode& code, size_t host_loc_index) const noexcept {
using namespace Xbyak::util;
for (auto const value : values) {
code->mov(code->ABI_PARAM1, rsp);
code->mov(code->ABI_PARAM2, host_loc_index);
code->mov(code->ABI_PARAM3, value->GetName());
code->mov(code->ABI_PARAM4, GetBitWidth(value->GetType()));
code->CallFunction(PrintVerboseDebuggingOutputLine);
code.mov(code.ABI_PARAM1, rsp);
code.mov(code.ABI_PARAM2, host_loc_index);
code.mov(code.ABI_PARAM3, value->GetName());
code.mov(code.ABI_PARAM4, GetBitWidth(value->GetType()));
code.CallFunction(PrintVerboseDebuggingOutputLine);
}
}
@ -128,7 +119,7 @@ bool Argument::FitsInImmediateU32() const noexcept {
bool Argument::FitsInImmediateS32() const noexcept {
if (!IsImmediate())
return false;
const s64 imm = static_cast<s64>(value.GetImmediateAsU64());
const s64 imm = s64(value.GetImmediateAsU64());
return -s64(0x80000000) <= imm && imm <= s64(0x7FFFFFFF);
}
@ -174,36 +165,38 @@ IR::AccType Argument::GetImmediateAccType() const noexcept {
}
/// Is this value currently in a GPR?
bool Argument::IsInGpr() const noexcept {
bool Argument::IsInGpr(RegAlloc& reg_alloc) const noexcept {
if (IsImmediate())
return false;
return HostLocIsGPR(*reg_alloc.ValueLocation(value.GetInst()));
}
/// Is this value currently in an XMM?
bool Argument::IsInXmm() const noexcept {
bool Argument::IsInXmm(RegAlloc& reg_alloc) const noexcept {
if (IsImmediate())
return false;
return HostLocIsXMM(*reg_alloc.ValueLocation(value.GetInst()));
}
/// Is this value currently in memory?
bool Argument::IsInMemory() const noexcept {
bool Argument::IsInMemory(RegAlloc& reg_alloc) const noexcept {
if (IsImmediate())
return false;
return HostLocIsSpill(*reg_alloc.ValueLocation(value.GetInst()));
}
RegAlloc::RegAlloc(BlockOfCode* code, boost::container::static_vector<HostLoc, 28> gpr_order, boost::container::static_vector<HostLoc, 28> xmm_order) noexcept
RegAlloc::RegAlloc(boost::container::static_vector<HostLoc, 28> gpr_order, boost::container::static_vector<HostLoc, 28> xmm_order) noexcept
: gpr_order(gpr_order),
xmm_order(xmm_order),
code(code)
xmm_order(xmm_order)
{}
//static std::uint64_t Zfncwjkrt_blockOfCodeShim = 0;
RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(const IR::Inst* inst) noexcept {
ArgumentInfo ret{Argument{*this}, Argument{*this}, Argument{*this}, Argument{*this}};
ArgumentInfo ret{
Argument{},
Argument{},
Argument{},
Argument{}
};
for (size_t i = 0; i < inst->NumArgs(); i++) {
const auto arg = inst->GetArg(i);
ret[i].value = arg;
@ -228,34 +221,34 @@ void RegAlloc::RegisterPseudoOperation(const IR::Inst* inst) noexcept {
}
}
Xbyak::Reg64 RegAlloc::UseScratchGpr(Argument& arg) noexcept {
Xbyak::Reg64 RegAlloc::UseScratchGpr(BlockOfCode& code, Argument& arg) noexcept {
ASSERT(!arg.allocated);
arg.allocated = true;
return HostLocToReg64(UseScratchImpl(arg.value, gpr_order));
return HostLocToReg64(UseScratchImpl(code, arg.value, gpr_order));
}
Xbyak::Xmm RegAlloc::UseScratchXmm(Argument& arg) noexcept {
Xbyak::Xmm RegAlloc::UseScratchXmm(BlockOfCode& code, Argument& arg) noexcept {
ASSERT(!arg.allocated);
arg.allocated = true;
return HostLocToXmm(UseScratchImpl(arg.value, xmm_order));
return HostLocToXmm(UseScratchImpl(code, arg.value, xmm_order));
}
void RegAlloc::UseScratch(Argument& arg, HostLoc host_loc) noexcept {
void RegAlloc::UseScratch(BlockOfCode& code, Argument& arg, HostLoc host_loc) noexcept {
ASSERT(!arg.allocated);
arg.allocated = true;
UseScratchImpl(arg.value, {host_loc});
UseScratchImpl(code, arg.value, {host_loc});
}
void RegAlloc::DefineValue(IR::Inst* inst, const Xbyak::Reg& reg) noexcept {
void RegAlloc::DefineValue(BlockOfCode& code, IR::Inst* inst, const Xbyak::Reg& reg) noexcept {
ASSERT(reg.getKind() == Xbyak::Operand::XMM || reg.getKind() == Xbyak::Operand::REG);
const auto hostloc = static_cast<HostLoc>(reg.getIdx() + static_cast<size_t>(reg.getKind() == Xbyak::Operand::XMM ? HostLoc::XMM0 : HostLoc::RAX));
DefineValueImpl(inst, hostloc);
DefineValueImpl(code, inst, hostloc);
}
void RegAlloc::DefineValue(IR::Inst* inst, Argument& arg) noexcept {
void RegAlloc::DefineValue(BlockOfCode& code, IR::Inst* inst, Argument& arg) noexcept {
ASSERT(!arg.allocated);
arg.allocated = true;
DefineValueImpl(inst, arg.value);
DefineValueImpl(code, inst, arg.value);
}
void RegAlloc::Release(const Xbyak::Reg& reg) noexcept {
@ -264,9 +257,9 @@ void RegAlloc::Release(const Xbyak::Reg& reg) noexcept {
LocInfo(hostloc).ReleaseOne();
}
HostLoc RegAlloc::UseImpl(IR::Value use_value, const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept {
HostLoc RegAlloc::UseImpl(BlockOfCode& code, IR::Value use_value, const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept {
if (use_value.IsImmediate()) {
return LoadImmediate(use_value, ScratchImpl(desired_locations));
return LoadImmediate(code, use_value, ScratchImpl(code, desired_locations));
}
const auto* use_inst = use_value.GetInst();
@ -280,25 +273,25 @@ HostLoc RegAlloc::UseImpl(IR::Value use_value, const boost::container::static_ve
}
if (LocInfo(current_location).IsLocked()) {
return UseScratchImpl(use_value, desired_locations);
return UseScratchImpl(code, use_value, desired_locations);
}
const HostLoc destination_location = SelectARegister(desired_locations);
if (max_bit_width > HostLocBitWidth(destination_location)) {
return UseScratchImpl(use_value, desired_locations);
return UseScratchImpl(code, use_value, desired_locations);
} else if (CanExchange(destination_location, current_location)) {
Exchange(destination_location, current_location);
Exchange(code, destination_location, current_location);
} else {
MoveOutOfTheWay(destination_location);
Move(destination_location, current_location);
MoveOutOfTheWay(code, destination_location);
Move(code, destination_location, current_location);
}
LocInfo(destination_location).ReadLock();
return destination_location;
}
HostLoc RegAlloc::UseScratchImpl(IR::Value use_value, const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept {
HostLoc RegAlloc::UseScratchImpl(BlockOfCode& code, IR::Value use_value, const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept {
if (use_value.IsImmediate()) {
return LoadImmediate(use_value, ScratchImpl(desired_locations));
return LoadImmediate(code, use_value, ScratchImpl(code, desired_locations));
}
const auto* use_inst = use_value.GetInst();
@ -308,7 +301,7 @@ HostLoc RegAlloc::UseScratchImpl(IR::Value use_value, const boost::container::st
const bool can_use_current_location = std::find(desired_locations.begin(), desired_locations.end(), current_location) != desired_locations.end();
if (can_use_current_location && !LocInfo(current_location).IsLocked()) {
if (!LocInfo(current_location).IsLastUse()) {
MoveOutOfTheWay(current_location);
MoveOutOfTheWay(code, current_location);
} else {
LocInfo(current_location).SetLastUse();
}
@ -317,20 +310,22 @@ HostLoc RegAlloc::UseScratchImpl(IR::Value use_value, const boost::container::st
}
const HostLoc destination_location = SelectARegister(desired_locations);
MoveOutOfTheWay(destination_location);
CopyToScratch(bit_width, destination_location, current_location);
MoveOutOfTheWay(code, destination_location);
CopyToScratch(code, bit_width, destination_location, current_location);
LocInfo(destination_location).WriteLock();
return destination_location;
}
HostLoc RegAlloc::ScratchImpl(const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept {
HostLoc RegAlloc::ScratchImpl(BlockOfCode& code, const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept {
const HostLoc location = SelectARegister(desired_locations);
MoveOutOfTheWay(location);
MoveOutOfTheWay(code, location);
LocInfo(location).WriteLock();
return location;
}
void RegAlloc::HostCall(IR::Inst* result_def,
void RegAlloc::HostCall(
BlockOfCode& code,
IR::Inst* result_def,
const std::optional<Argument::copyable_reference> arg0,
const std::optional<Argument::copyable_reference> arg1,
const std::optional<Argument::copyable_reference> arg2,
@ -348,20 +343,20 @@ void RegAlloc::HostCall(IR::Inst* result_def,
return ret;
}();
ScratchGpr(ABI_RETURN);
if (result_def) {
DefineValueImpl(result_def, ABI_RETURN);
}
ScratchGpr(code, ABI_RETURN);
if (result_def)
DefineValueImpl(code, result_def, ABI_RETURN);
for (size_t i = 0; i < args.size(); i++) {
if (args[i]) {
UseScratch(*args[i], args_hostloc[i]);
UseScratch(code, *args[i], args_hostloc[i]);
} else {
ScratchGpr(args_hostloc[i]); // TODO: Force spill
ScratchGpr(code, args_hostloc[i]); // TODO: Force spill
}
}
// Must match with ScratchImpl
for (auto const gpr : other_caller_save) {
MoveOutOfTheWay(gpr);
MoveOutOfTheWay(code, gpr);
LocInfo(gpr).WriteLock();
}
for (size_t i = 0; i < args.size(); i++) {
@ -370,13 +365,13 @@ void RegAlloc::HostCall(IR::Inst* result_def,
const Xbyak::Reg64 reg = HostLocToReg64(args_hostloc[i]);
switch (args[i]->get().GetType()) {
case IR::Type::U8:
code->movzx(reg.cvt32(), reg.cvt8());
code.movzx(reg.cvt32(), reg.cvt8());
break;
case IR::Type::U16:
code->movzx(reg.cvt32(), reg.cvt16());
code.movzx(reg.cvt32(), reg.cvt16());
break;
case IR::Type::U32:
code->mov(reg.cvt32(), reg.cvt32());
code.mov(reg.cvt32(), reg.cvt32());
break;
case IR::Type::U64:
break; //no op
@ -387,18 +382,18 @@ void RegAlloc::HostCall(IR::Inst* result_def,
}
}
void RegAlloc::AllocStackSpace(const size_t stack_space) noexcept {
void RegAlloc::AllocStackSpace(BlockOfCode& code, const size_t stack_space) noexcept {
ASSERT(stack_space < size_t((std::numeric_limits<s32>::max)()));
ASSERT(reserved_stack_space == 0);
reserved_stack_space = stack_space;
code->sub(code->rsp, u32(stack_space));
code.sub(code.rsp, u32(stack_space));
}
void RegAlloc::ReleaseStackSpace(const size_t stack_space) noexcept {
void RegAlloc::ReleaseStackSpace(BlockOfCode& code, const size_t stack_space) noexcept {
ASSERT(stack_space < size_t((std::numeric_limits<s32>::max)()));
ASSERT(reserved_stack_space == stack_space);
reserved_stack_space = 0;
code->add(code->rsp, u32(stack_space));
code.add(code.rsp, u32(stack_space));
}
HostLoc RegAlloc::SelectARegister(const boost::container::static_vector<HostLoc, 28>& desired_locations) const noexcept {
@ -458,92 +453,75 @@ HostLoc RegAlloc::SelectARegister(const boost::container::static_vector<HostLoc,
return *it_final;
}
void RegAlloc::DefineValueImpl(IR::Inst* def_inst, HostLoc host_loc) noexcept {
std::optional<HostLoc> RegAlloc::ValueLocation(const IR::Inst* value) const noexcept {
for (size_t i = 0; i < hostloc_info.size(); i++)
if (hostloc_info[i].ContainsValue(value))
return HostLoc(i);
return std::nullopt;
}
void RegAlloc::DefineValueImpl(BlockOfCode& code, IR::Inst* def_inst, HostLoc host_loc) noexcept {
ASSERT(!ValueLocation(def_inst) && "def_inst has already been defined");
LocInfo(host_loc).AddValue(def_inst);
}
void RegAlloc::DefineValueImpl(IR::Inst* def_inst, const IR::Value& use_inst) noexcept {
void RegAlloc::DefineValueImpl(BlockOfCode& code, IR::Inst* def_inst, const IR::Value& use_inst) noexcept {
ASSERT(!ValueLocation(def_inst) && "def_inst has already been defined");
if (use_inst.IsImmediate()) {
const HostLoc location = ScratchImpl(gpr_order);
DefineValueImpl(def_inst, location);
LoadImmediate(use_inst, location);
const HostLoc location = ScratchImpl(code, gpr_order);
DefineValueImpl(code, def_inst, location);
LoadImmediate(code, use_inst, location);
return;
}
ASSERT(ValueLocation(use_inst.GetInst()) && "use_inst must already be defined");
const HostLoc location = *ValueLocation(use_inst.GetInst());
DefineValueImpl(def_inst, location);
DefineValueImpl(code, def_inst, location);
}
HostLoc RegAlloc::LoadImmediate(IR::Value imm, HostLoc host_loc) noexcept {
ASSERT(imm.IsImmediate() && "imm is not an immediate");
if (HostLocIsGPR(host_loc)) {
const Xbyak::Reg64 reg = HostLocToReg64(host_loc);
const u64 imm_value = imm.GetImmediateAsU64();
if (imm_value == 0) {
code->xor_(reg.cvt32(), reg.cvt32());
} else {
code->mov(reg, imm_value);
}
} else if (HostLocIsXMM(host_loc)) {
const Xbyak::Xmm reg = HostLocToXmm(host_loc);
const u64 imm_value = imm.GetImmediateAsU64();
if (imm_value == 0) {
MAYBE_AVX(xorps, reg, reg);
} else {
MAYBE_AVX(movaps, reg, code->Const(code->xword, imm_value));
}
} else {
UNREACHABLE();
}
return host_loc;
}
void RegAlloc::Move(HostLoc to, HostLoc from) noexcept {
void RegAlloc::Move(BlockOfCode& code, HostLoc to, HostLoc from) noexcept {
const size_t bit_width = LocInfo(from).GetMaxBitWidth();
ASSERT(LocInfo(to).IsEmpty() && !LocInfo(from).IsLocked());
ASSERT(bit_width <= HostLocBitWidth(to));
ASSERT(!LocInfo(from).IsEmpty() && "Mov eliminated");
EmitMove(bit_width, to, from);
EmitMove(code, bit_width, to, from);
LocInfo(to) = std::exchange(LocInfo(from), {});
}
void RegAlloc::CopyToScratch(size_t bit_width, HostLoc to, HostLoc from) noexcept {
void RegAlloc::CopyToScratch(BlockOfCode& code, size_t bit_width, HostLoc to, HostLoc from) noexcept {
ASSERT(LocInfo(to).IsEmpty() && !LocInfo(from).IsEmpty());
EmitMove(bit_width, to, from);
EmitMove(code, bit_width, to, from);
}
void RegAlloc::Exchange(HostLoc a, HostLoc b) noexcept {
void RegAlloc::Exchange(BlockOfCode& code, HostLoc a, HostLoc b) noexcept {
ASSERT(!LocInfo(a).IsLocked() && !LocInfo(b).IsLocked());
ASSERT(LocInfo(a).GetMaxBitWidth() <= HostLocBitWidth(b));
ASSERT(LocInfo(b).GetMaxBitWidth() <= HostLocBitWidth(a));
if (LocInfo(a).IsEmpty()) {
Move(a, b);
Move(code, a, b);
} else if (LocInfo(b).IsEmpty()) {
Move(b, a);
Move(code, b, a);
} else {
EmitExchange(a, b);
EmitExchange(code, a, b);
std::swap(LocInfo(a), LocInfo(b));
}
}
void RegAlloc::MoveOutOfTheWay(HostLoc reg) noexcept {
void RegAlloc::MoveOutOfTheWay(BlockOfCode& code, HostLoc reg) noexcept {
ASSERT(!LocInfo(reg).IsLocked());
if (!LocInfo(reg).IsEmpty()) {
SpillRegister(reg);
SpillRegister(code, reg);
}
}
void RegAlloc::SpillRegister(HostLoc loc) noexcept {
void RegAlloc::SpillRegister(BlockOfCode& code, HostLoc loc) noexcept {
ASSERT(HostLocIsRegister(loc) && "Only registers can be spilled");
ASSERT(!LocInfo(loc).IsEmpty() && "There is no need to spill unoccupied registers");
ASSERT(!LocInfo(loc).IsLocked() && "Registers that have been allocated must not be spilt");
auto const new_loc = FindFreeSpill(HostLocIsXMM(loc));
Move(new_loc, loc);
Move(code, new_loc, loc);
}
HostLoc RegAlloc::FindFreeSpill(bool is_xmm) const noexcept {
@ -568,9 +546,39 @@ HostLoc RegAlloc::FindFreeSpill(bool is_xmm) const noexcept {
if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty())
return loc;
UNREACHABLE();
};
}
void RegAlloc::EmitMove(const size_t bit_width, const HostLoc to, const HostLoc from) noexcept {
#define MAYBE_AVX(OPCODE, ...) \
[&] { \
if (code.HasHostFeature(HostFeature::AVX)) code.v##OPCODE(__VA_ARGS__); \
else code.OPCODE(__VA_ARGS__); \
}()
HostLoc RegAlloc::LoadImmediate(BlockOfCode& code, IR::Value imm, HostLoc host_loc) noexcept {
ASSERT(imm.IsImmediate() && "imm is not an immediate");
if (HostLocIsGPR(host_loc)) {
const Xbyak::Reg64 reg = HostLocToReg64(host_loc);
const u64 imm_value = imm.GetImmediateAsU64();
if (imm_value == 0) {
code.xor_(reg.cvt32(), reg.cvt32());
} else {
code.mov(reg, imm_value);
}
} else if (HostLocIsXMM(host_loc)) {
const Xbyak::Xmm reg = HostLocToXmm(host_loc);
const u64 imm_value = imm.GetImmediateAsU64();
if (imm_value == 0) {
MAYBE_AVX(xorps, reg, reg);
} else {
MAYBE_AVX(movaps, reg, code.Const(code.xword, imm_value));
}
} else {
UNREACHABLE();
}
return host_loc;
}
void RegAlloc::EmitMove(BlockOfCode& code, const size_t bit_width, const HostLoc to, const HostLoc from) noexcept {
auto const spill_to_op_arg_helper = [&](HostLoc loc, size_t reserved_stack_space) {
ASSERT(HostLocIsSpill(loc));
size_t i = size_t(loc) - size_t(HostLoc::FirstSpill);
@ -585,9 +593,9 @@ void RegAlloc::EmitMove(const size_t bit_width, const HostLoc to, const HostLoc
} else if (HostLocIsGPR(to) && HostLocIsGPR(from)) {
ASSERT(bit_width != 128);
if (bit_width == 64) {
code->mov(HostLocToReg64(to), HostLocToReg64(from));
code.mov(HostLocToReg64(to), HostLocToReg64(from));
} else {
code->mov(HostLocToReg64(to).cvt32(), HostLocToReg64(from).cvt32());
code.mov(HostLocToReg64(to).cvt32(), HostLocToReg64(from).cvt32());
}
} else if (HostLocIsXMM(to) && HostLocIsGPR(from)) {
ASSERT(bit_width != 128);
@ -642,25 +650,26 @@ void RegAlloc::EmitMove(const size_t bit_width, const HostLoc to, const HostLoc
} else if (HostLocIsGPR(to) && HostLocIsSpill(from)) {
ASSERT(bit_width != 128);
if (bit_width == 64) {
code->mov(HostLocToReg64(to), Xbyak::util::qword[spill_to_op_arg_helper(from, reserved_stack_space)]);
code.mov(HostLocToReg64(to), Xbyak::util::qword[spill_to_op_arg_helper(from, reserved_stack_space)]);
} else {
code->mov(HostLocToReg64(to).cvt32(), Xbyak::util::dword[spill_to_op_arg_helper(from, reserved_stack_space)]);
code.mov(HostLocToReg64(to).cvt32(), Xbyak::util::dword[spill_to_op_arg_helper(from, reserved_stack_space)]);
}
} else if (HostLocIsSpill(to) && HostLocIsGPR(from)) {
ASSERT(bit_width != 128);
if (bit_width == 64) {
code->mov(Xbyak::util::qword[spill_to_op_arg_helper(to, reserved_stack_space)], HostLocToReg64(from));
code.mov(Xbyak::util::qword[spill_to_op_arg_helper(to, reserved_stack_space)], HostLocToReg64(from));
} else {
code->mov(Xbyak::util::dword[spill_to_op_arg_helper(to, reserved_stack_space)], HostLocToReg64(from).cvt32());
code.mov(Xbyak::util::dword[spill_to_op_arg_helper(to, reserved_stack_space)], HostLocToReg64(from).cvt32());
}
} else {
UNREACHABLE();
}
}
#undef MAYBE_AVX
void RegAlloc::EmitExchange(const HostLoc a, const HostLoc b) noexcept {
void RegAlloc::EmitExchange(BlockOfCode& code, const HostLoc a, const HostLoc b) noexcept {
ASSERT(HostLocIsGPR(a) && HostLocIsGPR(b) && "Exchanging XMM registers is unneeded OR invalid emit");
code->xchg(HostLocToReg64(a), HostLocToReg64(b));
code.xchg(HostLocToReg64(a), HostLocToReg64(b));
}
} // namespace Dynarmic::Backend::X64
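A minimal sketch of a caller of the reworked HostCall, for orientation (Helper and EmitExampleCall are hypothetical; the widening of U8/U16/U32 arguments shown above happens inside HostCall before the call itself is emitted):

static u32 Helper(u32 value) { return value + 1; }

void EmitX64::EmitExampleCall(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    ctx.reg_alloc.HostCall(code, inst, args[0]);  // binds args to ABI regs, result to ABI_RETURN
    code.CallFunction(Helper);
}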

View File

@ -81,7 +81,7 @@ public:
return 1 << max_bit_width;
}
void AddValue(IR::Inst* inst) noexcept;
void EmitVerboseDebuggingOutput(BlockOfCode* code, size_t host_loc_index) const noexcept;
void EmitVerboseDebuggingOutput(BlockOfCode& code, size_t host_loc_index) const noexcept;
private:
//non trivial
boost::container::small_vector<IR::Inst*, 3> values; //24
@ -129,16 +129,15 @@ public:
IR::AccType GetImmediateAccType() const noexcept;
/// Is this value currently in a GPR?
bool IsInGpr() const noexcept;
bool IsInXmm() const noexcept;
bool IsInMemory() const noexcept;
bool IsInGpr(RegAlloc& reg_alloc) const noexcept;
bool IsInXmm(RegAlloc& reg_alloc) const noexcept;
bool IsInMemory(RegAlloc& reg_alloc) const noexcept;
private:
friend class RegAlloc;
explicit Argument(RegAlloc& reg_alloc) : reg_alloc(reg_alloc) {}
explicit Argument() {}
//data
IR::Value value; //8
RegAlloc& reg_alloc; //8
bool allocated = false; //1
};
@ -146,55 +145,57 @@ class RegAlloc final {
public:
using ArgumentInfo = std::array<Argument, IR::max_arg_count>;
RegAlloc() noexcept = default;
RegAlloc(BlockOfCode* code, boost::container::static_vector<HostLoc, 28> gpr_order, boost::container::static_vector<HostLoc, 28> xmm_order) noexcept;
RegAlloc(boost::container::static_vector<HostLoc, 28> gpr_order, boost::container::static_vector<HostLoc, 28> xmm_order) noexcept;
ArgumentInfo GetArgumentInfo(const IR::Inst* inst) noexcept;
void RegisterPseudoOperation(const IR::Inst* inst) noexcept;
inline bool IsValueLive(const IR::Inst* inst) const noexcept {
return !!ValueLocation(inst);
}
inline Xbyak::Reg64 UseGpr(Argument& arg) noexcept {
inline Xbyak::Reg64 UseGpr(BlockOfCode& code, Argument& arg) noexcept {
ASSERT(!arg.allocated);
arg.allocated = true;
return HostLocToReg64(UseImpl(arg.value, gpr_order));
return HostLocToReg64(UseImpl(code, arg.value, gpr_order));
}
inline Xbyak::Xmm UseXmm(Argument& arg) noexcept {
inline Xbyak::Xmm UseXmm(BlockOfCode& code, Argument& arg) noexcept {
ASSERT(!arg.allocated);
arg.allocated = true;
return HostLocToXmm(UseImpl(arg.value, xmm_order));
return HostLocToXmm(UseImpl(code, arg.value, xmm_order));
}
inline OpArg UseOpArg(Argument& arg) noexcept {
return UseGpr(arg);
inline OpArg UseOpArg(BlockOfCode& code, Argument& arg) noexcept {
return UseGpr(code, arg);
}
inline void Use(Argument& arg, const HostLoc host_loc) noexcept {
inline void Use(BlockOfCode& code, Argument& arg, const HostLoc host_loc) noexcept {
ASSERT(!arg.allocated);
arg.allocated = true;
UseImpl(arg.value, {host_loc});
UseImpl(code, arg.value, {host_loc});
}
Xbyak::Reg64 UseScratchGpr(Argument& arg) noexcept;
Xbyak::Xmm UseScratchXmm(Argument& arg) noexcept;
void UseScratch(Argument& arg, HostLoc host_loc) noexcept;
Xbyak::Reg64 UseScratchGpr(BlockOfCode& code, Argument& arg) noexcept;
Xbyak::Xmm UseScratchXmm(BlockOfCode& code, Argument& arg) noexcept;
void UseScratch(BlockOfCode& code, Argument& arg, HostLoc host_loc) noexcept;
void DefineValue(IR::Inst* inst, const Xbyak::Reg& reg) noexcept;
void DefineValue(IR::Inst* inst, Argument& arg) noexcept;
void DefineValue(BlockOfCode& code, IR::Inst* inst, const Xbyak::Reg& reg) noexcept;
void DefineValue(BlockOfCode& code, IR::Inst* inst, Argument& arg) noexcept;
void Release(const Xbyak::Reg& reg) noexcept;
inline Xbyak::Reg64 ScratchGpr() noexcept {
return HostLocToReg64(ScratchImpl(gpr_order));
inline Xbyak::Reg64 ScratchGpr(BlockOfCode& code) noexcept {
return HostLocToReg64(ScratchImpl(code, gpr_order));
}
inline Xbyak::Reg64 ScratchGpr(const HostLoc desired_location) noexcept {
return HostLocToReg64(ScratchImpl({desired_location}));
inline Xbyak::Reg64 ScratchGpr(BlockOfCode& code, const HostLoc desired_location) noexcept {
return HostLocToReg64(ScratchImpl(code, {desired_location}));
}
inline Xbyak::Xmm ScratchXmm() noexcept {
return HostLocToXmm(ScratchImpl(xmm_order));
inline Xbyak::Xmm ScratchXmm(BlockOfCode& code) noexcept {
return HostLocToXmm(ScratchImpl(code, xmm_order));
}
inline Xbyak::Xmm ScratchXmm(HostLoc desired_location) noexcept {
return HostLocToXmm(ScratchImpl({desired_location}));
inline Xbyak::Xmm ScratchXmm(BlockOfCode& code, HostLoc desired_location) noexcept {
return HostLocToXmm(ScratchImpl(code, {desired_location}));
}
void HostCall(IR::Inst* result_def = nullptr,
void HostCall(
BlockOfCode& code,
IR::Inst* result_def = nullptr,
const std::optional<Argument::copyable_reference> arg0 = {},
const std::optional<Argument::copyable_reference> arg1 = {},
const std::optional<Argument::copyable_reference> arg2 = {},
@ -202,67 +203,56 @@ public:
) noexcept;
// TODO: Values in host flags
void AllocStackSpace(const size_t stack_space) noexcept;
void ReleaseStackSpace(const size_t stack_space) noexcept;
void AllocStackSpace(BlockOfCode& code, const size_t stack_space) noexcept;
void ReleaseStackSpace(BlockOfCode& code, const size_t stack_space) noexcept;
inline void EndOfAllocScope() noexcept {
for (auto& iter : hostloc_info) {
for (auto& iter : hostloc_info)
iter.ReleaseAll();
}
}
inline void AssertNoMoreUses() noexcept {
ASSERT(std::all_of(hostloc_info.begin(), hostloc_info.end(), [](const auto& i) noexcept { return i.IsEmpty(); }));
}
inline void EmitVerboseDebuggingOutput() noexcept {
for (size_t i = 0; i < hostloc_info.size(); i++) {
inline void EmitVerboseDebuggingOutput(BlockOfCode& code) noexcept {
for (size_t i = 0; i < hostloc_info.size(); i++)
hostloc_info[i].EmitVerboseDebuggingOutput(code, i);
}
}
private:
friend struct Argument;
HostLoc SelectARegister(const boost::container::static_vector<HostLoc, 28>& desired_locations) const noexcept;
inline std::optional<HostLoc> ValueLocation(const IR::Inst* value) const noexcept {
for (size_t i = 0; i < hostloc_info.size(); i++) {
if (hostloc_info[i].ContainsValue(value)) {
return HostLoc(i);
}
}
return std::nullopt;
}
std::optional<HostLoc> ValueLocation(const IR::Inst* value) const noexcept;
HostLoc UseImpl(BlockOfCode& code, IR::Value use_value, const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept;
HostLoc UseScratchImpl(BlockOfCode& code, IR::Value use_value, const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept;
HostLoc ScratchImpl(BlockOfCode& code, const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept;
void DefineValueImpl(BlockOfCode& code, IR::Inst* def_inst, HostLoc host_loc) noexcept;
void DefineValueImpl(BlockOfCode& code, IR::Inst* def_inst, const IR::Value& use_inst) noexcept;
HostLoc UseImpl(IR::Value use_value, const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept;
HostLoc UseScratchImpl(IR::Value use_value, const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept;
HostLoc ScratchImpl(const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept;
void DefineValueImpl(IR::Inst* def_inst, HostLoc host_loc) noexcept;
void DefineValueImpl(IR::Inst* def_inst, const IR::Value& use_inst) noexcept;
HostLoc LoadImmediate(BlockOfCode& code, IR::Value imm, HostLoc host_loc) noexcept;
void Move(BlockOfCode& code, HostLoc to, HostLoc from) noexcept;
void CopyToScratch(BlockOfCode& code, size_t bit_width, HostLoc to, HostLoc from) noexcept;
void Exchange(BlockOfCode& code, HostLoc a, HostLoc b) noexcept;
void MoveOutOfTheWay(BlockOfCode& code, HostLoc reg) noexcept;
HostLoc LoadImmediate(IR::Value imm, HostLoc host_loc) noexcept;
void Move(HostLoc to, HostLoc from) noexcept;
void CopyToScratch(size_t bit_width, HostLoc to, HostLoc from) noexcept;
void Exchange(HostLoc a, HostLoc b) noexcept;
void MoveOutOfTheWay(HostLoc reg) noexcept;
void SpillRegister(HostLoc loc) noexcept;
void SpillRegister(BlockOfCode& code, HostLoc loc) noexcept;
HostLoc FindFreeSpill(bool is_xmm) const noexcept;
inline HostLocInfo& LocInfo(const HostLoc loc) noexcept {
ASSERT(loc != HostLoc::RSP && loc != ABI_JIT_PTR);
return hostloc_info[static_cast<size_t>(loc)];
return hostloc_info[size_t(loc)];
}
inline const HostLocInfo& LocInfo(const HostLoc loc) const noexcept {
ASSERT(loc != HostLoc::RSP && loc != ABI_JIT_PTR);
return hostloc_info[static_cast<size_t>(loc)];
return hostloc_info[size_t(loc)];
}
void EmitMove(const size_t bit_width, const HostLoc to, const HostLoc from) noexcept;
void EmitExchange(const HostLoc a, const HostLoc b) noexcept;
void EmitMove(BlockOfCode& code, const size_t bit_width, const HostLoc to, const HostLoc from) noexcept;
void EmitExchange(BlockOfCode& code, const HostLoc a, const HostLoc b) noexcept;
//data
alignas(64) boost::container::static_vector<HostLoc, 28> gpr_order;
alignas(64) boost::container::static_vector<HostLoc, 28> xmm_order;
alignas(64) std::array<HostLocInfo, NonSpillHostLocCount + SpillCount> hostloc_info;
BlockOfCode* code = nullptr;
size_t reserved_stack_space = 0;
};
// Ensure a cache line (or less) is used; this is essential