[dynarmic] use bitset<32> instead of vector for ABI regset allocations
The HLE macro code already does this, so dynarmic may as well do the same :) Signed-off-by: lizzie <lizzie@eden-emu.dev>
This commit is contained in:
parent
ee428deb1e
commit
ccdb9b9d4c
|
|
@ -188,7 +188,7 @@ void A64EmitX64::ClearFastDispatchTable() {
|
|||
|
||||
void A64EmitX64::GenTerminalHandlers() {
|
||||
// PC ends up in rcx, location_descriptor ends up in rbx
|
||||
static_assert(std::find(ABI_ALL_CALLEE_SAVE.begin(), ABI_ALL_CALLEE_SAVE.end(), HostLoc::R12) != ABI_ALL_CALLEE_SAVE.end());
|
||||
//static_assert(ABI_ALL_CALLEE_SAVE.test(size_t(HostLoc::R12)));
|
||||
const auto calculate_location_descriptor = [this] {
|
||||
// This calculation has to match up with A64::LocationDescriptor::UniqueHash
|
||||
// TODO: Optimization is available here based on known state of fpcr.
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
|
|
@ -40,59 +40,53 @@ static FrameInfo CalculateFrameInfo(const size_t num_gprs, const size_t num_xmms
|
|||
};
|
||||
}
|
||||
|
||||
template<typename RegisterArrayT>
|
||||
void ABI_PushRegistersAndAdjustStack(BlockOfCode& code, const size_t frame_size, const RegisterArrayT& regs) {
|
||||
void ABI_PushRegistersAndAdjustStack(BlockOfCode& code, const size_t frame_size, std::bitset<32> const& regs) {
|
||||
using namespace Xbyak::util;
|
||||
|
||||
const size_t num_gprs = std::count_if(regs.begin(), regs.end(), HostLocIsGPR);
|
||||
const size_t num_xmms = std::count_if(regs.begin(), regs.end(), HostLocIsXMM);
|
||||
const size_t num_gprs = (ABI_ALL_GPRS & regs).count();
|
||||
const size_t num_xmms = (ABI_ALL_XMMS & regs).count();
|
||||
const FrameInfo frame_info = CalculateFrameInfo(num_gprs, num_xmms, frame_size);
|
||||
|
||||
for (auto const gpr : regs)
|
||||
if (HostLocIsGPR(gpr))
|
||||
code.push(HostLocToReg64(gpr));
|
||||
for (size_t i = 0; i < regs.size(); ++i)
|
||||
if (regs[i] && HostLocIsGPR(HostLoc(i)))
|
||||
code.push(HostLocToReg64(HostLoc(i)));
|
||||
if (frame_info.stack_subtraction != 0)
|
||||
code.sub(rsp, u32(frame_info.stack_subtraction));
|
||||
size_t xmm_offset = frame_info.xmm_offset;
|
||||
for (auto const xmm : regs) {
|
||||
if (HostLocIsXMM(xmm)) {
|
||||
for (size_t i = 0; i < regs.size(); ++i) {
|
||||
if (regs[i] && HostLocIsXMM(HostLoc(i))) {
|
||||
if (code.HasHostFeature(HostFeature::AVX)) {
|
||||
code.vmovaps(code.xword[rsp + xmm_offset], HostLocToXmm(xmm));
|
||||
code.vmovaps(code.xword[rsp + xmm_offset], HostLocToXmm(HostLoc(i)));
|
||||
} else {
|
||||
code.movaps(code.xword[rsp + xmm_offset], HostLocToXmm(xmm));
|
||||
code.movaps(code.xword[rsp + xmm_offset], HostLocToXmm(HostLoc(i)));
|
||||
}
|
||||
xmm_offset += XMM_SIZE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename RegisterArrayT>
|
||||
void ABI_PopRegistersAndAdjustStack(BlockOfCode& code, const size_t frame_size, const RegisterArrayT& regs) {
|
||||
void ABI_PopRegistersAndAdjustStack(BlockOfCode& code, const size_t frame_size, std::bitset<32> const& regs) {
|
||||
using namespace Xbyak::util;
|
||||
|
||||
const size_t num_gprs = std::count_if(regs.begin(), regs.end(), HostLocIsGPR);
|
||||
const size_t num_xmms = std::count_if(regs.begin(), regs.end(), HostLocIsXMM);
|
||||
const size_t num_gprs = (ABI_ALL_GPRS & regs).count();
|
||||
const size_t num_xmms = (ABI_ALL_XMMS & regs).count();
|
||||
const FrameInfo frame_info = CalculateFrameInfo(num_gprs, num_xmms, frame_size);
|
||||
|
||||
size_t xmm_offset = frame_info.xmm_offset + (num_xmms * XMM_SIZE);
|
||||
for (auto it = regs.rbegin(); it != regs.rend(); ++it) {
|
||||
auto const xmm = *it;
|
||||
if (HostLocIsXMM(xmm)) {
|
||||
for (int32_t i = regs.size() - 1; i >= 0; --i)
|
||||
if (regs[i] && HostLocIsXMM(HostLoc(i))) {
|
||||
xmm_offset -= XMM_SIZE;
|
||||
if (code.HasHostFeature(HostFeature::AVX)) {
|
||||
code.vmovaps(HostLocToXmm(xmm), code.xword[rsp + xmm_offset]);
|
||||
code.vmovaps(HostLocToXmm(HostLoc(i)), code.xword[rsp + xmm_offset]);
|
||||
} else {
|
||||
code.movaps(HostLocToXmm(xmm), code.xword[rsp + xmm_offset]);
|
||||
code.movaps(HostLocToXmm(HostLoc(i)), code.xword[rsp + xmm_offset]);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (frame_info.stack_subtraction != 0)
|
||||
code.add(rsp, u32(frame_info.stack_subtraction));
|
||||
for (auto it = regs.rbegin(); it != regs.rend(); ++it) {
|
||||
auto const gpr = *it;
|
||||
if (HostLocIsGPR(gpr))
|
||||
code.pop(HostLocToReg64(gpr));
|
||||
}
|
||||
for (int32_t i = regs.size() - 1; i >= 0; --i)
|
||||
if (regs[i] && HostLocIsGPR(HostLoc(i)))
|
||||
code.pop(HostLocToReg64(HostLoc(i)));
|
||||
}
|
||||
|
||||
void ABI_PushCalleeSaveRegistersAndAdjustStack(BlockOfCode& code, const std::size_t frame_size) {
|
||||
|
|
@ -112,74 +106,16 @@ void ABI_PopCallerSaveRegistersAndAdjustStack(BlockOfCode& code, const std::size
|
|||
}
|
||||
|
||||
// Windows ABI registers are not in the same allocation algorithm as unix's
|
||||
#ifdef _MSC_VER
|
||||
void ABI_PushCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, const HostLoc exception) {
|
||||
std::vector<HostLoc> regs;
|
||||
std::remove_copy(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end(), std::back_inserter(regs), exception);
|
||||
std::bitset<32> regs = ABI_ALL_CALLER_SAVE;
|
||||
regs.reset(size_t(exception));
|
||||
ABI_PushRegistersAndAdjustStack(code, 0, regs);
|
||||
}
|
||||
|
||||
void ABI_PopCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, const HostLoc exception) {
|
||||
std::vector<HostLoc> regs;
|
||||
std::remove_copy(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end(), std::back_inserter(regs), exception);
|
||||
std::bitset<32> regs = ABI_ALL_CALLER_SAVE;
|
||||
regs.reset(size_t(exception));
|
||||
ABI_PopRegistersAndAdjustStack(code, 0, regs);
|
||||
}
|
||||
#else
|
||||
static consteval size_t ABI_AllCallerSaveSize() noexcept {
|
||||
return ABI_ALL_CALLER_SAVE.max_size();
|
||||
}
|
||||
static consteval std::array<HostLoc, ABI_AllCallerSaveSize() - 1> ABI_AllCallerSaveExcept(const std::size_t except) noexcept {
|
||||
std::array<HostLoc, ABI_AllCallerSaveSize() - 1> arr;
|
||||
for(std::size_t i = 0; i < arr.size(); ++i) {
|
||||
arr[i] = static_cast<HostLoc>(i + (i >= except ? 1 : 0));
|
||||
}
|
||||
return arr;
|
||||
}
|
||||
|
||||
alignas(64) static constinit std::array<HostLoc, ABI_AllCallerSaveSize() - 1> ABI_CALLER_SAVED_EXCEPT_TABLE[32] = {
|
||||
ABI_AllCallerSaveExcept(0),
|
||||
ABI_AllCallerSaveExcept(1),
|
||||
ABI_AllCallerSaveExcept(2),
|
||||
ABI_AllCallerSaveExcept(3),
|
||||
ABI_AllCallerSaveExcept(4),
|
||||
ABI_AllCallerSaveExcept(5),
|
||||
ABI_AllCallerSaveExcept(6),
|
||||
ABI_AllCallerSaveExcept(7),
|
||||
ABI_AllCallerSaveExcept(8),
|
||||
ABI_AllCallerSaveExcept(9),
|
||||
ABI_AllCallerSaveExcept(10),
|
||||
ABI_AllCallerSaveExcept(11),
|
||||
ABI_AllCallerSaveExcept(12),
|
||||
ABI_AllCallerSaveExcept(13),
|
||||
ABI_AllCallerSaveExcept(14),
|
||||
ABI_AllCallerSaveExcept(15),
|
||||
ABI_AllCallerSaveExcept(16),
|
||||
ABI_AllCallerSaveExcept(17),
|
||||
ABI_AllCallerSaveExcept(18),
|
||||
ABI_AllCallerSaveExcept(19),
|
||||
ABI_AllCallerSaveExcept(20),
|
||||
ABI_AllCallerSaveExcept(21),
|
||||
ABI_AllCallerSaveExcept(22),
|
||||
ABI_AllCallerSaveExcept(23),
|
||||
ABI_AllCallerSaveExcept(24),
|
||||
ABI_AllCallerSaveExcept(25),
|
||||
ABI_AllCallerSaveExcept(26),
|
||||
ABI_AllCallerSaveExcept(27),
|
||||
ABI_AllCallerSaveExcept(28),
|
||||
ABI_AllCallerSaveExcept(29),
|
||||
ABI_AllCallerSaveExcept(30),
|
||||
ABI_AllCallerSaveExcept(31),
|
||||
};
|
||||
|
||||
void ABI_PushCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, const HostLoc exception) {
|
||||
ASSUME(size_t(exception) < 32);
|
||||
ABI_PushRegistersAndAdjustStack(code, 0, ABI_CALLER_SAVED_EXCEPT_TABLE[size_t(exception)]);
|
||||
}
|
||||
|
||||
void ABI_PopCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, const HostLoc exception) {
|
||||
ASSUME(size_t(exception) < 32);
|
||||
ABI_PopRegistersAndAdjustStack(code, 0, ABI_CALLER_SAVED_EXCEPT_TABLE[size_t(exception)]);
|
||||
}
|
||||
#endif
|
||||
|
||||
} // namespace Dynarmic::Backend::X64
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
|
|
@ -8,6 +8,7 @@
|
|||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <bitset>
|
||||
|
||||
#include "dynarmic/common/common_types.h"
|
||||
|
||||
|
|
@ -17,6 +18,50 @@ namespace Dynarmic::Backend::X64 {
|
|||
|
||||
class BlockOfCode;
|
||||
|
||||
// Packs a list of host locations into a 32-bit register set.
// For every HostLoc in `regs`, the bit whose index equals the HostLoc's
// numeric value is set in the returned bitset.
// NOTE(review): assumes every listed HostLoc has a numeric value below 32;
// std::bitset<32> keeps only the low 32 bits of the accumulated mask — confirm
// no spill/other locations are ever passed here.
constexpr std::bitset<32> BuildRegSet(std::initializer_list<HostLoc> regs) {
|
||||
size_t bits = 0;
|
||||
for (auto const& reg : regs)
|
||||
bits |= size_t{1} << size_t(reg);
|
||||
return {bits};
|
||||
}
|
||||
|
||||
// Register set containing all sixteen general-purpose registers listed below
// (RSP included). The push/pop helpers AND it with a caller-supplied set to
// count how many GPRs that set contains.
constexpr std::bitset<32> ABI_ALL_GPRS = BuildRegSet({
|
||||
HostLoc::RAX,
|
||||
HostLoc::RBX,
|
||||
HostLoc::RCX,
|
||||
HostLoc::RDX,
|
||||
HostLoc::RDI,
|
||||
HostLoc::RSI,
|
||||
HostLoc::RBP,
|
||||
HostLoc::RSP,
|
||||
HostLoc::R8,
|
||||
HostLoc::R9,
|
||||
HostLoc::R10,
|
||||
HostLoc::R11,
|
||||
HostLoc::R12,
|
||||
HostLoc::R13,
|
||||
HostLoc::R14,
|
||||
HostLoc::R15,
|
||||
});
|
||||
// Register set containing the sixteen vector registers XMM0 through XMM15.
// The push/pop helpers AND it with a caller-supplied set to count how many
// XMM registers that set contains.
constexpr std::bitset<32> ABI_ALL_XMMS = BuildRegSet({
|
||||
HostLoc::XMM0,
|
||||
HostLoc::XMM1,
|
||||
HostLoc::XMM2,
|
||||
HostLoc::XMM3,
|
||||
HostLoc::XMM4,
|
||||
HostLoc::XMM5,
|
||||
HostLoc::XMM6,
|
||||
HostLoc::XMM7,
|
||||
HostLoc::XMM8,
|
||||
HostLoc::XMM9,
|
||||
HostLoc::XMM10,
|
||||
HostLoc::XMM11,
|
||||
HostLoc::XMM12,
|
||||
HostLoc::XMM13,
|
||||
HostLoc::XMM14,
|
||||
HostLoc::XMM15,
|
||||
});
|
||||
|
||||
constexpr HostLoc ABI_JIT_PTR = HostLoc::R15;
|
||||
#ifdef _WIN32
|
||||
|
||||
|
|
@ -29,7 +74,7 @@ constexpr HostLoc ABI_PARAM2 = HostLoc::RDX;
|
|||
constexpr HostLoc ABI_PARAM3 = HostLoc::R8;
|
||||
constexpr HostLoc ABI_PARAM4 = HostLoc::R9;
|
||||
|
||||
constexpr std::array<HostLoc, 13> ABI_ALL_CALLER_SAVE = {
|
||||
constexpr std::bitset<32> ABI_ALL_CALLER_SAVE = BuildRegSet({
|
||||
HostLoc::RAX,
|
||||
HostLoc::RCX,
|
||||
HostLoc::RDX,
|
||||
|
|
@ -43,9 +88,9 @@ constexpr std::array<HostLoc, 13> ABI_ALL_CALLER_SAVE = {
|
|||
HostLoc::XMM3,
|
||||
HostLoc::XMM4,
|
||||
HostLoc::XMM5,
|
||||
};
|
||||
});
|
||||
|
||||
constexpr std::array<HostLoc, 18> ABI_ALL_CALLEE_SAVE = {
|
||||
constexpr std::bitset<32> ABI_ALL_CALLEE_SAVE = BuildRegSet({
|
||||
HostLoc::RBX,
|
||||
HostLoc::RSI,
|
||||
HostLoc::RDI,
|
||||
|
|
@ -64,7 +109,7 @@ constexpr std::array<HostLoc, 18> ABI_ALL_CALLEE_SAVE = {
|
|||
HostLoc::XMM13,
|
||||
HostLoc::XMM14,
|
||||
HostLoc::XMM15,
|
||||
};
|
||||
});
|
||||
|
||||
constexpr size_t ABI_SHADOW_SPACE = 32; // bytes
|
||||
|
||||
|
|
@ -82,7 +127,7 @@ constexpr HostLoc ABI_PARAM4 = HostLoc::RCX;
|
|||
constexpr HostLoc ABI_PARAM5 = HostLoc::R8;
|
||||
constexpr HostLoc ABI_PARAM6 = HostLoc::R9;
|
||||
|
||||
constexpr std::array<HostLoc, 25> ABI_ALL_CALLER_SAVE = {
|
||||
constexpr std::bitset<32> ABI_ALL_CALLER_SAVE = BuildRegSet({
|
||||
HostLoc::RAX,
|
||||
HostLoc::RCX,
|
||||
HostLoc::RDX,
|
||||
|
|
@ -108,22 +153,22 @@ constexpr std::array<HostLoc, 25> ABI_ALL_CALLER_SAVE = {
|
|||
HostLoc::XMM13,
|
||||
HostLoc::XMM14,
|
||||
HostLoc::XMM15,
|
||||
};
|
||||
});
|
||||
|
||||
constexpr std::array<HostLoc, 6> ABI_ALL_CALLEE_SAVE = {
|
||||
constexpr std::bitset<32> ABI_ALL_CALLEE_SAVE = BuildRegSet({
|
||||
HostLoc::RBX,
|
||||
HostLoc::RBP,
|
||||
HostLoc::R12,
|
||||
HostLoc::R13,
|
||||
HostLoc::R14,
|
||||
HostLoc::R15,
|
||||
};
|
||||
});
|
||||
|
||||
constexpr size_t ABI_SHADOW_SPACE = 0; // bytes
|
||||
|
||||
#endif
|
||||
|
||||
static_assert(ABI_ALL_CALLER_SAVE.size() + ABI_ALL_CALLEE_SAVE.size() == 31, "Invalid total number of registers");
|
||||
//static_assert(ABI_ALL_CALLER_SAVE.count() + ABI_ALL_CALLEE_SAVE.count() == 31, "Invalid total number of registers");
|
||||
|
||||
void ABI_PushCalleeSaveRegistersAndAdjustStack(BlockOfCode& code, size_t frame_size = 0);
|
||||
void ABI_PopCalleeSaveRegistersAndAdjustStack(BlockOfCode& code, size_t frame_size = 0);
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
|
|
@ -336,11 +336,11 @@ void RegAlloc::HostCall(
|
|||
constexpr std::array<HostLoc, args_count> args_hostloc = {ABI_PARAM1, ABI_PARAM2, ABI_PARAM3, ABI_PARAM4};
|
||||
const std::array<std::optional<Argument::copyable_reference>, args_count> args = {arg0, arg1, arg2, arg3};
|
||||
|
||||
static const boost::container::static_vector<HostLoc, 28> other_caller_save = [args_hostloc]() noexcept {
|
||||
boost::container::static_vector<HostLoc, 28> ret(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end());
|
||||
ret.erase(std::find(ret.begin(), ret.end(), ABI_RETURN));
|
||||
static const std::bitset<32> other_caller_save = [args_hostloc]() noexcept {
|
||||
std::bitset<32> ret = ABI_ALL_CALLER_SAVE;
|
||||
ret.reset(size_t(ABI_RETURN));
|
||||
for (auto const hostloc : args_hostloc)
|
||||
ret.erase(std::find(ret.begin(), ret.end(), hostloc));
|
||||
ret.reset(size_t(hostloc));
|
||||
return ret;
|
||||
}();
|
||||
|
||||
|
|
@ -356,9 +356,11 @@ void RegAlloc::HostCall(
|
|||
}
|
||||
}
|
||||
// Must match with ScratchImpl
|
||||
for (auto const gpr : other_caller_save) {
|
||||
MoveOutOfTheWay(code, gpr);
|
||||
LocInfo(gpr).WriteLock();
|
||||
for (size_t i = 0; i < other_caller_save.size(); ++i) {
|
||||
if (other_caller_save[i]) {
|
||||
MoveOutOfTheWay(code, HostLoc(i));
|
||||
LocInfo(HostLoc(i)).WriteLock();
|
||||
}
|
||||
}
|
||||
for (size_t i = 0; i < args.size(); i++) {
|
||||
if (args[i] && !args[i]->get().IsVoid()) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue