fix stuff
This commit is contained in:
parent
786e288326
commit
57299efc5b
|
|
@ -102,19 +102,14 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) {
|
|||
}
|
||||
|
||||
code.EnableWriting();
|
||||
|
||||
const boost::container::static_vector<HostLoc, 28> gpr_order = [this] {
|
||||
boost::container::static_vector<HostLoc, 28> gprs{any_gpr};
|
||||
if (conf.fastmem_pointer) {
|
||||
gprs.erase(std::find(gprs.begin(), gprs.end(), HostLoc::R13));
|
||||
}
|
||||
if (conf.page_table) {
|
||||
gprs.erase(std::find(gprs.begin(), gprs.end(), HostLoc::R14));
|
||||
}
|
||||
new (&this->reg_alloc) RegAlloc([this] {
|
||||
std::bitset<32> gprs{any_gpr};
|
||||
if (conf.fastmem_pointer)
|
||||
gprs.reset(size_t(HostLoc::R13));
|
||||
if (conf.page_table)
|
||||
gprs.reset(size_t(HostLoc::R14));
|
||||
return gprs;
|
||||
}();
|
||||
|
||||
new (&this->reg_alloc) RegAlloc(gpr_order, any_xmm);
|
||||
}(), any_xmm);
|
||||
A32EmitContext ctx{conf, reg_alloc, block};
|
||||
|
||||
// Start emitting.
|
||||
|
|
|
|||
|
|
@ -76,18 +76,14 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) noexcept {
|
|||
}
|
||||
|
||||
code.EnableWriting();
|
||||
const boost::container::static_vector<HostLoc, 28> gpr_order = [this] {
|
||||
boost::container::static_vector<HostLoc, 28> gprs{any_gpr};
|
||||
if (conf.fastmem_pointer) {
|
||||
gprs.erase(std::find(gprs.begin(), gprs.end(), HostLoc::R13));
|
||||
}
|
||||
if (conf.page_table) {
|
||||
gprs.erase(std::find(gprs.begin(), gprs.end(), HostLoc::R14));
|
||||
}
|
||||
new (&this->reg_alloc) RegAlloc{[this] {
|
||||
std::bitset<32> gprs = any_gpr;
|
||||
if (conf.fastmem_pointer)
|
||||
gprs.reset(size_t(HostLoc::R13));
|
||||
if (conf.page_table)
|
||||
gprs.reset(size_t(HostLoc::R14));
|
||||
return gprs;
|
||||
}();
|
||||
|
||||
new (&this->reg_alloc) RegAlloc{gpr_order, any_xmm};
|
||||
}(), any_xmm};
|
||||
A64EmitContext ctx{conf, reg_alloc, block};
|
||||
|
||||
// Start emitting.
|
||||
|
|
|
|||
|
|
@ -11,20 +11,12 @@
|
|||
#include <bitset>
|
||||
|
||||
#include "dynarmic/common/common_types.h"
|
||||
|
||||
#include "dynarmic/backend/x64/hostloc.h"
|
||||
|
||||
namespace Dynarmic::Backend::X64 {
|
||||
|
||||
class BlockOfCode;
|
||||
|
||||
constexpr std::bitset<32> BuildRegSet(std::initializer_list<HostLoc> regs) {
|
||||
size_t bits = 0;
|
||||
for (auto const& reg : regs)
|
||||
bits |= size_t{1} << size_t(reg);
|
||||
return {bits};
|
||||
}
|
||||
|
||||
constexpr std::bitset<32> ABI_ALL_GPRS = BuildRegSet({
|
||||
HostLoc::RAX,
|
||||
HostLoc::RBX,
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
|
|
@ -7,9 +7,11 @@
|
|||
*/
|
||||
#pragma once
|
||||
|
||||
#include <bitset>
|
||||
#include <xbyak/xbyak.h>
|
||||
|
||||
#include "dynarmic/common/assert.h"
|
||||
#include "dynarmic/common/common_types.h"
|
||||
#include <xbyak/xbyak.h>
|
||||
|
||||
namespace Dynarmic::Backend::X64 {
|
||||
|
||||
|
|
@ -106,13 +108,18 @@ constexpr size_t HostLocBitWidth(HostLoc loc) {
|
|||
UNREACHABLE();
|
||||
}
|
||||
|
||||
using HostLocList = std::initializer_list<HostLoc>;
|
||||
constexpr std::bitset<32> BuildRegSet(std::initializer_list<HostLoc> regs) {
|
||||
size_t bits = 0;
|
||||
for (auto const& reg : regs)
|
||||
bits |= size_t{1} << size_t(reg);
|
||||
return {bits};
|
||||
}
|
||||
|
||||
// RSP is preserved for function calls
|
||||
// R13 contains fastmem pointer if any
|
||||
// R14 contains the pagetable pointer
|
||||
// R15 contains the JitState pointer
|
||||
const HostLocList any_gpr = {
|
||||
const std::bitset<32> any_gpr = BuildRegSet({
|
||||
HostLoc::RAX,
|
||||
HostLoc::RBX,
|
||||
HostLoc::RCX,
|
||||
|
|
@ -128,13 +135,13 @@ const HostLocList any_gpr = {
|
|||
HostLoc::R13,
|
||||
HostLoc::R14,
|
||||
//HostLoc::R15,
|
||||
};
|
||||
});
|
||||
|
||||
// XMM0 is reserved for use by instructions that implicitly use it as an argument
|
||||
// XMM1 is used by 128 mem accessors
|
||||
// XMM2 is also used by that (and other stuff)
|
||||
// Basically dont use either XMM0, XMM1 or XMM2 ever; they're left for the regsel
|
||||
const HostLocList any_xmm = {
|
||||
const std::bitset<32> any_xmm = BuildRegSet({
|
||||
//HostLoc::XMM1,
|
||||
//HostLoc::XMM2,
|
||||
HostLoc::XMM3,
|
||||
|
|
@ -150,7 +157,7 @@ const HostLocList any_xmm = {
|
|||
HostLoc::XMM13,
|
||||
HostLoc::XMM14,
|
||||
HostLoc::XMM15,
|
||||
};
|
||||
});
|
||||
|
||||
inline Xbyak::Reg64 HostLocToReg64(HostLoc loc) noexcept {
|
||||
ASSERT(HostLocIsGPR(loc));
|
||||
|
|
|
|||
|
|
@ -6,19 +6,19 @@
|
|||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include "dynarmic/backend/x64/reg_alloc.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
#include <numeric>
|
||||
#include <utility>
|
||||
|
||||
#include <fmt/ostream.h>
|
||||
#include "dynarmic/backend/x64/hostloc.h"
|
||||
#include "dynarmic/common/assert.h"
|
||||
#include <bit>
|
||||
#include <xbyak/xbyak.h>
|
||||
|
||||
#include "dynarmic/backend/x64/abi.h"
|
||||
#include "dynarmic/backend/x64/reg_alloc.h"
|
||||
#include "dynarmic/backend/x64/stack_layout.h"
|
||||
#include "dynarmic/backend/x64/verbose_debugging_output.h"
|
||||
|
||||
|
|
@ -185,9 +185,8 @@ bool Argument::IsInMemory(RegAlloc& reg_alloc) const noexcept {
|
|||
return HostLocIsSpill(*reg_alloc.ValueLocation(value.GetInst()));
|
||||
}
|
||||
|
||||
RegAlloc::RegAlloc(boost::container::static_vector<HostLoc, 28> gpr_order, boost::container::static_vector<HostLoc, 28> xmm_order) noexcept
|
||||
: gpr_order(gpr_order),
|
||||
xmm_order(xmm_order)
|
||||
RegAlloc::RegAlloc(std::bitset<32> gpr_order, std::bitset<32> xmm_order) noexcept
|
||||
: gpr_order(gpr_order), xmm_order(xmm_order)
|
||||
{}
|
||||
|
||||
RegAlloc::ArgumentInfo RegAlloc::GetArgumentInfo(const IR::Inst* inst) noexcept {
|
||||
|
|
@ -237,7 +236,7 @@ Xbyak::Xmm RegAlloc::UseScratchXmm(BlockOfCode& code, Argument& arg) noexcept {
|
|||
void RegAlloc::UseScratch(BlockOfCode& code, Argument& arg, HostLoc host_loc) noexcept {
|
||||
ASSERT(!arg.allocated);
|
||||
arg.allocated = true;
|
||||
UseScratchImpl(code, arg.value, {host_loc});
|
||||
UseScratchImpl(code, arg.value, BuildRegSet({host_loc}));
|
||||
}
|
||||
|
||||
void RegAlloc::DefineValue(BlockOfCode& code, IR::Inst* inst, const Xbyak::Reg& reg) noexcept {
|
||||
|
|
@ -258,7 +257,7 @@ void RegAlloc::Release(const Xbyak::Reg& reg) noexcept {
|
|||
LocInfo(hostloc).ReleaseOne();
|
||||
}
|
||||
|
||||
HostLoc RegAlloc::UseImpl(BlockOfCode& code, IR::Value use_value, const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept {
|
||||
HostLoc RegAlloc::UseImpl(BlockOfCode& code, IR::Value use_value, std::bitset<32> desired_locations) noexcept {
|
||||
if (use_value.IsImmediate()) {
|
||||
return LoadImmediate(code, use_value, ScratchImpl(code, desired_locations));
|
||||
}
|
||||
|
|
@ -266,8 +265,7 @@ HostLoc RegAlloc::UseImpl(BlockOfCode& code, IR::Value use_value, const boost::c
|
|||
auto const* use_inst = use_value.GetInst();
|
||||
HostLoc const current_location = *ValueLocation(use_inst);
|
||||
|
||||
const bool can_use_current_location = std::find(desired_locations.begin(), desired_locations.end(), current_location) != desired_locations.end();
|
||||
if (can_use_current_location) {
|
||||
if (HostLocIsRegister(current_location) && desired_locations.test(size_t(current_location))) {
|
||||
LocInfo(current_location).ReadLock();
|
||||
return current_location;
|
||||
}
|
||||
|
|
@ -290,7 +288,7 @@ HostLoc RegAlloc::UseImpl(BlockOfCode& code, IR::Value use_value, const boost::c
|
|||
return destination_location;
|
||||
}
|
||||
|
||||
HostLoc RegAlloc::UseScratchImpl(BlockOfCode& code, IR::Value use_value, const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept {
|
||||
HostLoc RegAlloc::UseScratchImpl(BlockOfCode& code, IR::Value use_value, std::bitset<32> desired_locations) noexcept {
|
||||
if (use_value.IsImmediate()) {
|
||||
return LoadImmediate(code, use_value, ScratchImpl(code, desired_locations));
|
||||
}
|
||||
|
|
@ -298,9 +296,7 @@ HostLoc RegAlloc::UseScratchImpl(BlockOfCode& code, IR::Value use_value, const b
|
|||
const auto* use_inst = use_value.GetInst();
|
||||
const HostLoc current_location = *ValueLocation(use_inst);
|
||||
const size_t bit_width = GetBitWidth(use_inst->GetType());
|
||||
|
||||
const bool can_use_current_location = std::find(desired_locations.begin(), desired_locations.end(), current_location) != desired_locations.end();
|
||||
if (can_use_current_location && !LocInfo(current_location).IsLocked()) {
|
||||
if (HostLocIsRegister(current_location) && desired_locations.test(size_t(current_location)) && !LocInfo(current_location).IsLocked()) {
|
||||
if (LocInfo(current_location).IsLastUse()) {
|
||||
LocInfo(current_location).is_set_last_use = true;
|
||||
} else {
|
||||
|
|
@ -317,7 +313,7 @@ HostLoc RegAlloc::UseScratchImpl(BlockOfCode& code, IR::Value use_value, const b
|
|||
return destination_location;
|
||||
}
|
||||
|
||||
HostLoc RegAlloc::ScratchImpl(BlockOfCode& code, const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept {
|
||||
HostLoc RegAlloc::ScratchImpl(BlockOfCode& code, std::bitset<32> desired_locations) noexcept {
|
||||
const HostLoc location = SelectARegister(desired_locations);
|
||||
MoveOutOfTheWay(code, location);
|
||||
LocInfo(location).WriteLock();
|
||||
|
|
@ -399,46 +395,48 @@ void RegAlloc::ReleaseStackSpace(BlockOfCode& code, const size_t stack_space) no
|
|||
code.add(code.rsp, u32(stack_space));
|
||||
}
|
||||
|
||||
HostLoc RegAlloc::SelectARegister(const boost::container::static_vector<HostLoc, 28>& desired_locations) const noexcept {
|
||||
HostLoc RegAlloc::SelectARegister(std::bitset<32> desired_locations) const noexcept {
|
||||
// TODO(lizzie): Overspill causes issues (reads to 0 and such) on some games, I need to make a testbench
|
||||
// to later track this down - however I just modified the LRU algo so it prefers empty registers first
|
||||
// we need to test high register pressure (and spills, maybe 32 regs?)
|
||||
|
||||
static_assert(size_t(HostLoc::FirstSpill) >= 32);
|
||||
// Selects the best location out of the available locations.
|
||||
// NOTE: Using last is BAD because new REX prefix for each insn using the last regs
|
||||
// TODO: Actually do LRU or something. Currently we just try to pick something without a value if possible.
|
||||
auto min_lru_counter = size_t(-1);
|
||||
auto it_candidate = desired_locations.cend(); //default fallback if everything fails
|
||||
auto it_rex_candidate = desired_locations.cend();
|
||||
auto it_empty_candidate = desired_locations.cend();
|
||||
for (auto it = desired_locations.cbegin(); it != desired_locations.cend(); it++) {
|
||||
auto const& loc_info = LocInfo(*it);
|
||||
DEBUG_ASSERT(*it != ABI_JIT_PTR);
|
||||
// Abstain from using upper registers unless absolutely nescesary
|
||||
if (loc_info.IsLocked()) {
|
||||
// skip, not suitable for allocation
|
||||
// While R13 and R14 are technically available, we avoid allocating for them
|
||||
// at all costs, because theoretically skipping them is better than spilling
|
||||
// all over the place - it also fixes bugs with high reg pressure
|
||||
} else if (*it >= HostLoc::R13 && *it <= HostLoc::R15) {
|
||||
// skip, do not touch
|
||||
// Intel recommends to reuse registers as soon as they're overwritable (DO NOT SPILL)
|
||||
} else if (loc_info.IsEmpty()) {
|
||||
it_empty_candidate = it;
|
||||
break;
|
||||
// No empty registers for some reason (very evil) - just do normal LRU
|
||||
} else if (loc_info.lru_counter < min_lru_counter) {
|
||||
// Otherwise a "quasi"-LRU
|
||||
min_lru_counter = loc_info.lru_counter;
|
||||
if (*it >= HostLoc::R8 && *it <= HostLoc::R15) {
|
||||
it_rex_candidate = it;
|
||||
} else {
|
||||
it_candidate = it;
|
||||
auto it_candidate = HostLoc::FirstSpill; //default fallback if everything fails
|
||||
auto it_rex_candidate = HostLoc::FirstSpill;
|
||||
auto it_empty_candidate = HostLoc::FirstSpill;
|
||||
for (HostLoc i = HostLoc(0); i < HostLoc(desired_locations.size()); i = HostLoc(size_t(i) + 1)) {
|
||||
if (desired_locations.test(size_t(i))) {
|
||||
auto const& loc_info = LocInfo(i);
|
||||
DEBUG_ASSERT(i != ABI_JIT_PTR);
|
||||
// Abstain from using upper registers unless absolutely nescesary
|
||||
if (loc_info.IsLocked()) {
|
||||
// skip, not suitable for allocation
|
||||
// While R13 and R14 are technically available, we avoid allocating for them
|
||||
// at all costs, because theoretically skipping them is better than spilling
|
||||
// all over the place - i also fixes bugs with high reg pressure
|
||||
} else if (i >= HostLoc::R13 && i <= HostLoc::R15) {
|
||||
// skip, do not touch
|
||||
// Intel recommends to reuse registers as soon as they're overwritable (DO NOT SPILL)
|
||||
} else if (loc_info.IsEmpty()) {
|
||||
it_empty_candidate = i;
|
||||
break;
|
||||
// No empty registers for some reason (very evil) - just do normal LRU
|
||||
} else if (loc_info.lru_counter < min_lru_counter) {
|
||||
// Otherwise a "quasi"-LRU
|
||||
min_lru_counter = loc_info.lru_counter;
|
||||
if (i >= HostLoc::R8 && i <= HostLoc::R15) {
|
||||
it_rex_candidate = i;
|
||||
} else {
|
||||
it_candidate = i;
|
||||
}
|
||||
// There used to be a break here - DO NOT BREAK away you MUST
|
||||
// evaluate ALL of the registers BEFORE making a decision on when to take
|
||||
// otherwise reg pressure will get high and bugs will seep :)
|
||||
// TODO(lizzie): Investigate these god awful annoying reg pressure issues
|
||||
}
|
||||
// There used to be a break here - DO NOT BREAK away you MUST
|
||||
// evaluate ALL of the registers BEFORE making a decision on when to take
|
||||
// otherwise reg pressure will get high and bugs will seep :)
|
||||
// TODO(lizzie): Investigate these god awful annoying reg pressure issues
|
||||
}
|
||||
}
|
||||
// Final resolution goes as follows:
|
||||
|
|
@ -447,13 +445,13 @@ HostLoc RegAlloc::SelectARegister(const boost::container::static_vector<HostLoc,
|
|||
// 3 => Try using a REX prefixed one
|
||||
// We avoid using REX-addressable registers because they add +1 REX prefix which
|
||||
// do we really need? The trade-off may not be worth it.
|
||||
auto const it_final = it_empty_candidate != desired_locations.cend()
|
||||
? it_empty_candidate : it_candidate != desired_locations.cend()
|
||||
auto const it_final = it_empty_candidate != HostLoc::FirstSpill
|
||||
? it_empty_candidate : it_candidate != HostLoc::FirstSpill
|
||||
? it_candidate : it_rex_candidate;
|
||||
ASSERT(it_final != desired_locations.cend() && "All candidate registers have already been allocated");
|
||||
ASSERT(it_final != HostLoc::FirstSpill && "All candidate registers have already been allocated");
|
||||
// Evil magic - increment LRU counter (will wrap at 256)
|
||||
const_cast<RegAlloc*>(this)->LocInfo(*it_final).lru_counter++;
|
||||
return *it_final;
|
||||
const_cast<RegAlloc*>(this)->LocInfo(HostLoc(it_final)).lru_counter++;
|
||||
return HostLoc(it_final);
|
||||
}
|
||||
|
||||
std::optional<HostLoc> RegAlloc::ValueLocation(const IR::Inst* value) const noexcept {
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
/* This file is part of the dynarmic project.
|
||||
|
|
@ -139,7 +139,7 @@ class RegAlloc final {
|
|||
public:
|
||||
using ArgumentInfo = std::array<Argument, IR::max_arg_count>;
|
||||
RegAlloc() noexcept = default;
|
||||
RegAlloc(boost::container::static_vector<HostLoc, 28> gpr_order, boost::container::static_vector<HostLoc, 28> xmm_order) noexcept;
|
||||
RegAlloc(std::bitset<32> gpr_order, std::bitset<32> xmm_order) noexcept;
|
||||
|
||||
ArgumentInfo GetArgumentInfo(const IR::Inst* inst) noexcept;
|
||||
void RegisterPseudoOperation(const IR::Inst* inst) noexcept;
|
||||
|
|
@ -162,7 +162,7 @@ public:
|
|||
inline void Use(BlockOfCode& code, Argument& arg, const HostLoc host_loc) noexcept {
|
||||
ASSERT(!arg.allocated);
|
||||
arg.allocated = true;
|
||||
UseImpl(code, arg.value, {host_loc});
|
||||
UseImpl(code, arg.value, BuildRegSet({host_loc}));
|
||||
}
|
||||
|
||||
Xbyak::Reg64 UseScratchGpr(BlockOfCode& code, Argument& arg) noexcept;
|
||||
|
|
@ -178,13 +178,13 @@ public:
|
|||
return HostLocToReg64(ScratchImpl(code, gpr_order));
|
||||
}
|
||||
inline Xbyak::Reg64 ScratchGpr(BlockOfCode& code, const HostLoc desired_location) noexcept {
|
||||
return HostLocToReg64(ScratchImpl(code, {desired_location}));
|
||||
return HostLocToReg64(ScratchImpl(code, BuildRegSet({desired_location})));
|
||||
}
|
||||
inline Xbyak::Xmm ScratchXmm(BlockOfCode& code) noexcept {
|
||||
return HostLocToXmm(ScratchImpl(code, xmm_order));
|
||||
}
|
||||
inline Xbyak::Xmm ScratchXmm(BlockOfCode& code, HostLoc desired_location) noexcept {
|
||||
return HostLocToXmm(ScratchImpl(code, {desired_location}));
|
||||
return HostLocToXmm(ScratchImpl(code, BuildRegSet({desired_location})));
|
||||
}
|
||||
|
||||
void HostCall(
|
||||
|
|
@ -216,11 +216,11 @@ public:
|
|||
private:
|
||||
friend struct Argument;
|
||||
|
||||
HostLoc SelectARegister(const boost::container::static_vector<HostLoc, 28>& desired_locations) const noexcept;
|
||||
HostLoc SelectARegister(std::bitset<32> desired_locations) const noexcept;
|
||||
std::optional<HostLoc> ValueLocation(const IR::Inst* value) const noexcept;
|
||||
HostLoc UseImpl(BlockOfCode& code, IR::Value use_value, const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept;
|
||||
HostLoc UseScratchImpl(BlockOfCode& code, IR::Value use_value, const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept;
|
||||
HostLoc ScratchImpl(BlockOfCode& code, const boost::container::static_vector<HostLoc, 28>& desired_locations) noexcept;
|
||||
HostLoc UseImpl(BlockOfCode& code, IR::Value use_value, std::bitset<32> desired_locations) noexcept;
|
||||
HostLoc UseScratchImpl(BlockOfCode& code, IR::Value use_value, std::bitset<32> desired_locations) noexcept;
|
||||
HostLoc ScratchImpl(BlockOfCode& code, std::bitset<32> desired_locations) noexcept;
|
||||
void DefineValueImpl(BlockOfCode& code, IR::Inst* def_inst, HostLoc host_loc) noexcept;
|
||||
void DefineValueImpl(BlockOfCode& code, IR::Inst* def_inst, const IR::Value& use_inst) noexcept;
|
||||
|
||||
|
|
@ -246,12 +246,10 @@ private:
|
|||
void EmitExchange(BlockOfCode& code, const HostLoc a, const HostLoc b) noexcept;
|
||||
|
||||
//data
|
||||
alignas(64) boost::container::static_vector<HostLoc, 28> gpr_order;
|
||||
alignas(64) boost::container::static_vector<HostLoc, 28> xmm_order;
|
||||
alignas(64) std::array<HostLocInfo, NonSpillHostLocCount + SpillCount> hostloc_info;
|
||||
std::bitset<32> gpr_order;
|
||||
std::bitset<32> xmm_order;
|
||||
size_t reserved_stack_space = 0;
|
||||
};
|
||||
// Ensure a cache line (or less) is used, this is primordial
|
||||
static_assert(sizeof(boost::container::static_vector<HostLoc, 28>) < 64);
|
||||
|
||||
} // namespace Dynarmic::Backend::X64
|
||||
|
|
|
|||
Loading…
Reference in New Issue