[dynarmic] Transition IR::Block to use stable_vector<Inst>, remove inline pool + pooled vector

Signed-off-by: lizzie <lizzie@eden-emu.dev>
This commit is contained in:
lizzie 2026-02-14 10:01:40 +00:00
parent 19e2dba35a
commit 9f0d4b21ec
14 changed files with 53 additions and 110 deletions

View File

@ -212,7 +212,7 @@ EmittedBlockInfo EmitArm64(oaknut::CodeGenerator& code, IR::Block block, const E
oaknut::Label pass;
pass = conf.emit_cond(code, ctx, ctx.block.GetCondition());
EmitAddCycles(code, ctx, ctx.block.ConditionFailedCycleCount());
EmitAddCycles(code, ctx, ctx.block.cond_failed_cycle_count);
conf.emit_condition_failed_terminal(code, ctx);
code.l(pass);
@ -254,7 +254,7 @@ EmittedBlockInfo EmitArm64(oaknut::CodeGenerator& code, IR::Block block, const E
reg_alloc.AssertNoMoreUses();
EmitAddCycles(code, ctx, block.CycleCount());
EmitAddCycles(code, ctx, block.cycle_count);
conf.emit_terminal(code, ctx);
code.BRK(0);

View File

@ -151,7 +151,7 @@ EmittedBlockInfo EmitRV64(biscuit::Assembler& as, IR::Block block, const EmitCon
reg_alloc.AssertNoMoreUses();
if (emit_conf.enable_cycle_counting) {
const size_t cycles_to_add = block.CycleCount();
const size_t cycles_to_add = block.cycle_count;
as.LD(Xscratch0, offsetof(StackLayout, cycles_remaining), sp);
if (mcl::bit::sign_extend<12>(-cycles_to_add) == -cycles_to_add) {
as.ADDI(Xscratch0, Xscratch0, -cycles_to_add);

View File

@ -153,7 +153,7 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) {
reg_alloc.AssertNoMoreUses();
if (conf.enable_cycle_counting) {
EmitAddCycles(block.CycleCount());
EmitAddCycles(block.cycle_count);
}
EmitTerminal(block.GetTerminal(), ctx.Location().SetSingleStepping(false), ctx.IsSingleStep());
code.int3();
@ -197,7 +197,7 @@ void A32EmitX64::EmitCondPrelude(const A32EmitContext& ctx) {
Xbyak::Label pass = EmitCond(ctx.block.GetCondition());
if (conf.enable_cycle_counting) {
EmitAddCycles(ctx.block.ConditionFailedCycleCount());
EmitAddCycles(ctx.block.cond_failed_cycle_count);
}
EmitTerminal(IR::Term::LinkBlock{ctx.block.ConditionFailedLocation()}, ctx.Location().SetSingleStepping(false), ctx.IsSingleStep());
code.L(pass);

View File

@ -146,7 +146,7 @@ finish_this_inst:
reg_alloc.AssertNoMoreUses();
if (conf.enable_cycle_counting) {
EmitAddCycles(block.CycleCount());
EmitAddCycles(block.cycle_count);
}
EmitTerminal(block.GetTerminal(), ctx.Location().SetSingleStepping(false), ctx.IsSingleStep());
code.int3();

View File

@ -48,7 +48,7 @@ bool IsConditionPassed(TranslatorVisitor& v, IR::Cond cond) {
} else {
if (cond == v.ir.block.GetCondition()) {
v.ir.block.SetConditionFailedLocation(v.ir.current_location.AdvancePC(static_cast<int>(v.current_instruction_size)).AdvanceIT());
v.ir.block.ConditionFailedCycleCount()++;
v.ir.block.cond_failed_cycle_count++;
return true;
}
@ -79,7 +79,7 @@ bool IsConditionPassed(TranslatorVisitor& v, IR::Cond cond) {
v.cond_state = ConditionalState::Translating;
v.ir.block.SetCondition(cond);
v.ir.block.SetConditionFailedLocation(v.ir.current_location.AdvancePC(static_cast<int>(v.current_instruction_size)).AdvanceIT());
v.ir.block.ConditionFailedCycleCount() = v.ir.block.CycleCount() + 1;
v.ir.block.cond_failed_cycle_count = v.ir.block.cycle_count + 1;
return true;
}

View File

@ -61,7 +61,7 @@ void TranslateArm(IR::Block& block, LocationDescriptor descriptor, TranslateCall
}
visitor.ir.current_location = visitor.ir.current_location.AdvancePC(4);
block.CycleCount() += ticks_for_instruction;
block.cycle_count += ticks_for_instruction;
} while (should_continue && CondCanContinue(visitor.cond_state, visitor.ir) && !single_step);
if (visitor.cond_state == ConditionalState::Translating || visitor.cond_state == ConditionalState::Trailing || single_step) {
@ -101,7 +101,7 @@ bool TranslateSingleArmInstruction(IR::Block& block, LocationDescriptor descript
// TODO: Feed back the resulting cond status to the caller somehow.
visitor.ir.current_location = visitor.ir.current_location.AdvancePC(4);
block.CycleCount() += ticks_for_instruction;
block.cycle_count += ticks_for_instruction;
block.SetEndLocation(visitor.ir.current_location);

View File

@ -161,7 +161,7 @@ void TranslateThumb(IR::Block& block, LocationDescriptor descriptor, TranslateCa
}
visitor.ir.current_location = visitor.ir.current_location.AdvancePC(static_cast<int>(visitor.current_instruction_size)).AdvanceIT();
block.CycleCount() += ticks_for_instruction;
block.cycle_count += ticks_for_instruction;
} while (should_continue && CondCanContinue(visitor.cond_state, visitor.ir) && !single_step);
if (visitor.cond_state == ConditionalState::Translating || visitor.cond_state == ConditionalState::Trailing || single_step) {
@ -214,7 +214,7 @@ bool TranslateSingleThumbInstruction(IR::Block& block, LocationDescriptor descri
const s32 advance_pc = is_thumb_16 ? 2 : 4;
visitor.ir.current_location = visitor.ir.current_location.AdvancePC(advance_pc);
block.CycleCount() += ticks_for_instruction;
block.cycle_count += ticks_for_instruction;
block.SetEndLocation(visitor.ir.current_location);

View File

@ -35,7 +35,7 @@ void Translate(IR::Block& block, LocationDescriptor descriptor, MemoryReadCodeFu
}
visitor.ir.current_location = visitor.ir.current_location->AdvancePC(4);
block.CycleCount()++;
block.cycle_count++;
} while (should_continue && !single_step);
if (single_step && should_continue) {
@ -56,7 +56,7 @@ bool TranslateSingleInstruction(IR::Block& block, LocationDescriptor descriptor,
}
visitor.ir.current_location = visitor.ir.current_location->AdvancePC(4);
block.CycleCount()++;
block.cycle_count++;
block.SetEndLocation(*visitor.ir.current_location);

View File

@ -130,7 +130,7 @@ bool TranslatorVisitor::MRS(Imm<1> o0, Imm<3> op1, Imm<4> CRn, Imm<4> CRm, Imm<3
case SystemRegisterEncoding::CNTPCT_EL0:
// HACK: Ensure that this is the first instruction in the block it's emitted in, so the cycle count is most up-to-date.
if (!ir.block.instructions.empty() && !options.wall_clock_cntpct) {
ir.block.CycleCount()--;
ir.block.cycle_count--;
ir.SetTerm(IR::Term::LinkBlock{*ir.current_location});
return false;
}

View File

@ -30,38 +30,26 @@ Block::Block(LocationDescriptor location) noexcept
/// Prepends a new instruction to this basic block before the insertion point,
/// handling any allocations necessary to do so.
/// @param insertion_point Where to insert the new instruction.
/// @param op Opcode representing the instruction to add.
/// @param args A sequence of Value instances used as arguments for the instruction.
/// @param opcode Opcode representing the instruction to add.
/// @param args A sequence of Value instances used as arguments for the instruction.
/// @returns Iterator to the newly created instruction.
Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode opcode, std::initializer_list<Value> args) noexcept {
Block::iterator Block::PrependNewInst(Block::const_iterator insertion_point, Opcode opcode, std::initializer_list<Value> args) noexcept {
// First try using the "inline" buffer, otherwise fall back to a slower slab-like allocation scheme.
// The purpose is to avoid many calls to new/delete, which invoke malloc, which in turn invokes mmap -
// just pool it! The reason there is an inline buffer is that many small blocks are created
// with few instructions due to subpar optimisations in other passes; branch-heavy code will also
// benefit hugely from the coherency of faster allocations.
IR::Inst* inst;
if (inlined_inst.size() < inlined_inst.max_size()) {
inlined_inst.emplace_back(opcode);
inst = &inlined_inst[inlined_inst.size() - 1];
} else {
if (pooled_inst.empty() || pooled_inst.back().size() == pooled_inst.back().max_size())
pooled_inst.emplace_back();
pooled_inst.back().emplace_back(opcode);
inst = &pooled_inst.back()[pooled_inst.back().size() - 1];
}
DEBUG_ASSERT(args.size() == inst->NumArgs());
std::for_each(args.begin(), args.end(), [&inst, index = size_t(0)](const auto& arg) mutable {
inst->SetArg(index, arg);
auto it = instructions.insert(insertion_point, Inst(opcode));
DEBUG_ASSERT(args.size() == it->NumArgs());
std::for_each(args.begin(), args.end(), [&it, index = size_t(0)](const auto& arg) mutable {
it->SetArg(index, arg);
index++;
});
return instructions.insert_before(insertion_point, inst);
return it;
}
void Block::Reset(LocationDescriptor location_) noexcept {
mcl::intrusive_list<IR::Inst> tmp = {};
instructions.swap(tmp);
inlined_inst.clear();
pooled_inst.clear();
instructions.clear();
cond_failed.reset();
location = location_;
end_location = location_;
@ -110,7 +98,7 @@ static std::string TerminalToString(const Terminal& terminal_variant) noexcept {
std::string DumpBlock(const IR::Block& block) noexcept {
std::string ret = fmt::format("Block: location={}-{}\n", block.Location(), block.EndLocation())
+ fmt::format("cycles={}", block.CycleCount())
+ fmt::format("cycles={}", block.cycle_count)
+ fmt::format(", entry_cond={}", A64::CondToString(block.GetCondition()));
if (block.GetCondition() != Cond::AL)
ret += fmt::format(", cond_fail={}", block.ConditionFailedLocation());

View File

@ -16,7 +16,6 @@
#include <boost/container/container_fwd.hpp>
#include <boost/container/static_vector.hpp>
#include <boost/container/stable_vector.hpp>
#include <mcl/container/intrusive_list.hpp>
#include "dynarmic/common/common_types.h"
#include "dynarmic/ir/location_descriptor.h"
@ -33,10 +32,10 @@ enum class Opcode;
/// Note that this is a linear IR and not a pure tree-based IR, i.e. there is an ordering to
/// the microinstructions. This only matters before chaining is done in order to correctly
/// order memory accesses.
class alignas(4096) Block final {
class Block final {
public:
//using instruction_list_type = dense_list<Inst>;
using instruction_list_type = mcl::intrusive_list<Inst>;
using instruction_list_type = boost::container::stable_vector<Inst>;
using size_type = instruction_list_type::size_type;
using iterator = instruction_list_type::iterator;
using const_iterator = instruction_list_type::const_iterator;
@ -50,25 +49,9 @@ public:
Block(Block&&) = default;
Block& operator=(Block&&) = default;
/// Appends a new instruction to the end of this basic block,
/// handling any allocations necessary to do so.
/// @param op Opcode representing the instruction to add.
/// @param args A sequence of Value instances used as arguments for the instruction.
inline iterator AppendNewInst(const Opcode opcode, const std::initializer_list<IR::Value> args) noexcept {
return PrependNewInst(instructions.end(), opcode, args);
}
iterator PrependNewInst(iterator insertion_point, Opcode op, std::initializer_list<Value> args) noexcept;
iterator PrependNewInst(const_iterator insertion_point, Opcode op, std::initializer_list<Value> args) noexcept;
void Reset(LocationDescriptor location_) noexcept;
/// Gets a mutable reference to the instruction list for this basic block.
inline instruction_list_type& Instructions() noexcept {
return instructions;
}
/// Gets an immutable reference to the instruction list for this basic block.
inline const instruction_list_type& Instructions() const noexcept {
return instructions;
}
/// Gets the starting location for this basic block.
inline LocationDescriptor Location() const noexcept {
return location;
@ -104,15 +87,6 @@ public:
return cond_failed.has_value();
}
/// Gets a mutable reference to the condition failed cycle count.
inline size_t& ConditionFailedCycleCount() noexcept {
return cond_failed_cycle_count;
}
/// Gets an immutable reference to the condition failed cycle count.
inline const size_t& ConditionFailedCycleCount() const noexcept {
return cond_failed_cycle_count;
}
/// Gets the terminal instruction for this basic block.
inline Terminal GetTerminal() const noexcept {
return terminal;
@ -132,21 +106,8 @@ public:
return terminal.which() != 0;
}
/// Gets a mutable reference to the cycle count for this basic block.
inline size_t& CycleCount() noexcept {
return cycle_count;
}
/// Gets an immutable reference to the cycle count for this basic block.
inline const size_t& CycleCount() const noexcept {
return cycle_count;
}
/// "Hot cache" for small blocks so we don't call global allocator
boost::container::static_vector<Inst, 30> inlined_inst;
/// List of instructions in this block.
instruction_list_type instructions;
/// "Long/far" memory pool
boost::container::stable_vector<boost::container::static_vector<Inst, 32>> pooled_inst;
/// Block to execute next if `cond` did not pass.
std::optional<LocationDescriptor> cond_failed = {};
/// Description of the starting location of this block
@ -162,7 +123,7 @@ public:
/// Number of cycles this block takes to execute.
size_t cycle_count = 0;
};
static_assert(sizeof(Block) == 4096);
//static_assert(sizeof(Block) == 120);
/// Returns a string representation of the contents of block. Intended for debugging.
std::string DumpBlock(const IR::Block& block) noexcept;

View File

@ -70,7 +70,10 @@ enum class MemOp {
/// The user of this class updates `current_location` as appropriate.
class IREmitter {
public:
explicit IREmitter(Block& block) : block(block), insertion_point(block.instructions.end()) {}
explicit IREmitter(Block& block) noexcept
: block(block)
, insertion_point(block.instructions.end())
{}
Block& block;
@ -2947,19 +2950,10 @@ public:
block.SetTerminal(terminal);
}
void SetInsertionPointBefore(IR::Inst* new_insertion_point) {
insertion_point = IR::Block::iterator{*new_insertion_point};
}
/// Makes `new_insertion_point` the current insertion point of this emitter.
/// @param new_insertion_point Iterator into the block's instruction list to store as the insertion point.
void SetInsertionPointBefore(IR::Block::iterator new_insertion_point) {
insertion_point = new_insertion_point;
}
void SetInsertionPointAfter(IR::Inst* new_insertion_point) {
insertion_point = IR::Block::iterator{*new_insertion_point};
++insertion_point;
}
void SetInsertionPointAfter(IR::Block::iterator new_insertion_point) {
insertion_point = new_insertion_point;
++insertion_point;
@ -2970,7 +2964,10 @@ protected:
/// Creates a new instruction and inserts it at the current insertion point.
/// @tparam T Value wrapper type to return; constructed from a Value that points at the new instruction.
/// @param op Opcode of the instruction to create.
/// @param args Operands of the instruction; each one is converted to a Value.
/// @returns A `T` referring to the newly inserted instruction.
template<typename T = Value, typename... Args>
T Inst(Opcode op, Args... args) {
    auto iter = block.PrependNewInst(insertion_point, op, {Value(args)...});
    // The new instruction was inserted before the old insertion point, so the
    // position right after it is where the next instruction must go (this is
    // end() when appending). Deriving it from the returned iterator avoids
    // measuring the offset from begin() and special-casing end().
    insertion_point = std::next(iter);
    return T(Value(&*iter));
}
};

View File

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
/* This file is part of the dynarmic project.
@ -10,9 +10,7 @@
#include <array>
#include <mcl/container/intrusive_list.hpp>
#include "dynarmic/common/common_types.h"
#include "dynarmic/ir/value.h"
#include "dynarmic/ir/opcodes.h"
@ -26,7 +24,7 @@ constexpr size_t max_arg_count = 4;
/// A representation of a microinstruction. A single ARM/Thumb instruction may be
/// converted into zero or more microinstructions.
//class Inst final {
class Inst final : public mcl::intrusive_list_node<Inst> {
class Inst final {
public:
explicit Inst(Opcode op) : op(op) {}

View File

@ -86,12 +86,10 @@ static void ConstantMemoryReads(IR::Block& block, A32::UserCallbacks* cb) {
}
static void FlagsPass(IR::Block& block) {
using Iterator = typename std::reverse_iterator<IR::Block::iterator>;
struct FlagInfo {
bool set_not_required = false;
bool has_value_request = false;
Iterator value_request = {};
IR::Block::reverse_iterator value_request = {};
};
struct ValuelessFlagInfo {
bool set_not_required = false;
@ -102,7 +100,7 @@ static void FlagsPass(IR::Block& block) {
FlagInfo c_flag;
FlagInfo ge;
auto do_set = [&](FlagInfo& info, IR::Value value, Iterator inst) {
auto do_set = [&](FlagInfo& info, IR::Value value, IR::Block::reverse_iterator inst) {
if (info.has_value_request) {
info.value_request->ReplaceUsesWith(value);
}
@ -114,14 +112,14 @@ static void FlagsPass(IR::Block& block) {
info.set_not_required = true;
};
auto do_set_valueless = [&](ValuelessFlagInfo& info, Iterator inst) {
auto do_set_valueless = [](ValuelessFlagInfo& info, IR::Block::reverse_iterator inst) {
if (info.set_not_required) {
inst->Invalidate();
}
info.set_not_required = true;
};
auto do_get = [](FlagInfo& info, Iterator inst) {
auto do_get = [](FlagInfo& info, IR::Block::reverse_iterator inst) {
if (info.has_value_request) {
info.value_request->ReplaceUsesWith(IR::Value{&*inst});
}
@ -448,7 +446,8 @@ static void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf)
return;
}
for (auto& inst : block.instructions) {
for (auto it = block.instructions.begin(); it != block.instructions.end(); it++) {
auto& inst = *it;
if (inst.GetOpcode() != IR::Opcode::A64DataCacheOperationRaised) {
continue;
}
@ -457,7 +456,7 @@ static void A64CallbackConfigPass(IR::Block& block, const A64::UserConfig& conf)
if (op == A64::DataCacheOperation::ZeroByVA) {
A64::IREmitter ir{block};
ir.current_location = A64::LocationDescriptor{IR::LocationDescriptor{inst.GetArg(0).GetU64()}};
ir.SetInsertionPointBefore(&inst);
ir.SetInsertionPointBefore(it);
size_t bytes = 4 << static_cast<size_t>(conf.dczid_el0 & 0b1111);
IR::U64 addr{inst.GetArg(2)};
@ -522,7 +521,7 @@ static void A64GetSetElimination(IR::Block& block) {
const auto do_set = [&block](RegisterInfo& info, IR::Value value, Iterator set_inst, TrackingType tracking_type) {
if (info.set_instruction_present) {
info.last_set_instruction->Invalidate();
block.Instructions().erase(info.last_set_instruction);
block.instructions.erase(info.last_set_instruction);
}
info.register_value = value;
info.tracking_type = tracking_type;
@ -541,7 +540,7 @@ static void A64GetSetElimination(IR::Block& block) {
ReplaceUsesWith(*get_inst, true, u32(info.register_value.GetImmediateAsU64()));
} else {
A64::IREmitter ir{block};
ir.SetInsertionPointBefore(&*get_inst);
ir.SetInsertionPointBefore(get_inst);
get_inst->ReplaceUsesWith(ir.LeastSignificantWord(IR::U64{info.register_value}));
}
} else {
@ -647,7 +646,7 @@ static void A64MergeInterpretBlocksPass(IR::Block& block, A64::UserCallbacks* cb
IR::Block new_block{location};
A64::TranslateSingleInstruction(new_block, location, *instruction);
if (!new_block.Instructions().empty())
if (!new_block.instructions.empty())
return false;
const IR::Terminal terminal = new_block.GetTerminal();
@ -672,7 +671,7 @@ static void A64MergeInterpretBlocksPass(IR::Block& block, A64::UserCallbacks* cb
term->num_instructions = num_instructions;
block.ReplaceTerminal(terminal);
block.CycleCount() += num_instructions - 1;
block.cycle_count += num_instructions - 1;
}
using Op = Dynarmic::IR::Opcode;
@ -1241,7 +1240,7 @@ static void IdentityRemovalPass(IR::Block& block) {
}
if (it->GetOpcode() == IR::Opcode::Identity || it->GetOpcode() == IR::Opcode::Void) {
to_invalidate.push_back(&*it);
it = block.Instructions().erase(it);
it = block.instructions.erase(it);
} else {
++it;
}
@ -1401,9 +1400,9 @@ static void PolyfillPass(IR::Block& block, const PolyfillOptions& polyfill) {
IR::IREmitter ir{block};
for (auto& inst : block.instructions) {
ir.SetInsertionPointBefore(&inst);
for (auto it = block.instructions.begin(); it != block.instructions.end(); it++) {
auto& inst = *it;
ir.SetInsertionPointBefore(it);
switch (inst.GetOpcode()) {
case IR::Opcode::SHA256MessageSchedule0:
if (polyfill.sha256) {