diff --git a/src/audio_core/adsp/adsp.cpp b/src/audio_core/adsp/adsp.cpp index 48f0a63d4a..a578461f7c 100644 --- a/src/audio_core/adsp/adsp.cpp +++ b/src/audio_core/adsp/adsp.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -7,8 +10,8 @@ namespace AudioCore::ADSP { ADSP::ADSP(Core::System& system, Sink::Sink& sink) { - audio_renderer = std::make_unique(system, sink); - opus_decoder = std::make_unique(system); + audio_renderer.emplace(system, sink); + opus_decoder.emplace(system); opus_decoder->Send(Direction::DSP, OpusDecoder::Message::Start); if (opus_decoder->Receive(Direction::Host) != OpusDecoder::Message::StartOK) { LOG_ERROR(Service_Audio, "OpusDecoder failed to initialize."); @@ -17,11 +20,11 @@ ADSP::ADSP(Core::System& system, Sink::Sink& sink) { } AudioRenderer::AudioRenderer& ADSP::AudioRenderer() { - return *audio_renderer.get(); + return *audio_renderer; } OpusDecoder::OpusDecoder& ADSP::OpusDecoder() { - return *opus_decoder.get(); + return *opus_decoder; } } // namespace AudioCore::ADSP diff --git a/src/audio_core/adsp/adsp.h b/src/audio_core/adsp/adsp.h index a0c24a16a2..028d87939d 100644 --- a/src/audio_core/adsp/adsp.h +++ b/src/audio_core/adsp/adsp.h @@ -1,8 +1,13 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #pragma once +#include + #include "audio_core/adsp/apps/audio_renderer/audio_renderer.h" #include "audio_core/adsp/apps/opus/opus_decoder.h" #include "common/common_types.h" @@ -45,8 +50,8 @@ public: private: /// AudioRenderer app - std::unique_ptr audio_renderer{}; - std::unique_ptr opus_decoder{}; + std::optional audio_renderer{}; + std::optional opus_decoder{}; }; } // 
namespace ADSP diff --git a/src/audio_core/opus/decoder.cpp b/src/audio_core/opus/decoder.cpp index e60a7d48d4..7d0cce74db 100644 --- a/src/audio_core/opus/decoder.cpp +++ b/src/audio_core/opus/decoder.cpp @@ -27,33 +27,31 @@ OpusDecoder::OpusDecoder(Core::System& system_, HardwareOpus& hardware_opus_) OpusDecoder::~OpusDecoder() { if (decode_object_initialized) { - hardware_opus.ShutdownDecodeObject(shared_buffer.get(), shared_buffer_size); + hardware_opus.ShutdownDecodeObject(shared_buffer.data(), shared_buffer.size()); } } -Result OpusDecoder::Initialize(const OpusParametersEx& params, - Kernel::KTransferMemory* transfer_memory, u64 transfer_memory_size) { +Result OpusDecoder::Initialize(const OpusParametersEx& params, Kernel::KTransferMemory* transfer_memory, u64 transfer_memory_size) { auto frame_size{params.use_large_frame_size ? 5760 : 1920}; - shared_buffer_size = transfer_memory_size; - shared_buffer = std::make_unique(shared_buffer_size); + shared_buffer.resize(transfer_memory_size); shared_memory_mapped = true; buffer_size = Common::AlignUp((frame_size * params.channel_count) / (48'000 / params.sample_rate), 16); - out_data = {shared_buffer.get() + shared_buffer_size - buffer_size, buffer_size}; + out_data = {shared_buffer.data() + shared_buffer.size() - buffer_size, buffer_size}; size_t in_data_size{0x600u}; in_data = {out_data.data() - in_data_size, in_data_size}; ON_RESULT_FAILURE { if (shared_memory_mapped) { shared_memory_mapped = false; - ASSERT(R_SUCCEEDED(hardware_opus.UnmapMemory(shared_buffer.get(), shared_buffer_size))); + ASSERT(R_SUCCEEDED(hardware_opus.UnmapMemory(shared_buffer.data(), shared_buffer.size()))); } }; R_TRY(hardware_opus.InitializeDecodeObject(params.sample_rate, params.channel_count, - shared_buffer.get(), shared_buffer_size)); + shared_buffer.data(), shared_buffer.size())); sample_rate = params.sample_rate; channel_count = params.channel_count; @@ -62,31 +60,29 @@ Result OpusDecoder::Initialize(const OpusParametersEx& 
params, R_SUCCEED(); } -Result OpusDecoder::Initialize(const OpusMultiStreamParametersEx& params, - Kernel::KTransferMemory* transfer_memory, u64 transfer_memory_size) { +Result OpusDecoder::Initialize(const OpusMultiStreamParametersEx& params, Kernel::KTransferMemory* transfer_memory, u64 transfer_memory_size) { auto frame_size{params.use_large_frame_size ? 5760 : 1920}; - shared_buffer_size = transfer_memory_size; - shared_buffer = std::make_unique(shared_buffer_size); + shared_buffer.resize(transfer_memory_size, 0); shared_memory_mapped = true; buffer_size = Common::AlignUp((frame_size * params.channel_count) / (48'000 / params.sample_rate), 16); - out_data = {shared_buffer.get() + shared_buffer_size - buffer_size, buffer_size}; + out_data = {shared_buffer.data() + shared_buffer.size() - buffer_size, buffer_size}; size_t in_data_size{Common::AlignUp(1500ull * params.total_stream_count, 64u)}; in_data = {out_data.data() - in_data_size, in_data_size}; ON_RESULT_FAILURE { if (shared_memory_mapped) { shared_memory_mapped = false; - ASSERT(R_SUCCEEDED(hardware_opus.UnmapMemory(shared_buffer.get(), shared_buffer_size))); + ASSERT(R_SUCCEEDED(hardware_opus.UnmapMemory(shared_buffer.data(), shared_buffer.size()))); } }; R_TRY(hardware_opus.InitializeMultiStreamDecodeObject( params.sample_rate, params.channel_count, params.total_stream_count, - params.stereo_stream_count, params.mappings.data(), shared_buffer.get(), - shared_buffer_size)); + params.stereo_stream_count, params.mappings.data(), shared_buffer.data(), + shared_buffer.size())); sample_rate = params.sample_rate; channel_count = params.channel_count; @@ -113,7 +109,7 @@ Result OpusDecoder::DecodeInterleaved(u32* out_data_size, u64* out_time_taken, ResultBufferTooSmall); if (!shared_memory_mapped) { - R_TRY(hardware_opus.MapMemory(shared_buffer.get(), shared_buffer_size)); + R_TRY(hardware_opus.MapMemory(shared_buffer.data(), shared_buffer.size())); shared_memory_mapped = true; } @@ -121,7 +117,7 @@ Result 
OpusDecoder::DecodeInterleaved(u32* out_data_size, u64* out_time_taken, R_TRY(hardware_opus.DecodeInterleaved(out_samples, out_data.data(), out_data.size_bytes(), channel_count, in_data.data(), header.size, - shared_buffer.get(), time_taken, reset)); + shared_buffer.data(), time_taken, reset)); std::memcpy(output_data.data(), out_data.data(), out_samples * channel_count * sizeof(s16)); @@ -136,7 +132,7 @@ Result OpusDecoder::DecodeInterleaved(u32* out_data_size, u64* out_time_taken, Result OpusDecoder::SetContext([[maybe_unused]] std::span context) { R_SUCCEED_IF(shared_memory_mapped); shared_memory_mapped = true; - R_RETURN(hardware_opus.MapMemory(shared_buffer.get(), shared_buffer_size)); + R_RETURN(hardware_opus.MapMemory(shared_buffer.data(), shared_buffer.size())); } Result OpusDecoder::DecodeInterleavedForMultiStream(u32* out_data_size, u64* out_time_taken, @@ -159,7 +155,7 @@ Result OpusDecoder::DecodeInterleavedForMultiStream(u32* out_data_size, u64* out ResultBufferTooSmall); if (!shared_memory_mapped) { - R_TRY(hardware_opus.MapMemory(shared_buffer.get(), shared_buffer_size)); + R_TRY(hardware_opus.MapMemory(shared_buffer.data(), shared_buffer.size())); shared_memory_mapped = true; } @@ -167,7 +163,7 @@ Result OpusDecoder::DecodeInterleavedForMultiStream(u32* out_data_size, u64* out R_TRY(hardware_opus.DecodeInterleavedForMultiStream( out_samples, out_data.data(), out_data.size_bytes(), channel_count, in_data.data(), - header.size, shared_buffer.get(), time_taken, reset)); + header.size, shared_buffer.data(), time_taken, reset)); std::memcpy(output_data.data(), out_data.data(), out_samples * channel_count * sizeof(s16)); diff --git a/src/audio_core/opus/decoder.h b/src/audio_core/opus/decoder.h index 1b8c257d43..33bf88e349 100644 --- a/src/audio_core/opus/decoder.h +++ b/src/audio_core/opus/decoder.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: 
Copyright 2023 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -36,8 +39,7 @@ public: private: Core::System& system; HardwareOpus& hardware_opus; - std::unique_ptr shared_buffer{}; - u64 shared_buffer_size; + std::vector shared_buffer{}; std::span in_data{}; std::span out_data{}; u64 buffer_size{}; diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp index 4f0f2b6430..ea3da3d053 100644 --- a/src/common/fiber.cpp +++ b/src/common/fiber.cpp @@ -4,6 +4,7 @@ // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include #include #include "common/assert.h" @@ -14,100 +15,70 @@ namespace Common { -constexpr std::size_t default_stack_size = 512 * 1024; +constexpr size_t DEFAULT_STACK_SIZE = 128 * 4096; +constexpr u32 CANARY_VALUE = 0xDEADBEEF; struct Fiber::FiberImpl { - FiberImpl() : stack{default_stack_size}, rewind_stack{default_stack_size} {} + FiberImpl() {} - VirtualBuffer stack; - VirtualBuffer rewind_stack; + std::array stack{}; + std::array rewind_stack{}; + u32 canary = CANARY_VALUE; + + boost::context::detail::fcontext_t context{}; + boost::context::detail::fcontext_t rewind_context{}; std::mutex guard; std::function entry_point; std::function rewind_point; std::shared_ptr previous_fiber; - bool is_thread_fiber{}; - bool released{}; - u8* stack_limit{}; - u8* rewind_stack_limit{}; - boost::context::detail::fcontext_t context{}; - boost::context::detail::fcontext_t rewind_context{}; + u8* stack_limit = nullptr; + u8* rewind_stack_limit = nullptr; + bool is_thread_fiber = false; + bool released = false; }; void Fiber::SetRewindPoint(std::function&& rewind_func) { impl->rewind_point = std::move(rewind_func); } -void Fiber::Start(boost::context::detail::transfer_t& transfer) { - ASSERT(impl->previous_fiber != nullptr); - impl->previous_fiber->impl->context = transfer.fctx; - impl->previous_fiber->impl->guard.unlock(); - impl->previous_fiber.reset(); - impl->entry_point(); - 
UNREACHABLE(); -} - -void Fiber::OnRewind([[maybe_unused]] boost::context::detail::transfer_t& transfer) { - ASSERT(impl->context != nullptr); - impl->context = impl->rewind_context; - impl->rewind_context = nullptr; - u8* tmp = impl->stack_limit; - impl->stack_limit = impl->rewind_stack_limit; - impl->rewind_stack_limit = tmp; - impl->rewind_point(); - UNREACHABLE(); -} - -void Fiber::FiberStartFunc(boost::context::detail::transfer_t transfer) { - auto* fiber = static_cast(transfer.data); - fiber->Start(transfer); -} - -void Fiber::RewindStartFunc(boost::context::detail::transfer_t transfer) { - auto* fiber = static_cast(transfer.data); - fiber->OnRewind(transfer); -} - Fiber::Fiber(std::function&& entry_point_func) : impl{std::make_unique()} { impl->entry_point = std::move(entry_point_func); impl->stack_limit = impl->stack.data(); impl->rewind_stack_limit = impl->rewind_stack.data(); - u8* stack_base = impl->stack_limit + default_stack_size; - impl->context = - boost::context::detail::make_fcontext(stack_base, impl->stack.size(), FiberStartFunc); + u8* stack_base = impl->stack_limit + DEFAULT_STACK_SIZE; + impl->context = boost::context::detail::make_fcontext(stack_base, impl->stack.size(), [](boost::context::detail::transfer_t transfer) -> void { + auto* fiber = static_cast(transfer.data); + ASSERT(fiber && fiber->impl && fiber->impl->previous_fiber && fiber->impl->previous_fiber->impl); + ASSERT(fiber->impl->canary == CANARY_VALUE); + fiber->impl->previous_fiber->impl->context = transfer.fctx; + fiber->impl->previous_fiber->impl->guard.unlock(); + fiber->impl->previous_fiber.reset(); + fiber->impl->entry_point(); + UNREACHABLE(); + }); } Fiber::Fiber() : impl{std::make_unique()} {} Fiber::~Fiber() { - if (impl->released) { - return; - } - // Make sure the Fiber is not being used - const bool locked = impl->guard.try_lock(); - ASSERT_MSG(locked, "Destroying a fiber that's still running"); - if (locked) { - impl->guard.unlock(); + if (!impl->released) { + // Make 
sure the Fiber is not being used + const bool locked = impl->guard.try_lock(); + ASSERT_MSG(locked, "Destroying a fiber that's still running"); + if (locked) { + impl->guard.unlock(); + } } } void Fiber::Exit() { ASSERT_MSG(impl->is_thread_fiber, "Exiting non main thread fiber"); - if (!impl->is_thread_fiber) { - return; + if (impl->is_thread_fiber) { + impl->guard.unlock(); + impl->released = true; } - impl->guard.unlock(); - impl->released = true; -} - -void Fiber::Rewind() { - ASSERT(impl->rewind_point); - ASSERT(impl->rewind_context == nullptr); - u8* stack_base = impl->rewind_stack_limit + default_stack_size; - impl->rewind_context = - boost::context::detail::make_fcontext(stack_base, impl->stack.size(), RewindStartFunc); - boost::context::detail::jump_fcontext(impl->rewind_context, this); } void Fiber::YieldTo(std::weak_ptr weak_from, Fiber& to) { @@ -115,16 +86,15 @@ void Fiber::YieldTo(std::weak_ptr weak_from, Fiber& to) { to.impl->previous_fiber = weak_from.lock(); auto transfer = boost::context::detail::jump_fcontext(to.impl->context, &to); - // "from" might no longer be valid if the thread was killed if (auto from = weak_from.lock()) { if (from->impl->previous_fiber == nullptr) { - ASSERT_MSG(false, "previous_fiber is nullptr!"); - return; + ASSERT(false && "previous_fiber is nullptr!"); + } else { + from->impl->previous_fiber->impl->context = transfer.fctx; + from->impl->previous_fiber->impl->guard.unlock(); + from->impl->previous_fiber.reset(); } - from->impl->previous_fiber->impl->context = transfer.fctx; - from->impl->previous_fiber->impl->guard.unlock(); - from->impl->previous_fiber.reset(); } } diff --git a/src/common/fiber.h b/src/common/fiber.h index 8af6ae4d3a..eb128f4bb2 100644 --- a/src/common/fiber.h +++ b/src/common/fiber.h @@ -45,22 +45,12 @@ public: /// Fiber 'from' must be the currently running fiber. 
static void YieldTo(std::weak_ptr weak_from, Fiber& to); [[nodiscard]] static std::shared_ptr ThreadToFiber(); - void SetRewindPoint(std::function&& rewind_func); - - void Rewind(); - /// Only call from main thread's fiber void Exit(); - private: Fiber(); - - void OnRewind(boost::context::detail::transfer_t& transfer); void Start(boost::context::detail::transfer_t& transfer); - static void FiberStartFunc(boost::context::detail::transfer_t transfer); - static void RewindStartFunc(boost::context::detail::transfer_t transfer); - struct FiberImpl; std::unique_ptr impl; }; diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp index e14bf3e651..4f9c240905 100644 --- a/src/common/wall_clock.cpp +++ b/src/common/wall_clock.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -9,7 +12,6 @@ #include "common/x64/native_clock.h" #include "common/x64/rdtsc.h" #endif - #ifdef HAS_NCE #include "common/arm64/native_clock.h" #endif @@ -73,8 +75,4 @@ std::unique_ptr CreateOptimalClock() { #endif } -std::unique_ptr CreateStandardWallClock() { - return std::make_unique(); -} - } // namespace Common diff --git a/src/common/wall_clock.h b/src/common/wall_clock.h index 3a0c43909a..7ad6536930 100644 --- a/src/common/wall_clock.h +++ b/src/common/wall_clock.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -84,8 +87,6 @@ protected: using CPUTickToGPUTickRatio = std::ratio; }; -std::unique_ptr CreateOptimalClock(); - -std::unique_ptr CreateStandardWallClock(); +[[nodiscard]] std::unique_ptr CreateOptimalClock(); } // namespace Common diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp 
b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index b57996cb8b..0fa4ca6f06 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -16,170 +16,160 @@ namespace Core { using namespace Common::Literals; -class DynarmicCallbacks32 : public Dynarmic::A32::UserCallbacks { -public: - explicit DynarmicCallbacks32(ArmDynarmic32& parent, Kernel::KProcess* process) - : m_parent{parent}, m_memory(process->GetMemory()), - m_process(process), m_debugger_enabled{parent.m_system.DebuggerEnabled()}, - m_check_memory_access{m_debugger_enabled || - !Settings::values.cpuopt_ignore_memory_aborts.GetValue()} {} +DynarmicCallbacks32::DynarmicCallbacks32(ArmDynarmic32& parent, Kernel::KProcess* process) + : m_parent{parent}, m_memory(process->GetMemory()) + , m_process(process), m_debugger_enabled{parent.m_system.DebuggerEnabled()} + , m_check_memory_access{m_debugger_enabled || !Settings::values.cpuopt_ignore_memory_aborts.GetValue()} +{} - u8 MemoryRead8(u32 vaddr) override { - CheckMemoryAccess(vaddr, 1, Kernel::DebugWatchpointType::Read); - return m_memory.Read8(vaddr); - } - u16 MemoryRead16(u32 vaddr) override { - CheckMemoryAccess(vaddr, 2, Kernel::DebugWatchpointType::Read); - return m_memory.Read16(vaddr); - } - u32 MemoryRead32(u32 vaddr) override { - CheckMemoryAccess(vaddr, 4, Kernel::DebugWatchpointType::Read); - return m_memory.Read32(vaddr); - } - u64 MemoryRead64(u32 vaddr) override { - CheckMemoryAccess(vaddr, 8, Kernel::DebugWatchpointType::Read); - return m_memory.Read64(vaddr); - } - std::optional MemoryReadCode(u32 vaddr) override { - if (!m_memory.IsValidVirtualAddressRange(vaddr, sizeof(u32))) { - return std::nullopt; - } - return m_memory.Read32(vaddr); - } +u8 DynarmicCallbacks32::MemoryRead8(u32 vaddr) { + CheckMemoryAccess(vaddr, 1, Kernel::DebugWatchpointType::Read); + return m_memory.Read8(vaddr); +} +u16 DynarmicCallbacks32::MemoryRead16(u32 vaddr) { + CheckMemoryAccess(vaddr, 2, 
Kernel::DebugWatchpointType::Read); + return m_memory.Read16(vaddr); +} +u32 DynarmicCallbacks32::MemoryRead32(u32 vaddr) { + CheckMemoryAccess(vaddr, 4, Kernel::DebugWatchpointType::Read); + return m_memory.Read32(vaddr); +} +u64 DynarmicCallbacks32::MemoryRead64(u32 vaddr) { + CheckMemoryAccess(vaddr, 8, Kernel::DebugWatchpointType::Read); + return m_memory.Read64(vaddr); +} +std::optional DynarmicCallbacks32::MemoryReadCode(u32 vaddr) { + if (!m_memory.IsValidVirtualAddressRange(vaddr, sizeof(u32))) + return std::nullopt; + return m_memory.Read32(vaddr); +} - void MemoryWrite8(u32 vaddr, u8 value) override { - if (CheckMemoryAccess(vaddr, 1, Kernel::DebugWatchpointType::Write)) { - m_memory.Write8(vaddr, value); - } +void DynarmicCallbacks32::MemoryWrite8(u32 vaddr, u8 value) { + if (CheckMemoryAccess(vaddr, 1, Kernel::DebugWatchpointType::Write)) { + m_memory.Write8(vaddr, value); } - void MemoryWrite16(u32 vaddr, u16 value) override { - if (CheckMemoryAccess(vaddr, 2, Kernel::DebugWatchpointType::Write)) { - m_memory.Write16(vaddr, value); - } +} +void DynarmicCallbacks32::MemoryWrite16(u32 vaddr, u16 value) { + if (CheckMemoryAccess(vaddr, 2, Kernel::DebugWatchpointType::Write)) { + m_memory.Write16(vaddr, value); } - void MemoryWrite32(u32 vaddr, u32 value) override { - if (CheckMemoryAccess(vaddr, 4, Kernel::DebugWatchpointType::Write)) { - m_memory.Write32(vaddr, value); - } +} +void DynarmicCallbacks32::MemoryWrite32(u32 vaddr, u32 value) { + if (CheckMemoryAccess(vaddr, 4, Kernel::DebugWatchpointType::Write)) { + m_memory.Write32(vaddr, value); } - void MemoryWrite64(u32 vaddr, u64 value) override { - if (CheckMemoryAccess(vaddr, 8, Kernel::DebugWatchpointType::Write)) { - m_memory.Write64(vaddr, value); - } +} +void DynarmicCallbacks32::MemoryWrite64(u32 vaddr, u64 value) { + if (CheckMemoryAccess(vaddr, 8, Kernel::DebugWatchpointType::Write)) { + m_memory.Write64(vaddr, value); } +} - bool MemoryWriteExclusive8(u32 vaddr, u8 value, u8 expected) 
override { - return CheckMemoryAccess(vaddr, 1, Kernel::DebugWatchpointType::Write) && - m_memory.WriteExclusive8(vaddr, value, expected); - } - bool MemoryWriteExclusive16(u32 vaddr, u16 value, u16 expected) override { - return CheckMemoryAccess(vaddr, 2, Kernel::DebugWatchpointType::Write) && - m_memory.WriteExclusive16(vaddr, value, expected); - } - bool MemoryWriteExclusive32(u32 vaddr, u32 value, u32 expected) override { - return CheckMemoryAccess(vaddr, 4, Kernel::DebugWatchpointType::Write) && - m_memory.WriteExclusive32(vaddr, value, expected); - } - bool MemoryWriteExclusive64(u32 vaddr, u64 value, u64 expected) override { - return CheckMemoryAccess(vaddr, 8, Kernel::DebugWatchpointType::Write) && - m_memory.WriteExclusive64(vaddr, value, expected); - } +bool DynarmicCallbacks32::MemoryWriteExclusive8(u32 vaddr, u8 value, u8 expected) { + return CheckMemoryAccess(vaddr, 1, Kernel::DebugWatchpointType::Write) && + m_memory.WriteExclusive8(vaddr, value, expected); +} +bool DynarmicCallbacks32::MemoryWriteExclusive16(u32 vaddr, u16 value, u16 expected) { + return CheckMemoryAccess(vaddr, 2, Kernel::DebugWatchpointType::Write) && + m_memory.WriteExclusive16(vaddr, value, expected); +} +bool DynarmicCallbacks32::MemoryWriteExclusive32(u32 vaddr, u32 value, u32 expected) { + return CheckMemoryAccess(vaddr, 4, Kernel::DebugWatchpointType::Write) && + m_memory.WriteExclusive32(vaddr, value, expected); +} +bool DynarmicCallbacks32::MemoryWriteExclusive64(u32 vaddr, u64 value, u64 expected) { + return CheckMemoryAccess(vaddr, 8, Kernel::DebugWatchpointType::Write) && + m_memory.WriteExclusive64(vaddr, value, expected); +} - void InterpreterFallback(u32 pc, std::size_t num_instructions) override { - m_parent.LogBacktrace(m_process); - LOG_ERROR(Core_ARM, - "Unimplemented instruction @ {:#X} for {} instructions (instr = {:08X})", pc, - num_instructions, m_memory.Read32(pc)); - } +void DynarmicCallbacks32::InterpreterFallback(u32 pc, std::size_t num_instructions) { + 
m_parent.LogBacktrace(m_process); + LOG_ERROR(Core_ARM, + "Unimplemented instruction @ {:#X} for {} instructions (instr = {:08X})", pc, + num_instructions, m_memory.Read32(pc)); +} - void ExceptionRaised(u32 pc, Dynarmic::A32::Exception exception) override { - switch (exception) { - case Dynarmic::A32::Exception::NoExecuteFault: - LOG_CRITICAL(Core_ARM, "Cannot execute instruction at unmapped address {:#08x}", pc); - ReturnException(pc, PrefetchAbort); +void DynarmicCallbacks32::ExceptionRaised(u32 pc, Dynarmic::A32::Exception exception) { + switch (exception) { + case Dynarmic::A32::Exception::NoExecuteFault: + LOG_CRITICAL(Core_ARM, "Cannot execute instruction at unmapped address {:#08x}", pc); + ReturnException(pc, PrefetchAbort); + return; + default: + if (m_debugger_enabled) { + ReturnException(pc, InstructionBreakpoint); return; - default: - if (m_debugger_enabled) { - ReturnException(pc, InstructionBreakpoint); - return; - } - - m_parent.LogBacktrace(m_process); - LOG_CRITICAL(Core_ARM, - "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X}, thumb = {})", - exception, pc, m_memory.Read32(pc), m_parent.IsInThumbMode()); } + + m_parent.LogBacktrace(m_process); + LOG_CRITICAL(Core_ARM, + "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X}, thumb = {})", + exception, pc, m_memory.Read32(pc), m_parent.IsInThumbMode()); } +} - void CallSVC(u32 swi) override { - m_parent.m_svc_swi = swi; - m_parent.m_jit->HaltExecution(SupervisorCall); - } +void DynarmicCallbacks32::CallSVC(u32 swi) { + m_parent.m_svc_swi = swi; + m_parent.m_jit->HaltExecution(SupervisorCall); +} - void AddTicks(u64 ticks) override { - ASSERT_MSG(!m_parent.m_uses_wall_clock, "Dynarmic ticking disabled"); +void DynarmicCallbacks32::AddTicks(u64 ticks) { + ASSERT_MSG(!m_parent.m_uses_wall_clock, "Dynarmic ticking disabled"); - // Divide the number of ticks by the amount of CPU cores. 
TODO(Subv): This yields only a - // rough approximation of the amount of executed ticks in the system, it may be thrown off - // if not all cores are doing a similar amount of work. Instead of doing this, we should - // device a way so that timing is consistent across all cores without increasing the ticks 4 - // times. - u64 amortized_ticks = ticks / Core::Hardware::NUM_CPU_CORES; - // Always execute at least one tick. - amortized_ticks = std::max(amortized_ticks, 1); + // Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a + // rough approximation of the amount of executed ticks in the system, it may be thrown off + // if not all cores are doing a similar amount of work. Instead of doing this, we should + // device a way so that timing is consistent across all cores without increasing the ticks 4 + // times. + u64 amortized_ticks = ticks / Core::Hardware::NUM_CPU_CORES; + // Always execute at least one tick. + amortized_ticks = std::max(amortized_ticks, 1); - m_parent.m_system.CoreTiming().AddTicks(amortized_ticks); - } + m_parent.m_system.CoreTiming().AddTicks(amortized_ticks); +} - u64 GetTicksRemaining() override { - ASSERT_MSG(!m_parent.m_uses_wall_clock, "Dynarmic ticking disabled"); +u64 DynarmicCallbacks32::GetTicksRemaining() { + ASSERT_MSG(!m_parent.m_uses_wall_clock, "Dynarmic ticking disabled"); - return std::max(m_parent.m_system.CoreTiming().GetDowncount(), 0); - } - - bool CheckMemoryAccess(u64 addr, u64 size, Kernel::DebugWatchpointType type) { - if (!m_check_memory_access) { - return true; - } - - if (!m_memory.IsValidVirtualAddressRange(addr, size)) { - LOG_CRITICAL(Core_ARM, "Stopping execution due to unmapped memory access at {:#x}", - addr); - m_parent.m_jit->HaltExecution(PrefetchAbort); - return false; - } - - if (!m_debugger_enabled) { - return true; - } - - const auto match{m_parent.MatchingWatchpoint(addr, size, type)}; - if (match) { - m_parent.m_halted_watchpoint = match; - 
m_parent.m_jit->HaltExecution(DataAbort); - return false; - } + return std::max(m_parent.m_system.CoreTiming().GetDowncount(), 0); +} +bool DynarmicCallbacks32::CheckMemoryAccess(u64 addr, u64 size, Kernel::DebugWatchpointType type) { + if (!m_check_memory_access) { return true; } - void ReturnException(u32 pc, Dynarmic::HaltReason hr) { - m_parent.GetContext(m_parent.m_breakpoint_context); - m_parent.m_breakpoint_context.pc = pc; - m_parent.m_breakpoint_context.r[15] = pc; - m_parent.m_jit->HaltExecution(hr); + if (!m_memory.IsValidVirtualAddressRange(addr, size)) { + LOG_CRITICAL(Core_ARM, "Stopping execution due to unmapped memory access at {:#x}", + addr); + m_parent.m_jit->HaltExecution(PrefetchAbort); + return false; } - ArmDynarmic32& m_parent; - Core::Memory::Memory& m_memory; - Kernel::KProcess* m_process{}; - const bool m_debugger_enabled{}; - const bool m_check_memory_access{}; -}; + if (!m_debugger_enabled) { + return true; + } -std::shared_ptr ArmDynarmic32::MakeJit(Common::PageTable* page_table) const { + const auto match{m_parent.MatchingWatchpoint(addr, size, type)}; + if (match) { + m_parent.m_halted_watchpoint = match; + m_parent.m_jit->HaltExecution(DataAbort); + return false; + } + + return true; +} + +void DynarmicCallbacks32::ReturnException(u32 pc, Dynarmic::HaltReason hr) { + m_parent.GetContext(m_parent.m_breakpoint_context); + m_parent.m_breakpoint_context.pc = pc; + m_parent.m_breakpoint_context.r[15] = pc; + m_parent.m_jit->HaltExecution(hr); +} + +void ArmDynarmic32::MakeJit(Common::PageTable* page_table) { Dynarmic::A32::UserConfig config; - config.callbacks = m_cb.get(); + config.callbacks = std::addressof(*m_cb); config.coprocessors[15] = m_cp15; config.define_unpredictable_behaviour = true; @@ -315,7 +305,7 @@ std::shared_ptr ArmDynarmic32::MakeJit(Common::PageTable* pa default: break; } - return std::make_unique(config); + m_jit.emplace(config); } static std::pair FpscrToFpsrFpcr(u32 fpscr) { @@ -360,21 +350,17 @@ u32 
ArmDynarmic32::GetSvcNumber() const { } void ArmDynarmic32::GetSvcArguments(std::span args) const { - Dynarmic::A32::Jit& j = *m_jit; + Dynarmic::A32::Jit const& j = *m_jit; auto& gpr = j.Regs(); - - for (size_t i = 0; i < 8; i++) { + for (size_t i = 0; i < 8; i++) args[i] = gpr[i]; - } } void ArmDynarmic32::SetSvcArguments(std::span args) { Dynarmic::A32::Jit& j = *m_jit; auto& gpr = j.Regs(); - - for (size_t i = 0; i < 8; i++) { - gpr[i] = static_cast(args[i]); - } + for (size_t i = 0; i < 8; i++) + gpr[i] = u32(args[i]); } const Kernel::DebugWatchpoint* ArmDynarmic32::HaltedWatchpoint() const { @@ -387,11 +373,12 @@ void ArmDynarmic32::RewindBreakpointInstruction() { ArmDynarmic32::ArmDynarmic32(System& system, bool uses_wall_clock, Kernel::KProcess* process, DynarmicExclusiveMonitor& exclusive_monitor, std::size_t core_index) - : ArmInterface{uses_wall_clock}, m_system{system}, m_exclusive_monitor{exclusive_monitor}, - m_cb(std::make_unique(*this, process)), - m_cp15(std::make_shared(*this)), m_core_index{core_index} { + : ArmInterface{uses_wall_clock}, m_system{system}, m_exclusive_monitor{exclusive_monitor} + , m_cb(std::make_optional(*this, process)) + , m_cp15(std::make_shared(*this)), m_core_index{core_index} +{ auto& page_table_impl = process->GetPageTable().GetBasePageTable().GetImpl(); - m_jit = MakeJit(&page_table_impl); + MakeJit(&page_table_impl); } ArmDynarmic32::~ArmDynarmic32() = default; @@ -401,23 +388,18 @@ void ArmDynarmic32::SetTpidrroEl0(u64 value) { } void ArmDynarmic32::GetContext(Kernel::Svc::ThreadContext& ctx) const { - Dynarmic::A32::Jit& j = *m_jit; + Dynarmic::A32::Jit const& j = *m_jit; auto& gpr = j.Regs(); auto& fpr = j.ExtRegs(); - - for (size_t i = 0; i < 16; i++) { + for (size_t i = 0; i < 16; i++) ctx.r[i] = gpr[i]; - } - ctx.fp = gpr[11]; ctx.sp = gpr[13]; ctx.lr = gpr[14]; ctx.pc = gpr[15]; ctx.pstate = j.Cpsr(); - static_assert(sizeof(fpr) <= sizeof(ctx.v)); std::memcpy(ctx.v.data(), &fpr, sizeof(fpr)); - auto [fpsr, fpcr] 
= FpscrToFpsrFpcr(j.Fpscr()); ctx.fpcr = fpcr; ctx.fpsr = fpsr; @@ -428,16 +410,11 @@ void ArmDynarmic32::SetContext(const Kernel::Svc::ThreadContext& ctx) { Dynarmic::A32::Jit& j = *m_jit; auto& gpr = j.Regs(); auto& fpr = j.ExtRegs(); - - for (size_t i = 0; i < 16; i++) { - gpr[i] = static_cast(ctx.r[i]); - } - + for (size_t i = 0; i < 16; i++) + gpr[i] = u32(ctx.r[i]); j.SetCpsr(ctx.pstate); - static_assert(sizeof(fpr) <= sizeof(ctx.v)); std::memcpy(&fpr, ctx.v.data(), sizeof(fpr)); - j.SetFpscr(FpsrFpcrToFpscr(ctx.fpsr, ctx.fpcr)); m_cp15->uprw = static_cast(ctx.tpidr); } diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.h b/src/core/arm/dynarmic/arm_dynarmic_32.h index b580efe615..1934934bd9 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.h +++ b/src/core/arm/dynarmic/arm_dynarmic_32.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -12,16 +15,50 @@ namespace Core::Memory { class Memory; } +namespace Kernel { +enum class DebugWatchpointType : u8; +class KPRocess; +} + namespace Core { -class DynarmicCallbacks32; +class ArmDynarmic32; class DynarmicCP15; class System; +class DynarmicCallbacks32 : public Dynarmic::A32::UserCallbacks { +public: + explicit DynarmicCallbacks32(ArmDynarmic32& parent, Kernel::KProcess* process); + u8 MemoryRead8(u32 vaddr) override; + u16 MemoryRead16(u32 vaddr) override; + u32 MemoryRead32(u32 vaddr) override; + u64 MemoryRead64(u32 vaddr) override; + std::optional MemoryReadCode(u32 vaddr) override; + void MemoryWrite8(u32 vaddr, u8 value) override; + void MemoryWrite16(u32 vaddr, u16 value) override; + void MemoryWrite32(u32 vaddr, u32 value) override; + void MemoryWrite64(u32 vaddr, u64 value) override; + bool MemoryWriteExclusive8(u32 vaddr, u8 value, u8 expected) override; + bool MemoryWriteExclusive16(u32 vaddr, u16 value, u16 
expected) override; + bool MemoryWriteExclusive32(u32 vaddr, u32 value, u32 expected) override; + bool MemoryWriteExclusive64(u32 vaddr, u64 value, u64 expected) override; + void InterpreterFallback(u32 pc, std::size_t num_instructions) override; + void ExceptionRaised(u32 pc, Dynarmic::A32::Exception exception) override; + void CallSVC(u32 swi) override; + void AddTicks(u64 ticks) override; + u64 GetTicksRemaining() override; + bool CheckMemoryAccess(u64 addr, u64 size, Kernel::DebugWatchpointType type); + void ReturnException(u32 pc, Dynarmic::HaltReason hr); + ArmDynarmic32& m_parent; + Core::Memory::Memory& m_memory; + Kernel::KProcess* m_process{}; + const bool m_debugger_enabled{}; + const bool m_check_memory_access{}; +}; + class ArmDynarmic32 final : public ArmInterface { public: - ArmDynarmic32(System& system, bool uses_wall_clock, Kernel::KProcess* process, - DynarmicExclusiveMonitor& exclusive_monitor, std::size_t core_index); + ArmDynarmic32(System& system, bool uses_wall_clock, Kernel::KProcess* process, DynarmicExclusiveMonitor& exclusive_monitor, std::size_t core_index); ~ArmDynarmic32() override; Architecture GetArchitecture() const override { @@ -57,13 +94,13 @@ private: friend class DynarmicCallbacks32; friend class DynarmicCP15; - std::shared_ptr MakeJit(Common::PageTable* page_table) const; + void MakeJit(Common::PageTable* page_table); - std::unique_ptr m_cb{}; + std::optional m_cb{}; std::shared_ptr m_cp15{}; std::size_t m_core_index{}; - std::shared_ptr m_jit{}; + std::optional m_jit{}; // SVC callback u32 m_svc_swi{}; diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index ba6178c1e4..92e1a70458 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -13,223 +13,203 @@ namespace Core { -using Vector = Dynarmic::A64::Vector; using namespace Common::Literals; -class DynarmicCallbacks64 : public Dynarmic::A64::UserCallbacks { -public: - explicit 
DynarmicCallbacks64(ArmDynarmic64& parent, Kernel::KProcess* process) - : m_parent{parent}, m_memory(process->GetMemory()), - m_process(process), m_debugger_enabled{parent.m_system.DebuggerEnabled()}, - m_check_memory_access{m_debugger_enabled || - !Settings::values.cpuopt_ignore_memory_aborts.GetValue()} {} +DynarmicCallbacks64::DynarmicCallbacks64(ArmDynarmic64& parent, Kernel::KProcess* process) + : m_parent{parent}, m_memory(process->GetMemory()) + , m_process(process), m_debugger_enabled{parent.m_system.DebuggerEnabled()} + , m_check_memory_access{m_debugger_enabled || !Settings::values.cpuopt_ignore_memory_aborts.GetValue()} +{} - u8 MemoryRead8(u64 vaddr) override { - CheckMemoryAccess(vaddr, 1, Kernel::DebugWatchpointType::Read); - return m_memory.Read8(vaddr); - } - u16 MemoryRead16(u64 vaddr) override { - CheckMemoryAccess(vaddr, 2, Kernel::DebugWatchpointType::Read); - return m_memory.Read16(vaddr); - } - u32 MemoryRead32(u64 vaddr) override { - CheckMemoryAccess(vaddr, 4, Kernel::DebugWatchpointType::Read); - return m_memory.Read32(vaddr); - } - u64 MemoryRead64(u64 vaddr) override { - CheckMemoryAccess(vaddr, 8, Kernel::DebugWatchpointType::Read); - return m_memory.Read64(vaddr); - } - Vector MemoryRead128(u64 vaddr) override { - CheckMemoryAccess(vaddr, 16, Kernel::DebugWatchpointType::Read); - return {m_memory.Read64(vaddr), m_memory.Read64(vaddr + 8)}; - } - std::optional MemoryReadCode(u64 vaddr) override { - if (!m_memory.IsValidVirtualAddressRange(vaddr, sizeof(u32))) { - return std::nullopt; - } - return m_memory.Read32(vaddr); - } +u8 DynarmicCallbacks64::MemoryRead8(u64 vaddr) { + CheckMemoryAccess(vaddr, 1, Kernel::DebugWatchpointType::Read); + return m_memory.Read8(vaddr); +} +u16 DynarmicCallbacks64::MemoryRead16(u64 vaddr) { + CheckMemoryAccess(vaddr, 2, Kernel::DebugWatchpointType::Read); + return m_memory.Read16(vaddr); +} +u32 DynarmicCallbacks64::MemoryRead32(u64 vaddr) { + CheckMemoryAccess(vaddr, 4, 
Kernel::DebugWatchpointType::Read); + return m_memory.Read32(vaddr); +} +u64 DynarmicCallbacks64::MemoryRead64(u64 vaddr) { + CheckMemoryAccess(vaddr, 8, Kernel::DebugWatchpointType::Read); + return m_memory.Read64(vaddr); +} +Dynarmic::A64::Vector DynarmicCallbacks64::MemoryRead128(u64 vaddr) { + CheckMemoryAccess(vaddr, 16, Kernel::DebugWatchpointType::Read); + return {m_memory.Read64(vaddr), m_memory.Read64(vaddr + 8)}; +} +std::optional DynarmicCallbacks64::MemoryReadCode(u64 vaddr) { + if (!m_memory.IsValidVirtualAddressRange(vaddr, sizeof(u32))) + return std::nullopt; + return m_memory.Read32(vaddr); +} - void MemoryWrite8(u64 vaddr, u8 value) override { - if (CheckMemoryAccess(vaddr, 1, Kernel::DebugWatchpointType::Write)) { - m_memory.Write8(vaddr, value); - } +void DynarmicCallbacks64::MemoryWrite8(u64 vaddr, u8 value) { + if (CheckMemoryAccess(vaddr, 1, Kernel::DebugWatchpointType::Write)) { + m_memory.Write8(vaddr, value); } - void MemoryWrite16(u64 vaddr, u16 value) override { - if (CheckMemoryAccess(vaddr, 2, Kernel::DebugWatchpointType::Write)) { - m_memory.Write16(vaddr, value); - } +} +void DynarmicCallbacks64::MemoryWrite16(u64 vaddr, u16 value) { + if (CheckMemoryAccess(vaddr, 2, Kernel::DebugWatchpointType::Write)) { + m_memory.Write16(vaddr, value); } - void MemoryWrite32(u64 vaddr, u32 value) override { - if (CheckMemoryAccess(vaddr, 4, Kernel::DebugWatchpointType::Write)) { - m_memory.Write32(vaddr, value); - } +} +void DynarmicCallbacks64::MemoryWrite32(u64 vaddr, u32 value) { + if (CheckMemoryAccess(vaddr, 4, Kernel::DebugWatchpointType::Write)) { + m_memory.Write32(vaddr, value); } - void MemoryWrite64(u64 vaddr, u64 value) override { - if (CheckMemoryAccess(vaddr, 8, Kernel::DebugWatchpointType::Write)) { - m_memory.Write64(vaddr, value); - } +} +void DynarmicCallbacks64::MemoryWrite64(u64 vaddr, u64 value) { + if (CheckMemoryAccess(vaddr, 8, Kernel::DebugWatchpointType::Write)) { + m_memory.Write64(vaddr, value); } - void 
MemoryWrite128(u64 vaddr, Vector value) override { - if (CheckMemoryAccess(vaddr, 16, Kernel::DebugWatchpointType::Write)) { - m_memory.Write64(vaddr, value[0]); - m_memory.Write64(vaddr + 8, value[1]); - } +} +void DynarmicCallbacks64::MemoryWrite128(u64 vaddr, Dynarmic::A64::Vector value) { + if (CheckMemoryAccess(vaddr, 16, Kernel::DebugWatchpointType::Write)) { + m_memory.Write64(vaddr, value[0]); + m_memory.Write64(vaddr + 8, value[1]); } +} - bool MemoryWriteExclusive8(u64 vaddr, std::uint8_t value, std::uint8_t expected) override { - return CheckMemoryAccess(vaddr, 1, Kernel::DebugWatchpointType::Write) && - m_memory.WriteExclusive8(vaddr, value, expected); - } - bool MemoryWriteExclusive16(u64 vaddr, std::uint16_t value, std::uint16_t expected) override { - return CheckMemoryAccess(vaddr, 2, Kernel::DebugWatchpointType::Write) && - m_memory.WriteExclusive16(vaddr, value, expected); - } - bool MemoryWriteExclusive32(u64 vaddr, std::uint32_t value, std::uint32_t expected) override { - return CheckMemoryAccess(vaddr, 4, Kernel::DebugWatchpointType::Write) && - m_memory.WriteExclusive32(vaddr, value, expected); - } - bool MemoryWriteExclusive64(u64 vaddr, std::uint64_t value, std::uint64_t expected) override { - return CheckMemoryAccess(vaddr, 8, Kernel::DebugWatchpointType::Write) && - m_memory.WriteExclusive64(vaddr, value, expected); - } - bool MemoryWriteExclusive128(u64 vaddr, Vector value, Vector expected) override { - return CheckMemoryAccess(vaddr, 16, Kernel::DebugWatchpointType::Write) && - m_memory.WriteExclusive128(vaddr, value, expected); - } +bool DynarmicCallbacks64::MemoryWriteExclusive8(u64 vaddr, std::uint8_t value, std::uint8_t expected) { + return CheckMemoryAccess(vaddr, 1, Kernel::DebugWatchpointType::Write) && + m_memory.WriteExclusive8(vaddr, value, expected); +} +bool DynarmicCallbacks64::MemoryWriteExclusive16(u64 vaddr, std::uint16_t value, std::uint16_t expected) { + return CheckMemoryAccess(vaddr, 2, 
Kernel::DebugWatchpointType::Write) && + m_memory.WriteExclusive16(vaddr, value, expected); +} +bool DynarmicCallbacks64::MemoryWriteExclusive32(u64 vaddr, std::uint32_t value, std::uint32_t expected) { + return CheckMemoryAccess(vaddr, 4, Kernel::DebugWatchpointType::Write) && + m_memory.WriteExclusive32(vaddr, value, expected); +} +bool DynarmicCallbacks64::MemoryWriteExclusive64(u64 vaddr, std::uint64_t value, std::uint64_t expected) { + return CheckMemoryAccess(vaddr, 8, Kernel::DebugWatchpointType::Write) && + m_memory.WriteExclusive64(vaddr, value, expected); +} +bool DynarmicCallbacks64::MemoryWriteExclusive128(u64 vaddr, Dynarmic::A64::Vector value, Dynarmic::A64::Vector expected) { + return CheckMemoryAccess(vaddr, 16, Kernel::DebugWatchpointType::Write) && + m_memory.WriteExclusive128(vaddr, value, expected); +} - void InterpreterFallback(u64 pc, std::size_t num_instructions) override { - m_parent.LogBacktrace(m_process); - LOG_ERROR(Core_ARM, - "Unimplemented instruction @ {:#X} for {} instructions (instr = {:08X})", pc, - num_instructions, m_memory.Read32(pc)); +void DynarmicCallbacks64::InterpreterFallback(u64 pc, std::size_t num_instructions) { + m_parent.LogBacktrace(m_process); + LOG_ERROR(Core_ARM, "Unimplemented instruction @ {:#X} for {} instructions (instr = {:08X})", pc, + num_instructions, m_memory.Read32(pc)); + ReturnException(pc, PrefetchAbort); +} + +void DynarmicCallbacks64::InstructionCacheOperationRaised(Dynarmic::A64::InstructionCacheOperation op, u64 value) { + switch (op) { + case Dynarmic::A64::InstructionCacheOperation::InvalidateByVAToPoU: { + static constexpr u64 ICACHE_LINE_SIZE = 64; + const u64 cache_line_start = value & ~(ICACHE_LINE_SIZE - 1); + m_parent.InvalidateCacheRange(cache_line_start, ICACHE_LINE_SIZE); + break; + } + case Dynarmic::A64::InstructionCacheOperation::InvalidateAllToPoU: + m_parent.ClearInstructionCache(); + break; + case Dynarmic::A64::InstructionCacheOperation::InvalidateAllToPoUInnerSharable: + 
default: + LOG_DEBUG(Core_ARM, "Unprocesseed instruction cache operation: {}", op); + break; + } + m_parent.m_jit->HaltExecution(Dynarmic::HaltReason::CacheInvalidation); +} + +void DynarmicCallbacks64::ExceptionRaised(u64 pc, Dynarmic::A64::Exception exception) { + switch (exception) { + case Dynarmic::A64::Exception::WaitForInterrupt: + case Dynarmic::A64::Exception::WaitForEvent: + case Dynarmic::A64::Exception::SendEvent: + case Dynarmic::A64::Exception::SendEventLocal: + case Dynarmic::A64::Exception::Yield: + LOG_TRACE(Core_ARM, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})", static_cast(exception), pc, m_memory.Read32(pc)); + return; + case Dynarmic::A64::Exception::NoExecuteFault: + LOG_CRITICAL(Core_ARM, "Cannot execute instruction at unmapped address {:#016x}", pc); ReturnException(pc, PrefetchAbort); - } - - void InstructionCacheOperationRaised(Dynarmic::A64::InstructionCacheOperation op, - u64 value) override { - switch (op) { - case Dynarmic::A64::InstructionCacheOperation::InvalidateByVAToPoU: { - static constexpr u64 ICACHE_LINE_SIZE = 64; - - const u64 cache_line_start = value & ~(ICACHE_LINE_SIZE - 1); - m_parent.InvalidateCacheRange(cache_line_start, ICACHE_LINE_SIZE); - break; - } - case Dynarmic::A64::InstructionCacheOperation::InvalidateAllToPoU: - m_parent.ClearInstructionCache(); - break; - case Dynarmic::A64::InstructionCacheOperation::InvalidateAllToPoUInnerSharable: - default: - LOG_DEBUG(Core_ARM, "Unprocesseed instruction cache operation: {}", op); - break; - } - - m_parent.m_jit->HaltExecution(Dynarmic::HaltReason::CacheInvalidation); - } - - void ExceptionRaised(u64 pc, Dynarmic::A64::Exception exception) override { - switch (exception) { - case Dynarmic::A64::Exception::WaitForInterrupt: - case Dynarmic::A64::Exception::WaitForEvent: - case Dynarmic::A64::Exception::SendEvent: - case Dynarmic::A64::Exception::SendEventLocal: - case Dynarmic::A64::Exception::Yield: - LOG_TRACE(Core_ARM, "ExceptionRaised(exception = {}, 
pc = {:08X}, code = {:08X})", static_cast(exception), pc, m_memory.Read32(pc)); - return; - case Dynarmic::A64::Exception::NoExecuteFault: - LOG_CRITICAL(Core_ARM, "Cannot execute instruction at unmapped address {:#016x}", pc); - ReturnException(pc, PrefetchAbort); - return; - default: - if (m_debugger_enabled) { - ReturnException(pc, InstructionBreakpoint); - } else { - m_parent.LogBacktrace(m_process); - LOG_CRITICAL(Core_ARM, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})", static_cast(exception), pc, m_memory.Read32(pc)); - } + return; + default: + if (m_debugger_enabled) { + ReturnException(pc, InstructionBreakpoint); + } else { + m_parent.LogBacktrace(m_process); + LOG_CRITICAL(Core_ARM, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})", static_cast(exception), pc, m_memory.Read32(pc)); } } +} - void CallSVC(u32 svc) override { - m_parent.m_svc = svc; - m_parent.m_jit->HaltExecution(SupervisorCall); - } +void DynarmicCallbacks64::CallSVC(u32 svc) { + m_parent.m_svc = svc; + m_parent.m_jit->HaltExecution(SupervisorCall); +} - void AddTicks(u64 ticks) override { - ASSERT_MSG(!m_parent.m_uses_wall_clock, "Dynarmic ticking disabled"); +void DynarmicCallbacks64::AddTicks(u64 ticks) { + ASSERT_MSG(!m_parent.m_uses_wall_clock, "Dynarmic ticking disabled"); - // Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a - // rough approximation of the amount of executed ticks in the system, it may be thrown off - // if not all cores are doing a similar amount of work. Instead of doing this, we should - // device a way so that timing is consistent across all cores without increasing the ticks 4 - // times. - u64 amortized_ticks = ticks / Core::Hardware::NUM_CPU_CORES; - // Always execute at least one tick. - amortized_ticks = std::max(amortized_ticks, 1); + // Divide the number of ticks by the amount of CPU cores. 
TODO(Subv): This yields only a + // rough approximation of the amount of executed ticks in the system, it may be thrown off + // if not all cores are doing a similar amount of work. Instead of doing this, we should + // device a way so that timing is consistent across all cores without increasing the ticks 4 + // times. + u64 amortized_ticks = ticks / Core::Hardware::NUM_CPU_CORES; + // Always execute at least one tick. + amortized_ticks = std::max(amortized_ticks, 1); - m_parent.m_system.CoreTiming().AddTicks(amortized_ticks); - } + m_parent.m_system.CoreTiming().AddTicks(amortized_ticks); +} - u64 GetTicksRemaining() override { - ASSERT_MSG(!m_parent.m_uses_wall_clock, "Dynarmic ticking disabled"); +u64 DynarmicCallbacks64::GetTicksRemaining() { + ASSERT(!m_parent.m_uses_wall_clock && "Dynarmic ticking disabled"); + return std::max(m_parent.m_system.CoreTiming().GetDowncount(), 0); +} - return std::max(m_parent.m_system.CoreTiming().GetDowncount(), 0); - } - - u64 GetCNTPCT() override { - return m_parent.m_system.CoreTiming().GetClockTicks(); - } - - bool CheckMemoryAccess(u64 addr, u64 size, Kernel::DebugWatchpointType type) { - if (!m_check_memory_access) { - return true; - } - - if (!m_memory.IsValidVirtualAddressRange(addr, size)) { - LOG_CRITICAL(Core_ARM, "Stopping execution due to unmapped memory access at {:#x}", - addr); - m_parent.m_jit->HaltExecution(PrefetchAbort); - return false; - } - - if (!m_debugger_enabled) { - return true; - } - - const auto match{m_parent.MatchingWatchpoint(addr, size, type)}; - if (match) { - m_parent.m_halted_watchpoint = match; - m_parent.m_jit->HaltExecution(DataAbort); - return false; - } +u64 DynarmicCallbacks64::GetCNTPCT() { + return m_parent.m_system.CoreTiming().GetClockTicks(); +} +bool DynarmicCallbacks64::CheckMemoryAccess(u64 addr, u64 size, Kernel::DebugWatchpointType type) { + if (!m_check_memory_access) { return true; } - void ReturnException(u64 pc, Dynarmic::HaltReason hr) { - 
m_parent.GetContext(m_parent.m_breakpoint_context); - m_parent.m_breakpoint_context.pc = pc; - m_parent.m_jit->HaltExecution(hr); + if (!m_memory.IsValidVirtualAddressRange(addr, size)) { + LOG_CRITICAL(Core_ARM, "Stopping execution due to unmapped memory access at {:#x}", + addr); + m_parent.m_jit->HaltExecution(PrefetchAbort); + return false; } - ArmDynarmic64& m_parent; - Core::Memory::Memory& m_memory; - u64 m_tpidrro_el0{}; - u64 m_tpidr_el0{}; - Kernel::KProcess* m_process{}; - const bool m_debugger_enabled{}; - const bool m_check_memory_access{}; - static constexpr u64 MinimumRunCycles = 10000U; -}; + if (!m_debugger_enabled) { + return true; + } -std::shared_ptr ArmDynarmic64::MakeJit(Common::PageTable* page_table, - std::size_t address_space_bits) const { + const auto match{m_parent.MatchingWatchpoint(addr, size, type)}; + if (match) { + m_parent.m_halted_watchpoint = match; + m_parent.m_jit->HaltExecution(DataAbort); + return false; + } + + return true; +} + +void DynarmicCallbacks64::ReturnException(u64 pc, Dynarmic::HaltReason hr) { + m_parent.GetContext(m_parent.m_breakpoint_context); + m_parent.m_breakpoint_context.pc = pc; + m_parent.m_jit->HaltExecution(hr); +} + +void ArmDynarmic64::MakeJit(Common::PageTable* page_table, std::size_t address_space_bits) { Dynarmic::A64::UserConfig config; // Callbacks - config.callbacks = m_cb.get(); + config.callbacks = std::addressof(*m_cb); // Memory if (page_table) { @@ -375,7 +355,7 @@ std::shared_ptr ArmDynarmic64::MakeJit(Common::PageTable* pa default: break; } - return std::make_shared(config); + m_jit.emplace(config); } HaltReason ArmDynarmic64::RunThread(Kernel::KThread* thread) { @@ -393,19 +373,15 @@ u32 ArmDynarmic64::GetSvcNumber() const { } void ArmDynarmic64::GetSvcArguments(std::span args) const { - Dynarmic::A64::Jit& j = *m_jit; - - for (size_t i = 0; i < 8; i++) { + Dynarmic::A64::Jit const& j = *m_jit; + for (size_t i = 0; i < 8; i++) args[i] = j.GetRegister(i); - } } void 
ArmDynarmic64::SetSvcArguments(std::span args) { Dynarmic::A64::Jit& j = *m_jit; - - for (size_t i = 0; i < 8; i++) { + for (size_t i = 0; i < 8; i++) j.SetRegister(i, args[i]); - } } const Kernel::DebugWatchpoint* ArmDynarmic64::HaltedWatchpoint() const { @@ -416,13 +392,14 @@ void ArmDynarmic64::RewindBreakpointInstruction() { this->SetContext(m_breakpoint_context); } -ArmDynarmic64::ArmDynarmic64(System& system, bool uses_wall_clock, Kernel::KProcess* process, - DynarmicExclusiveMonitor& exclusive_monitor, std::size_t core_index) - : ArmInterface{uses_wall_clock}, m_system{system}, m_exclusive_monitor{exclusive_monitor}, - m_cb(std::make_unique(*this, process)), m_core_index{core_index} { +ArmDynarmic64::ArmDynarmic64(System& system, bool uses_wall_clock, Kernel::KProcess* process, DynarmicExclusiveMonitor& exclusive_monitor, std::size_t core_index) + : ArmInterface{uses_wall_clock}, m_system{system}, m_exclusive_monitor{exclusive_monitor} + , m_cb(std::make_optional(*this, process)) + , m_core_index{core_index} +{ auto& page_table = process->GetPageTable().GetBasePageTable(); auto& page_table_impl = page_table.GetImpl(); - m_jit = MakeJit(&page_table_impl, page_table.GetAddressSpaceWidth()); + MakeJit(&page_table_impl, page_table.GetAddressSpaceWidth()); } ArmDynarmic64::~ArmDynarmic64() = default; @@ -432,17 +409,14 @@ void ArmDynarmic64::SetTpidrroEl0(u64 value) { } void ArmDynarmic64::GetContext(Kernel::Svc::ThreadContext& ctx) const { - Dynarmic::A64::Jit& j = *m_jit; + Dynarmic::A64::Jit const& j = *m_jit; auto gpr = j.GetRegisters(); auto fpr = j.GetVectors(); - // TODO: this is inconvenient - for (size_t i = 0; i < 29; i++) { + for (size_t i = 0; i < 29; i++) ctx.r[i] = gpr[i]; - } ctx.fp = gpr[29]; ctx.lr = gpr[30]; - ctx.sp = j.GetSP(); ctx.pc = j.GetPC(); ctx.pstate = j.GetPstate(); @@ -454,16 +428,12 @@ void ArmDynarmic64::GetContext(Kernel::Svc::ThreadContext& ctx) const { void ArmDynarmic64::SetContext(const Kernel::Svc::ThreadContext& ctx) { 
Dynarmic::A64::Jit& j = *m_jit; - // TODO: this is inconvenient std::array gpr; - - for (size_t i = 0; i < 29; i++) { + for (size_t i = 0; i < 29; i++) gpr[i] = ctx.r[i]; - } gpr[29] = ctx.fp; gpr[30] = ctx.lr; - j.SetRegisters(gpr); j.SetSP(ctx.sp); j.SetPC(ctx.pc); diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.h b/src/core/arm/dynarmic/arm_dynarmic_64.h index 08cd982b30..2ea1505ce7 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.h +++ b/src/core/arm/dynarmic/arm_dynarmic_64.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -17,12 +20,57 @@ namespace Core::Memory { class Memory; } +namespace Kernel { +enum class DebugWatchpointType : u8; +class KPRocess; +} + namespace Core { -class DynarmicCallbacks64; +class ArmDynarmic64; class DynarmicExclusiveMonitor; class System; +class DynarmicCallbacks64 : public Dynarmic::A64::UserCallbacks { +public: + explicit DynarmicCallbacks64(ArmDynarmic64& parent, Kernel::KProcess* process); + + u8 MemoryRead8(u64 vaddr) override; + u16 MemoryRead16(u64 vaddr) override; + u32 MemoryRead32(u64 vaddr) override; + u64 MemoryRead64(u64 vaddr) override; + Dynarmic::A64::Vector MemoryRead128(u64 vaddr) override; + std::optional MemoryReadCode(u64 vaddr) override; + void MemoryWrite8(u64 vaddr, u8 value) override; + void MemoryWrite16(u64 vaddr, u16 value) override; + void MemoryWrite32(u64 vaddr, u32 value) override; + void MemoryWrite64(u64 vaddr, u64 value) override; + void MemoryWrite128(u64 vaddr, Dynarmic::A64::Vector value) override; + bool MemoryWriteExclusive8(u64 vaddr, std::uint8_t value, std::uint8_t expected) override; + bool MemoryWriteExclusive16(u64 vaddr, std::uint16_t value, std::uint16_t expected) override; + bool MemoryWriteExclusive32(u64 vaddr, std::uint32_t value, std::uint32_t expected) override; + bool 
MemoryWriteExclusive64(u64 vaddr, std::uint64_t value, std::uint64_t expected) override; + bool MemoryWriteExclusive128(u64 vaddr, Dynarmic::A64::Vector value, Dynarmic::A64::Vector expected) override; + void InterpreterFallback(u64 pc, std::size_t num_instructions) override; + void InstructionCacheOperationRaised(Dynarmic::A64::InstructionCacheOperation op, u64 value) override; + void ExceptionRaised(u64 pc, Dynarmic::A64::Exception exception) override; + void CallSVC(u32 svc) override; + void AddTicks(u64 ticks) override; + u64 GetTicksRemaining() override; + u64 GetCNTPCT() override; + bool CheckMemoryAccess(u64 addr, u64 size, Kernel::DebugWatchpointType type); + void ReturnException(u64 pc, Dynarmic::HaltReason hr); + + ArmDynarmic64& m_parent; + Core::Memory::Memory& m_memory; + u64 m_tpidrro_el0{}; + u64 m_tpidr_el0{}; + Kernel::KProcess* m_process{}; + const bool m_debugger_enabled{}; + const bool m_check_memory_access{}; + static constexpr u64 MinimumRunCycles = 10000U; +}; + class ArmDynarmic64 final : public ArmInterface { public: ArmDynarmic64(System& system, bool uses_wall_clock, Kernel::KProcess* process, @@ -59,12 +107,11 @@ private: private: friend class DynarmicCallbacks64; - std::shared_ptr MakeJit(Common::PageTable* page_table, - std::size_t address_space_bits) const; - std::unique_ptr m_cb{}; + void MakeJit(Common::PageTable* page_table, std::size_t address_space_bits); + std::optional m_cb{}; std::size_t m_core_index{}; - std::shared_ptr m_jit{}; + std::optional m_jit{}; // SVC callback u32 m_svc{}; diff --git a/src/core/core.cpp b/src/core/core.cpp index bf97184f8f..aea2b2b060 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -112,11 +112,10 @@ struct System::Impl { u64 program_id; void Initialize(System& system) { - device_memory = std::make_unique(); + device_memory.emplace(); is_multicore = Settings::values.use_multi_core.GetValue(); - extended_memory_layout = - Settings::values.memory_layout_mode.GetValue() != 
Settings::MemoryLayout::Memory_4Gb; + extended_memory_layout = Settings::values.memory_layout_mode.GetValue() != Settings::MemoryLayout::Memory_4Gb; core_timing.SetMulticore(is_multicore); core_timing.Initialize([&system]() { system.RegisterHostThread(); }); @@ -132,7 +131,7 @@ struct System::Impl { // Create default implementations of applets if one is not provided. frontend_applets.SetDefaultAppletsIfMissing(); - is_async_gpu = Settings::values.use_asynchronous_gpu_emulation.GetValue(); + auto const is_async_gpu = Settings::values.use_asynchronous_gpu_emulation.GetValue(); kernel.SetMulticore(is_multicore); cpu_manager.SetMulticore(is_multicore); @@ -254,7 +253,7 @@ struct System::Impl { } void InitializeDebugger(System& system, u16 port) { - debugger = std::make_unique(system, port); + debugger.emplace(system, port); } void InitializeKernel(System& system) { @@ -268,24 +267,22 @@ struct System::Impl { } SystemResultStatus SetupForApplicationProcess(System& system, Frontend::EmuWindow& emu_window) { - host1x_core = std::make_unique(system); + host1x_core.emplace(system); gpu_core = VideoCore::CreateGPU(emu_window, system); - if (!gpu_core) { + if (!gpu_core) return SystemResultStatus::ErrorVideoCore; - } - audio_core = std::make_unique(system); + audio_core.emplace(system); service_manager = std::make_shared(kernel); - services = - std::make_unique(service_manager, system, stop_event.get_token()); + services.emplace(service_manager, system, stop_event.get_token()); is_powered_on = true; exit_locked = false; exit_requested = false; if (Settings::values.enable_renderdoc_hotkey) { - renderdoc_api = std::make_unique(); + renderdoc_api.emplace(); } LOG_DEBUG(Core, "Initialized OK"); @@ -303,16 +300,11 @@ struct System::Impl { // Create the application process Loader::ResultStatus load_result{}; std::vector control; - auto process = - Service::AM::CreateApplicationProcess(control, app_loader, load_result, system, file, - params.program_id, params.program_index); - + 
auto process = Service::AM::CreateApplicationProcess(control, app_loader, load_result, system, file, params.program_id, params.program_index); if (load_result != Loader::ResultStatus::Success) { LOG_CRITICAL(Core, "Failed to load ROM (Error {})!", load_result); ShutdownMainProcess(); - - return static_cast( - static_cast(SystemResultStatus::ErrorLoader) + static_cast(load_result)); + return SystemResultStatus(u32(SystemResultStatus::ErrorLoader) + u32(load_result)); } if (!app_loader) { @@ -337,8 +329,7 @@ struct System::Impl { // Set up the rest of the system. SystemResultStatus init_result{SetupForApplicationProcess(system, emu_window)}; if (init_result != SystemResultStatus::Success) { - LOG_CRITICAL(Core, "Failed to initialize system (Error {})!", - static_cast(init_result)); + LOG_CRITICAL(Core, "Failed to initialize system (Error {})!", int(init_result)); ShutdownMainProcess(); return init_result; } @@ -361,24 +352,19 @@ struct System::Impl { } } - perf_stats = std::make_unique(params.program_id); + perf_stats.emplace(params.program_id); // Reset counters and set time origin to current frame GetAndResetPerfStats(); perf_stats->BeginSystemFrame(); - std::string title_version; - const FileSys::PatchManager pm(params.program_id, system.GetFileSystemController(), - system.GetContentProvider()); - const auto metadata = pm.GetControlMetadata(); - if (metadata.first != nullptr) { - title_version = metadata.first->GetVersionString(); - } + const FileSys::PatchManager pm(params.program_id, system.GetFileSystemController(), system.GetContentProvider()); + auto const metadata = pm.GetControlMetadata(); + std::string title_version = metadata.first != nullptr ? 
metadata.first->GetVersionString() : ""; if (app_loader->ReadProgramId(program_id) != Loader::ResultStatus::Success) { LOG_ERROR(Core, "Failed to find program id for ROM"); } - GameSettings::LoadOverrides(program_id, gpu_core->Renderer()); if (auto room_member = Network::GetRoomMember().lock()) { Network::GameInfo game_info; @@ -387,9 +373,7 @@ struct System::Impl { game_info.version = title_version; room_member->SendGameInfo(game_info); } - - status = SystemResultStatus::Success; - return status; + return SystemResultStatus::Success; } void ShutdownMainProcess() { @@ -448,112 +432,79 @@ struct System::Impl { } Loader::ResultStatus GetGameName(std::string& out) const { - if (app_loader == nullptr) - return Loader::ResultStatus::ErrorNotInitialized; - return app_loader->ReadTitle(out); - } - - void SetStatus(SystemResultStatus new_status, const char* details = nullptr) { - status = new_status; - if (details) { - status_details = details; - } + return app_loader ? app_loader->ReadTitle(out) : Loader::ResultStatus::ErrorNotInitialized; } PerfStatsResults GetAndResetPerfStats() { return perf_stats->GetAndResetStats(core_timing.GetGlobalTimeUs()); } - mutable std::mutex suspend_guard; - std::atomic_bool is_paused{}; - std::atomic is_shutting_down{}; - Timing::CoreTiming core_timing; Kernel::KernelCore kernel; /// RealVfsFilesystem instance FileSys::VirtualFilesystem virtual_filesystem; - /// ContentProviderUnion instance - std::unique_ptr content_provider; Service::FileSystem::FileSystemController fs_controller; - /// AppLoader used to load the current executing application - std::unique_ptr app_loader; - std::unique_ptr gpu_core; - std::unique_ptr host1x_core; - std::unique_ptr device_memory; - std::unique_ptr audio_core; Core::HID::HIDCore hid_core; - CpuManager cpu_manager; - std::atomic_bool is_powered_on{}; - bool exit_locked = false; - bool exit_requested = false; - - bool nvdec_active{}; - Reporter reporter; - std::unique_ptr cheat_engine; - std::unique_ptr 
memory_freezer; - std::array build_id{}; - - std::unique_ptr renderdoc_api; - /// Applets Service::AM::AppletManager applet_manager; Service::AM::Frontend::FrontendAppletHolder frontend_applets; - /// APM (Performance) services Service::APM::Controller apm_controller{core_timing}; - /// Service State Service::Glue::ARPManager arp_manager; Service::Account::ProfileManager profile_manager; + /// Network instance + Network::NetworkInstance network_instance; + Core::SpeedLimiter speed_limiter; + ExecuteProgramCallback execute_program_callback; + ExitCallback exit_callback; + + std::optional services; + std::optional debugger; + std::optional general_channel_context; + std::optional general_channel_event; + std::optional perf_stats; + std::optional host1x_core; + std::optional device_memory; + std::optional audio_core; + std::optional cheat_engine; + std::optional memory_freezer; + std::optional renderdoc_api; + + std::array gpu_dirty_memory_managers; + std::vector> user_channel; + std::vector> general_channel; + + std::array dynarmic_ticks{}; + std::array build_id{}; /// Service manager std::shared_ptr service_manager; - - /// Services - std::unique_ptr services; - - /// Network instance - Network::NetworkInstance network_instance; - - /// Debugger - std::unique_ptr debugger; - - SystemResultStatus status = SystemResultStatus::Success; - std::string status_details = ""; - - std::unique_ptr perf_stats; - Core::SpeedLimiter speed_limiter; - - bool is_multicore{}; - bool is_async_gpu{}; - bool extended_memory_layout{}; - - ExecuteProgramCallback execute_program_callback; - ExitCallback exit_callback; + /// ContentProviderUnion instance + std::unique_ptr content_provider; + /// AppLoader used to load the current executing application + std::unique_ptr app_loader; + std::unique_ptr gpu_core; std::stop_source stop_event; - std::array dynarmic_ticks{}; - - std::array - gpu_dirty_memory_managers; - - std::deque> user_channel; - + mutable std::mutex suspend_guard; std::mutex 
general_channel_mutex; - std::deque> general_channel; - std::unique_ptr general_channel_context; // lazy - std::unique_ptr general_channel_event; // lazy - bool general_channel_initialized{false}; + std::atomic_bool is_paused{}; + std::atomic_bool is_shutting_down{}; + std::atomic_bool is_powered_on{}; + bool is_multicore : 1 = false; + bool extended_memory_layout : 1 = false; + bool exit_locked : 1 = false; + bool exit_requested : 1 = false; + bool nvdec_active : 1 = false; void EnsureGeneralChannelInitialized(System& system) { - if (general_channel_initialized) { - return; + if (!general_channel_event) { + general_channel_context.emplace(system, "GeneralChannel"); + general_channel_event.emplace(*general_channel_context); } - general_channel_context = std::make_unique(system, "GeneralChannel"); - general_channel_event = std::make_unique(*general_channel_context); - general_channel_initialized = true; } }; @@ -776,14 +727,6 @@ Loader::ResultStatus System::GetGameName(std::string& out) const { return impl->GetGameName(out); } -void System::SetStatus(SystemResultStatus new_status, const char* details) { - impl->SetStatus(new_status, details); -} - -const std::string& System::GetStatusDetails() const { - return impl->status_details; -} - Loader::AppLoader& System::GetAppLoader() { return *impl->app_loader; } @@ -803,7 +746,7 @@ FileSys::VirtualFilesystem System::GetFilesystem() const { void System::RegisterCheatList(const std::vector& list, const std::array& build_id, u64 main_region_begin, u64 main_region_size) { - impl->cheat_engine = std::make_unique(*this, list, build_id); + impl->cheat_engine.emplace(*this, list, build_id); impl->cheat_engine->SetMainMemoryParameters(main_region_begin, main_region_size); } @@ -964,11 +907,13 @@ void System::ExecuteProgram(std::size_t program_index) { } } -std::deque>& System::GetUserChannel() { +/// @brief Gets a reference to the user channel stack. +/// It is used to transfer data between programs. 
+std::vector>& System::GetUserChannel() { return impl->user_channel; } -std::deque>& System::GetGeneralChannel() { +std::vector>& System::GetGeneralChannel() { return impl->general_channel; } @@ -984,7 +929,7 @@ void System::PushGeneralChannelData(std::vector&& data) { bool System::TryPopGeneralChannel(std::vector& out_data) { std::scoped_lock lk{impl->general_channel_mutex}; - if (!impl->general_channel_initialized || impl->general_channel.empty()) { + if (!impl->general_channel_event || impl->general_channel.empty()) { return false; } out_data = std::move(impl->general_channel.back()); diff --git a/src/core/core.h b/src/core/core.h index 60bf73d4e1..702c5cc81b 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -325,10 +325,6 @@ public: /// Gets the name of the current game [[nodiscard]] Loader::ResultStatus GetGameName(std::string& out) const; - void SetStatus(SystemResultStatus new_status, const char* details); - - [[nodiscard]] const std::string& GetStatusDetails() const; - [[nodiscard]] Loader::AppLoader& GetAppLoader(); [[nodiscard]] const Loader::AppLoader& GetAppLoader() const; @@ -424,13 +420,8 @@ public: */ void ExecuteProgram(std::size_t program_index); - /** - * Gets a reference to the user channel stack. - * It is used to transfer data between programs. 
- */ - [[nodiscard]] std::deque>& GetUserChannel(); - - [[nodiscard]] std::deque>& GetGeneralChannel(); + [[nodiscard]] std::vector>& GetUserChannel(); + [[nodiscard]] std::vector>& GetGeneralChannel(); void PushGeneralChannelData(std::vector&& data); bool TryPopGeneralChannel(std::vector& out_data); [[nodiscard]] Service::Event& GetGeneralChannelEvent(); diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index 3c847c8359..5a582c8cff 100644 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp @@ -53,13 +53,6 @@ CoreTiming::~CoreTiming() { Reset(); } -void CoreTiming::ThreadEntry(CoreTiming& instance) { - Common::SetCurrentThreadName("HostTiming"); - Common::SetCurrentThreadPriority(Common::ThreadPriority::High); - instance.on_thread_init(); - instance.ThreadLoop(); -} - void CoreTiming::Initialize(std::function&& on_thread_init_) { Reset(); on_thread_init = std::move(on_thread_init_); @@ -67,7 +60,12 @@ void CoreTiming::Initialize(std::function&& on_thread_init_) { shutting_down = false; cpu_ticks = 0; if (is_multicore) { - timer_thread = std::make_unique(ThreadEntry, std::ref(*this)); + timer_thread.emplace([](CoreTiming& instance) { + Common::SetCurrentThreadName("HostTiming"); + Common::SetCurrentThreadPriority(Common::ThreadPriority::High); + instance.on_thread_init(); + instance.ThreadLoop(); + }, std::ref(*this)); } } diff --git a/src/core/core_timing.h b/src/core/core_timing.h index 7e4dff7f3d..ae9f56d519 100644 --- a/src/core/core_timing.h +++ b/src/core/core_timing.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -140,8 +143,6 @@ public: private: struct Event; - - static void ThreadEntry(CoreTiming& instance); void ThreadLoop(); void Reset(); @@ -164,7 +165,7 @@ private: Common::Event pause_event{}; mutable std::mutex basic_lock; std::mutex 
advance_lock; - std::unique_ptr timer_thread; + std::optional timer_thread; std::atomic paused{}; std::atomic paused_set{}; std::atomic wait_set{}; diff --git a/src/core/hle/kernel/k_process.cpp b/src/core/hle/kernel/k_process.cpp index 322f971ba3..082049f957 100644 --- a/src/core/hle/kernel/k_process.cpp +++ b/src/core/hle/kernel/k_process.cpp @@ -1148,9 +1148,17 @@ Result KProcess::GetThreadList(s32* out_num_threads, KProcessAddress out_thread_ void KProcess::Switch(KProcess* cur_process, KProcess* next_process) {} KProcess::KProcess(KernelCore& kernel) - : KAutoObjectWithSlabHeapAndContainer(kernel), m_page_table{kernel}, m_state_lock{kernel}, - m_list_lock{kernel}, m_cond_var{kernel.System()}, m_address_arbiter{kernel.System()}, - m_handle_table{kernel}, m_exclusive_monitor{}, m_memory{kernel.System()} {} + : KAutoObjectWithSlabHeapAndContainer(kernel) + , m_exclusive_monitor{} + , m_memory{kernel.System()} + , m_handle_table{kernel} + , m_page_table{kernel} + , m_state_lock{kernel} + , m_list_lock{kernel} + , m_cond_var{kernel.System()} + , m_address_arbiter{kernel.System()} +{} + KProcess::~KProcess() = default; Result KProcess::LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std::size_t code_size, diff --git a/src/core/hle/kernel/k_process.h b/src/core/hle/kernel/k_process.h index 92ddb1aca4..13717cc090 100644 --- a/src/core/hle/kernel/k_process.h +++ b/src/core/hle/kernel/k_process.h @@ -66,60 +66,55 @@ public: private: using SharedMemoryInfoList = Common::IntrusiveListBaseTraits::ListType; - using TLPTree = - Common::IntrusiveRedBlackTreeBaseTraits::TreeType; + using TLPTree = Common::IntrusiveRedBlackTreeBaseTraits::TreeType; using TLPIterator = TLPTree::iterator; private: - KProcessPageTable m_page_table; - std::atomic m_used_kernel_memory_size{}; - TLPTree m_fully_used_tlp_tree{}; - TLPTree m_partially_used_tlp_tree{}; - s32 m_ideal_core_id{}; - KResourceLimit* m_resource_limit{}; - KSystemResource* m_system_resource{}; - size_t 
m_memory_release_hint{}; - State m_state{}; - KLightLock m_state_lock; - KLightLock m_list_lock; - KConditionVariable m_cond_var; - KAddressArbiter m_address_arbiter; - std::array m_entropy{}; - bool m_is_signaled{}; - bool m_is_initialized{}; - u32 m_pointer_buffer_size = 0x8000; // Default pointer buffer size (can be game-specific later) - bool m_is_application{}; - bool m_is_default_application_system_resource{}; - bool m_is_hbl{}; - std::array m_name{}; - std::atomic m_num_running_threads{}; - Svc::CreateProcessFlag m_flags{}; - KMemoryManager::Pool m_memory_pool{}; - s64 m_schedule_count{}; - KCapabilities m_capabilities{}; - u64 m_program_id{}; - u64 m_process_id{}; - KProcessAddress m_code_address{}; - size_t m_code_size{}; - size_t m_main_thread_stack_size{}; - size_t m_max_process_memory{}; - u32 m_version{}; - KHandleTable m_handle_table; - KProcessAddress m_plr_address{}; - KThread* m_exception_thread{}; - ThreadList m_thread_list{}; - SharedMemoryInfoList m_shared_memory_list{}; - bool m_is_suspended{}; - bool m_is_immortal{}; - bool m_is_handle_table_initialized{}; - std::array, Core::Hardware::NUM_CPU_CORES> - m_arm_interfaces{}; + std::array, Core::Hardware::NUM_CPU_CORES> m_arm_interfaces{}; std::array m_running_threads{}; std::array m_running_thread_idle_counts{}; std::array m_running_thread_switch_counts{}; std::array m_pinned_threads{}; std::array m_watchpoints{}; std::map m_debug_page_refcounts{}; +#ifdef HAS_NCE + std::unordered_map m_post_handlers{}; +#endif + std::unique_ptr m_exclusive_monitor; + Core::Memory::Memory m_memory; + KCapabilities m_capabilities{}; + KProcessAddress m_code_address{}; + KHandleTable m_handle_table; + KProcessAddress m_plr_address{}; + ThreadList m_thread_list{}; + SharedMemoryInfoList m_shared_memory_list{}; + KProcessPageTable m_page_table; + std::atomic m_used_kernel_memory_size{}; + TLPTree m_fully_used_tlp_tree{}; + TLPTree m_partially_used_tlp_tree{}; + State m_state{}; + KLightLock m_state_lock; + KLightLock 
m_list_lock; + KConditionVariable m_cond_var; + KAddressArbiter m_address_arbiter; + std::array m_entropy{}; + u32 m_pointer_buffer_size = 0x8000; // Default pointer buffer size (can be game-specific later) + std::array m_name{}; + Svc::CreateProcessFlag m_flags{}; + KMemoryManager::Pool m_memory_pool{}; + + KResourceLimit* m_resource_limit{}; + KSystemResource* m_system_resource{}; + KThread* m_exception_thread{}; + + size_t m_code_size{}; + size_t m_main_thread_stack_size{}; + size_t m_max_process_memory{}; + size_t m_memory_release_hint{}; + s64 m_schedule_count{}; + u64 m_program_id{}; + u64 m_process_id{}; + std::atomic m_cpu_time{}; std::atomic m_num_process_switches{}; std::atomic m_num_thread_switches{}; @@ -128,11 +123,20 @@ private: std::atomic m_num_ipc_messages{}; std::atomic m_num_ipc_replies{}; std::atomic m_num_ipc_receives{}; -#ifdef HAS_NCE - std::unordered_map m_post_handlers{}; -#endif - std::unique_ptr m_exclusive_monitor; - Core::Memory::Memory m_memory; + + s32 m_ideal_core_id{}; + u32 m_version{}; + + std::atomic m_num_running_threads{}; + + bool m_is_signaled : 1 = false; + bool m_is_initialized : 1 = false; + bool m_is_application : 1 = false; + bool m_is_default_application_system_resource : 1 = false; + bool m_is_hbl : 1 = false; + bool m_is_suspended : 1 = false; + bool m_is_immortal : 1 = false; + bool m_is_handle_table_initialized : 1 = false; private: Result StartTermination(); diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 062387a29b..6986a98e35 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -88,11 +88,11 @@ struct KernelCore::Impl { } void Initialize(KernelCore& kernel) { - hardware_timer = std::make_unique(kernel); + hardware_timer.emplace(kernel); hardware_timer->Initialize(); - global_object_list_container = std::make_unique(kernel); - global_scheduler_context = std::make_unique(kernel); + global_object_list_container.emplace(kernel); + 
global_scheduler_context.emplace(kernel); // Derive the initial memory layout from the emulated board Init::InitializeSlabResourceCounts(kernel); @@ -212,10 +212,9 @@ struct KernelCore::Impl { void InitializePhysicalCores() { for (u32 i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) { - const s32 core{static_cast(i)}; - - schedulers[i] = std::make_unique(system.Kernel()); - cores[i] = std::make_unique(system.Kernel(), i); + auto const core = s32(i); + schedulers[i].emplace(system.Kernel()); + cores[i].emplace(system.Kernel(), i); auto* main_thread{Kernel::KThread::Create(system.Kernel())}; main_thread->SetCurrentCore(core); @@ -280,57 +279,56 @@ struct KernelCore::Impl { size -= rc_size; // Initialize the resource managers' shared page manager. - resource_manager_page_manager = std::make_unique(); + resource_manager_page_manager.emplace(); resource_manager_page_manager->Initialize(address, size, std::max(PageSize, KPageBufferSlabHeap::BufferSize)); // Initialize the KPageBuffer slab heap. page_buffer_slab_heap.Initialize(system); // Initialize the fixed-size slabheaps. 
- app_memory_block_heap = std::make_unique(); - sys_memory_block_heap = std::make_unique(); - block_info_heap = std::make_unique(); - app_memory_block_heap->Initialize(resource_manager_page_manager.get(), ApplicationMemoryBlockSlabHeapSize); - sys_memory_block_heap->Initialize(resource_manager_page_manager.get(), SystemMemoryBlockSlabHeapSize); - block_info_heap->Initialize(resource_manager_page_manager.get(), BlockInfoSlabHeapSize); + app_memory_block_heap.emplace(); + sys_memory_block_heap.emplace(); + block_info_heap.emplace(); + app_memory_block_heap->Initialize(std::addressof(*resource_manager_page_manager), ApplicationMemoryBlockSlabHeapSize); + sys_memory_block_heap->Initialize(std::addressof(*resource_manager_page_manager), SystemMemoryBlockSlabHeapSize); + block_info_heap->Initialize(std::addressof(*resource_manager_page_manager), BlockInfoSlabHeapSize); // Reserve all but a fixed number of remaining pages for the page table heap. const size_t num_pt_pages = resource_manager_page_manager->GetCount() - resource_manager_page_manager->GetUsed() - ReservedDynamicPageCount; - page_table_heap = std::make_unique(); + page_table_heap.emplace(); // TODO(bunnei): Pass in address once we support kernel virtual memory allocations. page_table_heap->Initialize( - resource_manager_page_manager.get(), num_pt_pages, + std::addressof(*resource_manager_page_manager), num_pt_pages, /*GetPointer(address + size)*/ nullptr); // Setup the slab managers. KDynamicPageManager* const app_dynamic_page_manager = nullptr; KDynamicPageManager* const sys_dynamic_page_manager = /*KTargetSystem::IsDynamicResourceLimitsEnabled()*/ true - ? resource_manager_page_manager.get() - : nullptr; - app_memory_block_manager = std::make_unique(); - sys_memory_block_manager = std::make_unique(); - app_block_info_manager = std::make_unique(); - sys_block_info_manager = std::make_unique(); - app_page_table_manager = std::make_unique(); - sys_page_table_manager = std::make_unique(); + ? 
std::addressof(*resource_manager_page_manager) : nullptr; + app_memory_block_manager.emplace(); + sys_memory_block_manager.emplace(); + app_block_info_manager.emplace(); + sys_block_info_manager.emplace(); + app_page_table_manager.emplace(); + sys_page_table_manager.emplace(); - app_memory_block_manager->Initialize(app_dynamic_page_manager, app_memory_block_heap.get()); - sys_memory_block_manager->Initialize(sys_dynamic_page_manager, sys_memory_block_heap.get()); + app_memory_block_manager->Initialize(app_dynamic_page_manager, std::addressof(*app_memory_block_heap)); + sys_memory_block_manager->Initialize(sys_dynamic_page_manager, std::addressof(*sys_memory_block_heap)); - app_block_info_manager->Initialize(app_dynamic_page_manager, block_info_heap.get()); - sys_block_info_manager->Initialize(sys_dynamic_page_manager, block_info_heap.get()); + app_block_info_manager->Initialize(app_dynamic_page_manager, std::addressof(*block_info_heap)); + sys_block_info_manager->Initialize(sys_dynamic_page_manager, std::addressof(*block_info_heap)); - app_page_table_manager->Initialize(app_dynamic_page_manager, page_table_heap.get()); - sys_page_table_manager->Initialize(sys_dynamic_page_manager, page_table_heap.get()); + app_page_table_manager->Initialize(app_dynamic_page_manager, std::addressof(*page_table_heap)); + sys_page_table_manager->Initialize(sys_dynamic_page_manager, std::addressof(*page_table_heap)); // Check that we have the correct number of dynamic pages available. ASSERT(resource_manager_page_manager->GetCount() - resource_manager_page_manager->GetUsed() == ReservedDynamicPageCount); // Create the system page table managers. 
- app_system_resource = std::make_unique(kernel); - sys_system_resource = std::make_unique(kernel); + app_system_resource.emplace(kernel); + sys_system_resource.emplace(kernel); KAutoObject::Create(std::addressof(*app_system_resource)); KAutoObject::Create(std::addressof(*sys_system_resource)); @@ -349,7 +347,7 @@ struct KernelCore::Impl { } void InitializeGlobalData(KernelCore& kernel) { - object_name_global_data = std::make_unique(kernel); + object_name_global_data.emplace(kernel); } void MakeApplicationProcess(KProcess* process) { @@ -431,7 +429,7 @@ struct KernelCore::Impl { } void DeriveInitialMemoryLayout() { - memory_layout = std::make_unique(); + memory_layout.emplace(); // Insert the root region for the virtual memory tree, from which all other regions will // derive. @@ -726,7 +724,7 @@ struct KernelCore::Impl { void InitializeMemoryLayout() { // Initialize the memory manager. - memory_manager = std::make_unique(system); + memory_manager.emplace(system); const auto& management_region = memory_layout->GetPoolManagementRegion(); ASSERT(management_region.GetEndAddress() != 0); memory_manager->Initialize(management_region.GetAddress(), management_region.GetSize()); @@ -774,8 +772,8 @@ struct KernelCore::Impl { std::mutex process_list_lock; std::vector process_list; KProcess* application_process{}; - std::unique_ptr global_scheduler_context; - std::unique_ptr hardware_timer; + std::optional global_scheduler_context; + std::optional hardware_timer; Init::KSlabResourceCounts slab_resource_counts{}; KResourceLimit* system_resource_limit{}; @@ -784,9 +782,9 @@ struct KernelCore::Impl { std::shared_ptr preemption_event; - std::unique_ptr global_object_list_container; + std::optional global_object_list_container; - std::unique_ptr object_name_global_data; + std::optional object_name_global_data; std::unordered_set registered_objects; std::unordered_set registered_in_use_objects; @@ -794,28 +792,28 @@ struct KernelCore::Impl { std::mutex server_lock; std::vector> 
server_managers; - std::array, Core::Hardware::NUM_CPU_CORES> cores; + std::array, Core::Hardware::NUM_CPU_CORES> cores; // Next host thead ID to use, 0-3 IDs represent core threads, >3 represent others std::atomic next_host_thread_id{Core::Hardware::NUM_CPU_CORES}; // Kernel memory management - std::unique_ptr memory_manager; + std::optional memory_manager; // Resource managers - std::unique_ptr resource_manager_page_manager; - std::unique_ptr page_table_heap; - std::unique_ptr app_memory_block_heap; - std::unique_ptr sys_memory_block_heap; - std::unique_ptr block_info_heap; - std::unique_ptr app_page_table_manager; - std::unique_ptr sys_page_table_manager; - std::unique_ptr app_memory_block_manager; - std::unique_ptr sys_memory_block_manager; - std::unique_ptr app_block_info_manager; - std::unique_ptr sys_block_info_manager; - std::unique_ptr app_system_resource; - std::unique_ptr sys_system_resource; + std::optional resource_manager_page_manager; + std::optional page_table_heap; + std::optional app_memory_block_heap; + std::optional sys_memory_block_heap; + std::optional block_info_heap; + std::optional app_page_table_manager; + std::optional sys_page_table_manager; + std::optional app_memory_block_manager; + std::optional sys_memory_block_manager; + std::optional app_block_info_manager; + std::optional sys_block_info_manager; + std::optional app_system_resource; + std::optional sys_system_resource; // Shared memory for services Kernel::KSharedMemory* hid_shared_mem{}; @@ -825,10 +823,10 @@ struct KernelCore::Impl { Kernel::KSharedMemory* hidbus_shared_mem{}; // Memory layout - std::unique_ptr memory_layout; + std::optional memory_layout; std::array shutdown_threads{}; - std::array, Core::Hardware::NUM_CPU_CORES> schedulers{}; + std::array, Core::Hardware::NUM_CPU_CORES> schedulers{}; bool is_multicore{}; std::atomic_bool is_shutting_down{}; @@ -948,12 +946,9 @@ const Kernel::PhysicalCore& KernelCore::CurrentPhysicalCore() const { } Kernel::KScheduler* 
KernelCore::CurrentScheduler() { - const u32 core_id = impl->GetCurrentHostThreadID(); - if (core_id >= Core::Hardware::NUM_CPU_CORES) { - // This is expected when called from not a guest thread - return {}; - } - return impl->schedulers[core_id].get(); + if (auto const core_id = impl->GetCurrentHostThreadID(); core_id < Core::Hardware::NUM_CPU_CORES) + return std::addressof(*impl->schedulers[core_id]); + return {}; // This is expected when called from not a guest thread } Kernel::KHardwareTimer& KernelCore::HardwareTimer() { diff --git a/src/core/hle/service/am/applet.h b/src/core/hle/service/am/applet.h index 0763a5838e..a693a47d7a 100644 --- a/src/core/hle/service/am/applet.h +++ b/src/core/hle/service/am/applet.h @@ -95,9 +95,9 @@ struct Applet { bool request_exit_to_library_applet_at_execute_next_program_enabled{}; // Channels - std::deque> user_channel_launch_parameter{}; - std::deque> preselected_user_launch_parameter{}; - std::deque> friend_invitation_storage_channel{}; + std::vector> user_channel_launch_parameter{}; + std::vector> preselected_user_launch_parameter{}; + std::vector> friend_invitation_storage_channel{}; // Context Stack std::stack> context_stack{}; diff --git a/src/core/hle/service/ns/platform_service_manager.cpp b/src/core/hle/service/ns/platform_service_manager.cpp index 293c014eae..ec9f64945d 100644 --- a/src/core/hle/service/ns/platform_service_manager.cpp +++ b/src/core/hle/service/ns/platform_service_manager.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include "common/assert.h" #include "common/common_types.h" @@ -40,96 +41,51 @@ constexpr u32 EXPECTED_MAGIC{0x36f81a1e}; // What we expect the encrypted bfttf constexpr u64 SHARED_FONT_MEM_SIZE{0x1100000}; constexpr FontRegion EMPTY_REGION{0, 0}; -static void DecryptSharedFont(const std::vector& input, Kernel::PhysicalMemory& output, - std::size_t& offset) { - ASSERT_MSG(offset + (input.size() * sizeof(u32)) < SHARED_FONT_MEM_SIZE, - "Shared fonts exceeds 17mb!"); - 
ASSERT_MSG(input[0] == EXPECTED_MAGIC, "Failed to derive key, unexpected magic number"); - +static void DecryptSharedFont(const std::span input, std::span output, std::size_t& offset) { + ASSERT(offset + (input.size() * sizeof(u32)) < SHARED_FONT_MEM_SIZE && "Shared fonts exceeds 17mb!"); + ASSERT(input[0] == EXPECTED_MAGIC && "Failed to derive key, unexpected magic number"); const u32 KEY = input[0] ^ EXPECTED_RESULT; // Derive key using an inverse xor std::vector transformed_font(input.size()); // TODO(ogniK): Figure out a better way to do this - std::transform(input.begin(), input.end(), transformed_font.begin(), - [&KEY](u32 font_data) { return Common::swap32(font_data ^ KEY); }); + std::transform(input.begin(), input.end(), transformed_font.begin(), [&KEY](u32 font_data) { return Common::swap32(font_data ^ KEY); }); transformed_font[1] = Common::swap32(transformed_font[1]) ^ KEY; // "re-encrypt" the size - std::memcpy(output.data() + offset, transformed_font.data(), - transformed_font.size() * sizeof(u32)); + std::memcpy(output.data() + offset, transformed_font.data(), transformed_font.size() * sizeof(u32)); offset += transformed_font.size() * sizeof(u32); } void DecryptSharedFontToTTF(const std::vector& input, std::vector& output) { ASSERT_MSG(input[0] == EXPECTED_MAGIC, "Failed to derive key, unexpected magic number"); - if (input.size() < 2) { LOG_ERROR(Service_NS, "Input font is empty"); return; } - const u32 KEY = input[0] ^ EXPECTED_RESULT; // Derive key using an inverse xor std::vector transformed_font(input.size()); // TODO(ogniK): Figure out a better way to do this - std::transform(input.begin(), input.end(), transformed_font.begin(), - [&KEY](u32 font_data) { return Common::swap32(font_data ^ KEY); }); - std::memcpy(output.data(), transformed_font.data() + 2, - (transformed_font.size() - 2) * sizeof(u32)); + std::transform(input.begin(), input.end(), transformed_font.begin(), [&KEY](u32 font_data) { return Common::swap32(font_data ^ KEY); }); + 
std::memcpy(output.data(), transformed_font.data() + 2, (transformed_font.size() - 2) * sizeof(u32)); } -void EncryptSharedFont(const std::vector& input, std::vector& output, - std::size_t& offset) { - ASSERT_MSG(offset + (input.size() * sizeof(u32)) < SHARED_FONT_MEM_SIZE, - "Shared fonts exceeds 17mb!"); - +void EncryptSharedFont(const std::vector& input, std::vector& output, std::size_t& offset) { + ASSERT(offset + (input.size() * sizeof(u32)) < SHARED_FONT_MEM_SIZE && "Shared fonts exceeds 17mb!"); const auto key = Common::swap32(EXPECTED_RESULT ^ EXPECTED_MAGIC); std::vector transformed_font(input.size() + 2); transformed_font[0] = Common::swap32(EXPECTED_MAGIC); transformed_font[1] = Common::swap32(static_cast(input.size() * sizeof(u32))) ^ key; - std::transform(input.begin(), input.end(), transformed_font.begin() + 2, - [key](u32 in) { return in ^ key; }); - std::memcpy(output.data() + offset, transformed_font.data(), - transformed_font.size() * sizeof(u32)); + std::transform(input.begin(), input.end(), transformed_font.begin() + 2, [key](u32 in) { return in ^ key; }); + std::memcpy(output.data() + offset, transformed_font.data(), transformed_font.size() * sizeof(u32)); offset += transformed_font.size() * sizeof(u32); } -// Helper function to make BuildSharedFontsRawRegions a bit nicer -static u32 GetU32Swapped(const u8* data) { - u32 value; - std::memcpy(&value, data, sizeof(value)); - return Common::swap32(value); -} - struct IPlatformServiceManager::Impl { const FontRegion& GetSharedFontRegion(std::size_t index) const { - if (index >= shared_font_regions.size() || shared_font_regions.empty()) { - // No font fallback - return EMPTY_REGION; - } - return shared_font_regions.at(index); + return index < shared_font_regions.size() ? 
shared_font_regions[index] : EMPTY_REGION; } - - void BuildSharedFontsRawRegions(const Kernel::PhysicalMemory& input) { - // As we can derive the xor key we can just populate the offsets - // based on the shared memory dump - unsigned cur_offset = 0; - - for (std::size_t i = 0; i < SHARED_FONTS.size(); i++) { - // Out of shared fonts/invalid font - if (GetU32Swapped(input.data() + cur_offset) != EXPECTED_RESULT) { - break; - } - - // Derive key within inverse xor - const u32 KEY = GetU32Swapped(input.data() + cur_offset) ^ EXPECTED_MAGIC; - const u32 SIZE = GetU32Swapped(input.data() + cur_offset + 4) ^ KEY; - shared_font_regions.push_back(FontRegion{cur_offset + 8, SIZE}); - cur_offset += SIZE + 8; - } - } - - /// Backing memory for the shared font data - std::shared_ptr shared_font; - // Automatically populated based on shared_fonts dump or system archives. - std::vector shared_font_regions; + // 6 builtin fonts + extra 2 for whatever may come after + boost::container::static_vector shared_font_regions; + /// Backing memory for the shared font data + std::array shared_font; }; IPlatformServiceManager::IPlatformServiceManager(Core::System& system_, const char* service_name_) @@ -162,8 +118,6 @@ IPlatformServiceManager::IPlatformServiceManager(Core::System& system_, const ch const auto* nand = fsc.GetSystemNANDContents(); std::size_t offset = 0; // Rebuild shared fonts from data ncas or synthesize - - impl->shared_font = std::make_shared(SHARED_FONT_MEM_SIZE); for (auto& font : SHARED_FONTS) { FileSys::VirtualFile romfs; const auto nca = @@ -197,9 +151,8 @@ IPlatformServiceManager::IPlatformServiceManager(Core::System& system_, const ch std::transform(font_data_u32.begin(), font_data_u32.end(), font_data_u32.begin(), Common::swap32); // Font offset and size do not account for the header - const FontRegion region{static_cast(offset + 8), - static_cast((font_data_u32.size() * sizeof(u32)) - 8)}; - DecryptSharedFont(font_data_u32, *impl->shared_font, offset); + const 
FontRegion region{u32(offset + 8), u32((font_data_u32.size() * sizeof(u32)) - 8)}; + DecryptSharedFont(font_data_u32, impl->shared_font, offset); impl->shared_font_regions.push_back(region); } } @@ -231,14 +184,12 @@ Result IPlatformServiceManager::GetSharedMemoryAddressOffset(Out out_shared R_SUCCEED(); } -Result IPlatformServiceManager::GetSharedMemoryNativeHandle( - OutCopyHandle out_shared_memory_native_handle) { +Result IPlatformServiceManager::GetSharedMemoryNativeHandle(OutCopyHandle out_shared_memory_native_handle) { // Map backing memory for the font data LOG_DEBUG(Service_NS, "called"); // Create shared font memory object - std::memcpy(kernel.GetFontSharedMem().GetPointer(), impl->shared_font->data(), - impl->shared_font->size()); + std::memcpy(kernel.GetFontSharedMem().GetPointer(), impl->shared_font.data(), impl->shared_font.size()); // FIXME: this shouldn't belong to the kernel *out_shared_memory_native_handle = &kernel.GetFontSharedMem(); diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 94599532b3..c94b66e6bc 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -87,12 +87,8 @@ add_library(video_core STATIC host1x/syncpoint_manager.h host1x/vic.cpp host1x/vic.h - macro/macro.cpp - macro/macro.h - macro/macro_hle.cpp - macro/macro_hle.h - macro/macro_interpreter.cpp - macro/macro_interpreter.h + macro.cpp + macro.h fence_manager.h gpu.cpp gpu.h @@ -375,10 +371,6 @@ else() endif() if (ARCHITECTURE_x86_64) - target_sources(video_core PRIVATE - macro/macro_jit_x64.cpp - macro/macro_jit_x64.h - ) target_link_libraries(video_core PUBLIC xbyak::xbyak) endif() diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index a67b35453b..03b2e3fdf9 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -107,35 +107,27 @@ bool DmaPusher::Step() { } void DmaPusher::ProcessCommands(std::span commands) { - for (std::size_t index = 0; index < commands.size();) { - 
const CommandHeader& command_header = commands[index]; - - if (dma_state.method_count) { - // Data word of methods command - dma_state.dma_word_offset = static_cast(index * sizeof(u32)); - if (dma_state.non_incrementing) { - const u32 max_write = static_cast( - std::min(index + dma_state.method_count, commands.size()) - index); - CallMultiMethod(&command_header.argument, max_write); - dma_state.method_count -= max_write; - dma_state.is_last_call = true; - index += max_write; - continue; - } else { - dma_state.is_last_call = dma_state.method_count <= 1; - CallMethod(command_header.argument); - } - - if (!dma_state.non_incrementing) { - dma_state.method++; - } - - if (dma_increment_once) { - dma_state.non_incrementing = true; - } - + for (size_t index = 0; index < commands.size();) { + // Data word of methods command + if (dma_state.method_count && dma_state.non_incrementing) { + auto const& command_header = commands[index]; //must ref (MUltiMethod re) + dma_state.dma_word_offset = u32(index * sizeof(u32)); + const u32 max_write = u32(std::min(index + dma_state.method_count, commands.size()) - index); + CallMultiMethod(&command_header.argument, max_write); + dma_state.method_count -= max_write; + dma_state.is_last_call = true; + index += max_write; + } else if (dma_state.method_count) { + auto const command_header = commands[index]; //can copy + dma_state.dma_word_offset = u32(index * sizeof(u32)); + dma_state.is_last_call = dma_state.method_count <= 1; + CallMethod(command_header.argument); + dma_state.method += !dma_state.non_incrementing ? 
1 : 0; + dma_state.non_incrementing |= dma_increment_once; dma_state.method_count--; + index++; } else { + auto const command_header = commands[index]; //can copy // No command active - this is the first word of a new one switch (command_header.mode) { case SubmissionMode::Increasing: @@ -151,8 +143,7 @@ void DmaPusher::ProcessCommands(std::span commands) { case SubmissionMode::Inline: dma_state.method = command_header.method; dma_state.subchannel = command_header.subchannel; - dma_state.dma_word_offset = static_cast( - -static_cast(dma_state.dma_get)); // negate to set address as 0 + dma_state.dma_word_offset = u64(-s64(dma_state.dma_get)); // negate to set address as 0 CallMethod(command_header.arg_count); dma_state.non_incrementing = true; dma_increment_once = false; @@ -165,8 +156,8 @@ void DmaPusher::ProcessCommands(std::span commands) { default: break; } + index++; } - index++; } } @@ -186,26 +177,24 @@ void DmaPusher::CallMethod(u32 argument) const { }); } else { auto subchannel = subchannels[dma_state.subchannel]; - if (!subchannel->execution_mask[dma_state.method]) [[likely]] { + if (!subchannel->execution_mask[dma_state.method]) { subchannel->method_sink.emplace_back(dma_state.method, argument); - return; + } else { + subchannel->ConsumeSink(); + subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset; + subchannel->CallMethod(dma_state.method, argument, dma_state.is_last_call); } - subchannel->ConsumeSink(); - subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset; - subchannel->CallMethod(dma_state.method, argument, dma_state.is_last_call); } } void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const { if (dma_state.method < non_puller_methods) { - puller.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods, - dma_state.method_count); + puller.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods, dma_state.method_count); } else { 
auto subchannel = subchannels[dma_state.subchannel]; subchannel->ConsumeSink(); subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset; - subchannel->CallMultiMethod(dma_state.method, base_start, num_methods, - dma_state.method_count); + subchannel->CallMultiMethod(dma_state.method, base_start, num_methods, dma_state.method_count); } } diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 77729fd5b6..d8d2ad74c6 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -27,9 +27,7 @@ constexpr u32 MacroRegistersStart = 0xE00; Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_) : draw_manager{std::make_unique(this)}, system{system_}, - memory_manager{memory_manager_}, macro_engine{GetMacroEngine(*this)}, upload_state{ - memory_manager, - regs.upload} { + memory_manager{memory_manager_}, macro_engine{GetMacroEngine(*this)}, upload_state{memory_manager, regs.upload} { dirty.flags.flip(); InitializeRegisterDefaults(); execution_mask.reset(); @@ -329,8 +327,7 @@ void Maxwell3D::ProcessDirtyRegisters(u32 method, u32 argument) { } } -void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argument, - bool is_last_call) { +void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argument, bool is_last_call) { switch (method) { case MAXWELL3D_REG_INDEX(wait_for_idle): return rasterizer->WaitForIdle(); @@ -427,9 +424,7 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { return; } - ASSERT_MSG(method < Regs::NUM_REGS, - "Invalid Maxwell3D register, increase the size of the Regs structure"); - + ASSERT(method < Regs::NUM_REGS && "Invalid Maxwell3D register, increase the size of the Regs structure"); const u32 argument = ProcessShadowRam(method, method_argument); ProcessDirtyRegisters(method, argument); ProcessMethodCall(method, argument, method_argument, is_last_call); @@ -670,7 
+665,7 @@ Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { } u32 Maxwell3D::GetRegisterValue(u32 method) const { - ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register"); + ASSERT(method < Regs::NUM_REGS && "Invalid Maxwell3D register"); return regs.reg_array[method]; } diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index ae2e7a84c4..8c50a4ea2f 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -23,7 +23,7 @@ #include "video_core/engines/engine_interface.h" #include "video_core/engines/engine_upload.h" #include "video_core/gpu.h" -#include "video_core/macro/macro.h" +#include "video_core/macro.h" #include "video_core/textures/texture.h" namespace Core { @@ -3203,7 +3203,7 @@ private: std::vector macro_params; /// Interpreter for the macro codes uploaded to the GPU. - std::unique_ptr macro_engine; + std::optional macro_engine; Upload::State upload_state; diff --git a/src/video_core/macro.cpp b/src/video_core/macro.cpp new file mode 100644 index 0000000000..3fe69be4dd --- /dev/null +++ b/src/video_core/macro.cpp @@ -0,0 +1,1667 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include +#include +#include + +#include +#ifdef ARCHITECTURE_x86_64 +// xbyak hates human beings +#ifdef __GNUC__ +#pragma GCC diagnostic ignored "-Wconversion" +#pragma GCC diagnostic ignored "-Wshadow" +#endif +#ifdef __clang__ +#pragma clang diagnostic ignored "-Wconversion" +#pragma clang diagnostic ignored "-Wshadow" +#endif +#include +#endif + +#include "common/assert.h" +#include "common/scope_exit.h" +#include "common/fs/fs.h" +#include "common/fs/path_util.h" +#include "common/settings.h" +#include "common/container_hash.h" +#include "video_core/engines/maxwell_3d.h" +#include 
"video_core/engines/draw_manager.h" +#include "video_core/dirty_flags.h" +#include "video_core/rasterizer_interface.h" +#include "video_core/macro.h" + +#include "common/assert.h" +#include "common/bit_field.h" +#include "common/logging/log.h" +#ifdef ARCHITECTURE_x86_64 +#include "common/x64/xbyak_abi.h" +#include "common/x64/xbyak_util.h" +#endif +#include "video_core/engines/maxwell_3d.h" + +namespace Tegra { + +using Maxwell3D = Engines::Maxwell3D; + +namespace { + +bool IsTopologySafe(Maxwell3D::Regs::PrimitiveTopology topology) { + switch (topology) { + case Maxwell3D::Regs::PrimitiveTopology::Points: + case Maxwell3D::Regs::PrimitiveTopology::Lines: + case Maxwell3D::Regs::PrimitiveTopology::LineLoop: + case Maxwell3D::Regs::PrimitiveTopology::LineStrip: + case Maxwell3D::Regs::PrimitiveTopology::Triangles: + case Maxwell3D::Regs::PrimitiveTopology::TriangleStrip: + case Maxwell3D::Regs::PrimitiveTopology::TriangleFan: + case Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency: + case Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency: + case Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency: + case Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency: + case Maxwell3D::Regs::PrimitiveTopology::Patches: + return true; + case Maxwell3D::Regs::PrimitiveTopology::Quads: + case Maxwell3D::Regs::PrimitiveTopology::QuadStrip: + case Maxwell3D::Regs::PrimitiveTopology::Polygon: + default: + return false; + } +} + +class HLEMacroImpl : public CachedMacro { +public: + explicit HLEMacroImpl(Maxwell3D& maxwell3d_) + : CachedMacro(maxwell3d_) + {} +}; + +/// @note: these macros have two versions, a normal and extended version, with the extended version +/// also assigning the base vertex/instance. 
+template +class HLE_DrawArraysIndirect final : public HLEMacroImpl { +public: + explicit HLE_DrawArraysIndirect(Maxwell3D& maxwell3d_) + : HLEMacroImpl(maxwell3d_) + {} + + void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { + auto topology = static_cast(parameters[0]); + if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) { + Fallback(parameters); + return; + } + + auto& params = maxwell3d.draw_manager->GetIndirectParams(); + params.is_byte_count = false; + params.is_indexed = false; + params.include_count = false; + params.count_start_address = 0; + params.indirect_start_address = maxwell3d.GetMacroAddress(1); + params.buffer_size = 4 * sizeof(u32); + params.max_draw_counts = 1; + params.stride = 0; + + if (extended) { + maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; + maxwell3d.SetHLEReplacementAttributeType(0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseInstance); + } + + maxwell3d.draw_manager->DrawArrayIndirect(topology); + + if (extended) { + maxwell3d.engine_state = Maxwell3D::EngineHint::None; + maxwell3d.replace_table.clear(); + } + } + +private: + void Fallback(const std::vector& parameters) { + SCOPE_EXIT { + if (extended) { + maxwell3d.engine_state = Maxwell3D::EngineHint::None; + maxwell3d.replace_table.clear(); + } + }; + maxwell3d.RefreshParameters(); + const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); + + auto topology = static_cast(parameters[0]); + const u32 vertex_first = parameters[3]; + const u32 vertex_count = parameters[1]; + + if (!IsTopologySafe(topology) && size_t(maxwell3d.GetMaxCurrentVertices()) < size_t(vertex_first) + size_t(vertex_count)) { + ASSERT(false && "Faulty draw!"); + return; + } + + const u32 base_instance = parameters[4]; + if (extended) { + maxwell3d.regs.global_base_instance_index = base_instance; + maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; + maxwell3d.SetHLEReplacementAttributeType( + 0, 0x640, 
Maxwell3D::HLEReplacementAttributeType::BaseInstance); + } + + maxwell3d.draw_manager->DrawArray(topology, vertex_first, vertex_count, base_instance, + instance_count); + + if (extended) { + maxwell3d.regs.global_base_instance_index = 0; + maxwell3d.engine_state = Maxwell3D::EngineHint::None; + maxwell3d.replace_table.clear(); + } + } +}; + +/* + * @note: these macros have two versions, a normal and extended version, with the extended version + * also assigning the base vertex/instance. + */ +template +class HLE_DrawIndexedIndirect final : public HLEMacroImpl { +public: + explicit HLE_DrawIndexedIndirect(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} + + void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { + auto topology = static_cast(parameters[0]); + if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) { + Fallback(parameters); + return; + } + + const u32 estimate = static_cast(maxwell3d.EstimateIndexBufferSize()); + const u32 element_base = parameters[4]; + const u32 base_instance = parameters[5]; + maxwell3d.regs.vertex_id_base = element_base; + maxwell3d.regs.global_base_vertex_index = element_base; + maxwell3d.regs.global_base_instance_index = base_instance; + maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + if (extended) { + maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; + maxwell3d.SetHLEReplacementAttributeType(0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex); + maxwell3d.SetHLEReplacementAttributeType(0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance); + } + auto& params = maxwell3d.draw_manager->GetIndirectParams(); + params.is_byte_count = false; + params.is_indexed = true; + params.include_count = false; + params.count_start_address = 0; + params.indirect_start_address = maxwell3d.GetMacroAddress(1); + params.buffer_size = 5 * sizeof(u32); + params.max_draw_counts = 1; + params.stride = 0; + maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = 
true; + maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, estimate); + maxwell3d.regs.vertex_id_base = 0x0; + maxwell3d.regs.global_base_vertex_index = 0x0; + maxwell3d.regs.global_base_instance_index = 0x0; + if (extended) { + maxwell3d.engine_state = Maxwell3D::EngineHint::None; + maxwell3d.replace_table.clear(); + } + } + +private: + void Fallback(const std::vector& parameters) { + maxwell3d.RefreshParameters(); + const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); + const u32 element_base = parameters[4]; + const u32 base_instance = parameters[5]; + maxwell3d.regs.vertex_id_base = element_base; + maxwell3d.regs.global_base_vertex_index = element_base; + maxwell3d.regs.global_base_instance_index = base_instance; + maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + if (extended) { + maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; + maxwell3d.SetHLEReplacementAttributeType(0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex); + maxwell3d.SetHLEReplacementAttributeType(0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance); + } + + maxwell3d.draw_manager->DrawIndex(Tegra::Maxwell3D::Regs::PrimitiveTopology(parameters[0]), parameters[3], parameters[1], element_base, base_instance, instance_count); + + maxwell3d.regs.vertex_id_base = 0x0; + maxwell3d.regs.global_base_vertex_index = 0x0; + maxwell3d.regs.global_base_instance_index = 0x0; + if (extended) { + maxwell3d.engine_state = Maxwell3D::EngineHint::None; + maxwell3d.replace_table.clear(); + } + } +}; + +class HLE_MultiLayerClear final : public HLEMacroImpl { +public: + explicit HLE_MultiLayerClear(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} + + void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { + maxwell3d.RefreshParameters(); + ASSERT(parameters.size() == 1); + + const Maxwell3D::Regs::ClearSurface clear_params{parameters[0]}; + const u32 rt_index = clear_params.RT; + const u32 num_layers = 
maxwell3d.regs.rt[rt_index].depth; + ASSERT(clear_params.layer == 0); + + maxwell3d.regs.clear_surface.raw = clear_params.raw; + maxwell3d.draw_manager->Clear(num_layers); + } +}; + +class HLE_MultiDrawIndexedIndirectCount final : public HLEMacroImpl { +public: + explicit HLE_MultiDrawIndexedIndirectCount(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} + + void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { + const auto topology = Maxwell3D::Regs::PrimitiveTopology(parameters[2]); + if (!IsTopologySafe(topology)) { + Fallback(parameters); + return; + } + + const u32 start_indirect = parameters[0]; + const u32 end_indirect = parameters[1]; + if (start_indirect >= end_indirect) { + // Nothing to do. + return; + } + + const u32 padding = parameters[3]; // padding is in words + + // size of each indirect segment + const u32 indirect_words = 5 + padding; + const u32 stride = indirect_words * sizeof(u32); + const std::size_t draw_count = end_indirect - start_indirect; + const u32 estimate = static_cast(maxwell3d.EstimateIndexBufferSize()); + maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + auto& params = maxwell3d.draw_manager->GetIndirectParams(); + params.is_byte_count = false; + params.is_indexed = true; + params.include_count = true; + params.count_start_address = maxwell3d.GetMacroAddress(4); + params.indirect_start_address = maxwell3d.GetMacroAddress(5); + params.buffer_size = stride * draw_count; + params.max_draw_counts = draw_count; + params.stride = stride; + maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; + maxwell3d.SetHLEReplacementAttributeType( + 0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex); + maxwell3d.SetHLEReplacementAttributeType( + 0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance); + maxwell3d.SetHLEReplacementAttributeType(0, 0x648, + Maxwell3D::HLEReplacementAttributeType::DrawID); + 
maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, estimate); + maxwell3d.engine_state = Maxwell3D::EngineHint::None; + maxwell3d.replace_table.clear(); + } + +private: + void Fallback(const std::vector& parameters) { + SCOPE_EXIT { + // Clean everything. + maxwell3d.regs.vertex_id_base = 0x0; + maxwell3d.engine_state = Maxwell3D::EngineHint::None; + maxwell3d.replace_table.clear(); + }; + maxwell3d.RefreshParameters(); + const u32 start_indirect = parameters[0]; + const u32 end_indirect = parameters[1]; + if (start_indirect >= end_indirect) { + // Nothing to do. + return; + } + const auto topology = static_cast(parameters[2]); + const u32 padding = parameters[3]; + const std::size_t max_draws = parameters[4]; + + const u32 indirect_words = 5 + padding; + const std::size_t first_draw = start_indirect; + const std::size_t effective_draws = end_indirect - start_indirect; + const std::size_t last_draw = start_indirect + (std::min)(effective_draws, max_draws); + + for (std::size_t index = first_draw; index < last_draw; index++) { + const std::size_t base = index * indirect_words + 5; + const u32 base_vertex = parameters[base + 3]; + const u32 base_instance = parameters[base + 4]; + maxwell3d.regs.vertex_id_base = base_vertex; + maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; + maxwell3d.SetHLEReplacementAttributeType( + 0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex); + maxwell3d.SetHLEReplacementAttributeType( + 0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance); + maxwell3d.CallMethod(0x8e3, 0x648, true); + maxwell3d.CallMethod(0x8e4, static_cast(index), true); + maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + maxwell3d.draw_manager->DrawIndex(topology, parameters[base + 2], parameters[base], + base_vertex, base_instance, parameters[base + 1]); + } + } +}; + +class HLE_DrawIndirectByteCount final : public HLEMacroImpl { +public: + explicit HLE_DrawIndirectByteCount(Maxwell3D& maxwell3d_) : 
HLEMacroImpl(maxwell3d_) {} + + void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { + const bool force = maxwell3d.Rasterizer().HasDrawTransformFeedback(); + + auto topology = static_cast(parameters[0] & 0xFFFFU); + if (!force && (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology))) { + Fallback(parameters); + return; + } + auto& params = maxwell3d.draw_manager->GetIndirectParams(); + params.is_byte_count = true; + params.is_indexed = false; + params.include_count = false; + params.count_start_address = 0; + params.indirect_start_address = maxwell3d.GetMacroAddress(2); + params.buffer_size = 4; + params.max_draw_counts = 1; + params.stride = parameters[1]; + maxwell3d.regs.draw.begin = parameters[0]; + maxwell3d.regs.draw_auto_stride = parameters[1]; + maxwell3d.regs.draw_auto_byte_count = parameters[2]; + + maxwell3d.draw_manager->DrawArrayIndirect(topology); + } + +private: + void Fallback(const std::vector& parameters) { + maxwell3d.RefreshParameters(); + + maxwell3d.regs.draw.begin = parameters[0]; + maxwell3d.regs.draw_auto_stride = parameters[1]; + maxwell3d.regs.draw_auto_byte_count = parameters[2]; + + maxwell3d.draw_manager->DrawArray( + maxwell3d.regs.draw.topology, 0, + maxwell3d.regs.draw_auto_byte_count / maxwell3d.regs.draw_auto_stride, 0, 1); + } +}; + +class HLE_C713C83D8F63CCF3 final : public HLEMacroImpl { +public: + explicit HLE_C713C83D8F63CCF3(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} + + void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { + maxwell3d.RefreshParameters(); + const u32 offset = (parameters[0] & 0x3FFFFFFF) << 2; + const u32 address = maxwell3d.regs.shadow_scratch[24]; + auto& const_buffer = maxwell3d.regs.const_buffer; + const_buffer.size = 0x7000; + const_buffer.address_high = (address >> 24) & 0xFF; + const_buffer.address_low = address << 8; + const_buffer.offset = offset; + } +}; + +class HLE_D7333D26E0A93EDE final : public HLEMacroImpl { 
+public: + explicit HLE_D7333D26E0A93EDE(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} + + void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { + maxwell3d.RefreshParameters(); + const size_t index = parameters[0]; + const u32 address = maxwell3d.regs.shadow_scratch[42 + index]; + const u32 size = maxwell3d.regs.shadow_scratch[47 + index]; + auto& const_buffer = maxwell3d.regs.const_buffer; + const_buffer.size = size; + const_buffer.address_high = (address >> 24) & 0xFF; + const_buffer.address_low = address << 8; + } +}; + +class HLE_BindShader final : public HLEMacroImpl { +public: + explicit HLE_BindShader(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} + + void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { + maxwell3d.RefreshParameters(); + auto& regs = maxwell3d.regs; + const u32 index = parameters[0]; + if ((parameters[1] - regs.shadow_scratch[28 + index]) == 0) { + return; + } + + regs.pipelines[index & 0xF].offset = parameters[2]; + maxwell3d.dirty.flags[VideoCommon::Dirty::Shaders] = true; + regs.shadow_scratch[28 + index] = parameters[1]; + regs.shadow_scratch[34 + index] = parameters[2]; + + const u32 address = parameters[4]; + auto& const_buffer = regs.const_buffer; + const_buffer.size = 0x10000; + const_buffer.address_high = (address >> 24) & 0xFF; + const_buffer.address_low = address << 8; + + const size_t bind_group_id = parameters[3] & 0x7F; + auto& bind_group = regs.bind_groups[bind_group_id]; + bind_group.raw_config = 0x11; + maxwell3d.ProcessCBBind(bind_group_id); + } +}; + +class HLE_SetRasterBoundingBox final : public HLEMacroImpl { +public: + explicit HLE_SetRasterBoundingBox(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} + + void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { + maxwell3d.RefreshParameters(); + const u32 raster_mode = parameters[0]; + auto& regs = maxwell3d.regs; + const u32 raster_enabled = 
maxwell3d.regs.conservative_raster_enable; + const u32 scratch_data = maxwell3d.regs.shadow_scratch[52]; + regs.raster_bounding_box.raw = raster_mode & 0xFFFFF00F; + regs.raster_bounding_box.pad.Assign(scratch_data & raster_enabled); + } +}; + +template +class HLE_ClearConstBuffer final : public HLEMacroImpl { +public: + explicit HLE_ClearConstBuffer(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} + + void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { + maxwell3d.RefreshParameters(); + static constexpr std::array zeroes{}; + auto& regs = maxwell3d.regs; + regs.const_buffer.size = u32(base_size); + regs.const_buffer.address_high = parameters[0]; + regs.const_buffer.address_low = parameters[1]; + regs.const_buffer.offset = 0; + maxwell3d.ProcessCBMultiData(zeroes.data(), parameters[2] * 4); + } +}; + +class HLE_ClearMemory final : public HLEMacroImpl { +public: + explicit HLE_ClearMemory(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} + + void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { + maxwell3d.RefreshParameters(); + + const u32 needed_memory = parameters[2] / sizeof(u32); + if (needed_memory > zero_memory.size()) { + zero_memory.resize(needed_memory, 0); + } + auto& regs = maxwell3d.regs; + regs.upload.line_length_in = parameters[2]; + regs.upload.line_count = 1; + regs.upload.dest.address_high = parameters[0]; + regs.upload.dest.address_low = parameters[1]; + maxwell3d.CallMethod(size_t(MAXWELL3D_REG_INDEX(launch_dma)), 0x1011, true); + maxwell3d.CallMultiMethod(size_t(MAXWELL3D_REG_INDEX(inline_data)), zero_memory.data(), needed_memory, needed_memory); + } + +private: + std::vector zero_memory; +}; + +class HLE_TransformFeedbackSetup final : public HLEMacroImpl { +public: + explicit HLE_TransformFeedbackSetup(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} + + void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { + maxwell3d.RefreshParameters(); + + 
auto& regs = maxwell3d.regs; + regs.transform_feedback_enabled = 1; + regs.transform_feedback.buffers[0].start_offset = 0; + regs.transform_feedback.buffers[1].start_offset = 0; + regs.transform_feedback.buffers[2].start_offset = 0; + regs.transform_feedback.buffers[3].start_offset = 0; + + regs.upload.line_length_in = 4; + regs.upload.line_count = 1; + regs.upload.dest.address_high = parameters[0]; + regs.upload.dest.address_low = parameters[1]; + maxwell3d.CallMethod(size_t(MAXWELL3D_REG_INDEX(launch_dma)), 0x1011, true); + maxwell3d.CallMethod(size_t(MAXWELL3D_REG_INDEX(inline_data)), regs.transform_feedback.controls[0].stride, true); + + maxwell3d.Rasterizer().RegisterTransformFeedback(regs.upload.dest.Address()); + } +}; + +} // Anonymous namespace + +HLEMacro::HLEMacro(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {} + +HLEMacro::~HLEMacro() = default; + +std::unique_ptr HLEMacro::GetHLEProgram(u64 hash) const { + // Compiler will make you a GREAT job at making an ad-hoc hash table :) + switch (hash) { + case 0x0D61FC9FAAC9FCADULL: return std::make_unique>(maxwell3d); + case 0x8A4D173EB99A8603ULL: return std::make_unique>(maxwell3d); + case 0x771BB18C62444DA0ULL: return std::make_unique>(maxwell3d); + case 0x0217920100488FF7ULL: return std::make_unique>(maxwell3d); + case 0x3F5E74B9C9A50164ULL: return std::make_unique(maxwell3d); + case 0xEAD26C3E2109B06BULL: return std::make_unique(maxwell3d); + case 0xC713C83D8F63CCF3ULL: return std::make_unique(maxwell3d); + case 0xD7333D26E0A93EDEULL: return std::make_unique(maxwell3d); + case 0xEB29B2A09AA06D38ULL: return std::make_unique(maxwell3d); + case 0xDB1341DBEB4C8AF7ULL: return std::make_unique(maxwell3d); + case 0x6C97861D891EDf7EULL: return std::make_unique>(maxwell3d); + case 0xD246FDDF3A6173D7ULL: return std::make_unique>(maxwell3d); + case 0xEE4D0004BEC8ECF4ULL: return std::make_unique(maxwell3d); + case 0xFC0CF27F5FFAA661ULL: return std::make_unique(maxwell3d); + case 0xB5F74EDB717278ECULL: return 
std::make_unique(maxwell3d); + default: + return nullptr; + } +} + +namespace { +class MacroInterpreterImpl final : public CachedMacro { +public: + explicit MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_, const std::vector& code_) + : CachedMacro(maxwell3d_) + , code{code_} + {} + + void Execute(const std::vector& params, u32 method) override; + +private: + /// Resets the execution engine state, zeroing registers, etc. + void Reset(); + + /** + * Executes a single macro instruction located at the current program counter. Returns whether + * the interpreter should keep running. + * + * @param is_delay_slot Whether the current step is being executed due to a delay slot in a + * previous instruction. + */ + bool Step(bool is_delay_slot); + + /// Calculates the result of an ALU operation. src_a OP src_b; + u32 GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b); + + /// Performs the result operation on the input result and stores it in the specified register + /// (if necessary). + void ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result); + + /// Evaluates the branch condition and returns whether the branch should be taken or not. + bool EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const; + + /// Reads an opcode at the current program counter location. + Macro::Opcode GetOpcode() const; + + /// Returns the specified register's value. Register 0 is hardcoded to always return 0. + u32 GetRegister(u32 register_id) const; + + /// Sets the register to the input value. + void SetRegister(u32 register_id, u32 value); + + /// Sets the method address to use for the next Send instruction. + void SetMethodAddress(u32 address); + + /// Calls a GPU Engine method with the input parameter. + void Send(u32 value); + + /// Reads a GPU register located at the method address. + u32 Read(u32 method) const; + + /// Returns the next parameter in the parameter queue. 
+ u32 FetchParameter(); + + /// Current program counter + u32 pc{}; + /// Program counter to execute at after the delay slot is executed. + std::optional delayed_pc; + + /// General purpose macro registers. + std::array registers = {}; + + /// Method address to use for the next Send instruction. + Macro::MethodAddress method_address = {}; + + /// Input parameters of the current macro. + std::unique_ptr parameters; + std::size_t num_parameters = 0; + std::size_t parameters_capacity = 0; + /// Index of the next parameter that will be fetched by the 'parm' instruction. + u32 next_parameter_index = 0; + + bool carry_flag = false; + const std::vector& code; +}; + +void MacroInterpreterImpl::Execute(const std::vector& params, u32 method) { + Reset(); + + registers[1] = params[0]; + num_parameters = params.size(); + + if (num_parameters > parameters_capacity) { + parameters_capacity = num_parameters; + parameters = std::make_unique(num_parameters); + } + std::memcpy(parameters.get(), params.data(), num_parameters * sizeof(u32)); + + // Execute the code until we hit an exit condition. + bool keep_executing = true; + while (keep_executing) { + keep_executing = Step(false); + } + + // Assert the the macro used all the input parameters + ASSERT(next_parameter_index == num_parameters); +} + +void MacroInterpreterImpl::Reset() { + registers = {}; + pc = 0; + delayed_pc = {}; + method_address.raw = 0; + num_parameters = 0; + // The next parameter index starts at 1, because $r1 already has the value of the first + // parameter. 
+ next_parameter_index = 1; + carry_flag = false; +} + +bool MacroInterpreterImpl::Step(bool is_delay_slot) { + u32 base_address = pc; + + Macro::Opcode opcode = GetOpcode(); + pc += 4; + + // Update the program counter if we were delayed + if (delayed_pc) { + ASSERT(is_delay_slot); + pc = *delayed_pc; + delayed_pc = {}; + } + + switch (opcode.operation) { + case Macro::Operation::ALU: { + u32 result = GetALUResult(opcode.alu_operation, GetRegister(opcode.src_a), + GetRegister(opcode.src_b)); + ProcessResult(opcode.result_operation, opcode.dst, result); + break; + } + case Macro::Operation::AddImmediate: { + ProcessResult(opcode.result_operation, opcode.dst, + GetRegister(opcode.src_a) + opcode.immediate); + break; + } + case Macro::Operation::ExtractInsert: { + u32 dst = GetRegister(opcode.src_a); + u32 src = GetRegister(opcode.src_b); + + src = (src >> opcode.bf_src_bit) & opcode.GetBitfieldMask(); + dst &= ~(opcode.GetBitfieldMask() << opcode.bf_dst_bit); + dst |= src << opcode.bf_dst_bit; + ProcessResult(opcode.result_operation, opcode.dst, dst); + break; + } + case Macro::Operation::ExtractShiftLeftImmediate: { + u32 dst = GetRegister(opcode.src_a); + u32 src = GetRegister(opcode.src_b); + + u32 result = ((src >> dst) & opcode.GetBitfieldMask()) << opcode.bf_dst_bit; + + ProcessResult(opcode.result_operation, opcode.dst, result); + break; + } + case Macro::Operation::ExtractShiftLeftRegister: { + u32 dst = GetRegister(opcode.src_a); + u32 src = GetRegister(opcode.src_b); + + u32 result = ((src >> opcode.bf_src_bit) & opcode.GetBitfieldMask()) << dst; + + ProcessResult(opcode.result_operation, opcode.dst, result); + break; + } + case Macro::Operation::Read: { + u32 result = Read(GetRegister(opcode.src_a) + opcode.immediate); + ProcessResult(opcode.result_operation, opcode.dst, result); + break; + } + case Macro::Operation::Branch: { + ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid"); + u32 value = GetRegister(opcode.src_a); + bool 
taken = EvaluateBranchCondition(opcode.branch_condition, value); + if (taken) { + // Ignore the delay slot if the branch has the annul bit. + if (opcode.branch_annul) { + pc = base_address + opcode.GetBranchTarget(); + return true; + } + + delayed_pc = base_address + opcode.GetBranchTarget(); + // Execute one more instruction due to the delay slot. + return Step(true); + } + break; + } + default: + UNIMPLEMENTED_MSG("Unimplemented macro operation {}", opcode.operation.Value()); + break; + } + + // An instruction with the Exit flag will not actually + // cause an exit if it's executed inside a delay slot. + if (opcode.is_exit && !is_delay_slot) { + // Exit has a delay slot, execute the next instruction + Step(true); + return false; + } + + return true; +} + +u32 MacroInterpreterImpl::GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b) { + switch (operation) { + case Macro::ALUOperation::Add: { + const u64 result{static_cast(src_a) + src_b}; + carry_flag = result > 0xffffffff; + return static_cast(result); + } + case Macro::ALUOperation::AddWithCarry: { + const u64 result{static_cast(src_a) + src_b + (carry_flag ? 1ULL : 0ULL)}; + carry_flag = result > 0xffffffff; + return static_cast(result); + } + case Macro::ALUOperation::Subtract: { + const u64 result{static_cast(src_a) - src_b}; + carry_flag = result < 0x100000000; + return static_cast(result); + } + case Macro::ALUOperation::SubtractWithBorrow: { + const u64 result{static_cast(src_a) - src_b - (carry_flag ? 
0ULL : 1ULL)}; + carry_flag = result < 0x100000000; + return static_cast(result); + } + case Macro::ALUOperation::Xor: + return src_a ^ src_b; + case Macro::ALUOperation::Or: + return src_a | src_b; + case Macro::ALUOperation::And: + return src_a & src_b; + case Macro::ALUOperation::AndNot: + return src_a & ~src_b; + case Macro::ALUOperation::Nand: + return ~(src_a & src_b); + + default: + UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", operation); + return 0; + } +} + +void MacroInterpreterImpl::ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result) { + switch (operation) { + case Macro::ResultOperation::IgnoreAndFetch: + // Fetch parameter and ignore result. + SetRegister(reg, FetchParameter()); + break; + case Macro::ResultOperation::Move: + // Move result. + SetRegister(reg, result); + break; + case Macro::ResultOperation::MoveAndSetMethod: + // Move result and use as Method Address. + SetRegister(reg, result); + SetMethodAddress(result); + break; + case Macro::ResultOperation::FetchAndSend: + // Fetch parameter and send result. + SetRegister(reg, FetchParameter()); + Send(result); + break; + case Macro::ResultOperation::MoveAndSend: + // Move and send result. + SetRegister(reg, result); + Send(result); + break; + case Macro::ResultOperation::FetchAndSetMethod: + // Fetch parameter and use result as Method Address. + SetRegister(reg, FetchParameter()); + SetMethodAddress(result); + break; + case Macro::ResultOperation::MoveAndSetMethodFetchAndSend: + // Move result and use as Method Address, then fetch and send parameter. + SetRegister(reg, result); + SetMethodAddress(result); + Send(FetchParameter()); + break; + case Macro::ResultOperation::MoveAndSetMethodSend: + // Move result and use as Method Address, then send bits 12:17 of result. 
+ SetRegister(reg, result); + SetMethodAddress(result); + Send((result >> 12) & 0b111111); + break; + default: + UNIMPLEMENTED_MSG("Unimplemented result operation {}", operation); + break; + } +} + +bool MacroInterpreterImpl::EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const { + switch (cond) { + case Macro::BranchCondition::Zero: + return value == 0; + case Macro::BranchCondition::NotZero: + return value != 0; + } + UNREACHABLE(); +} + +Macro::Opcode MacroInterpreterImpl::GetOpcode() const { + ASSERT((pc % sizeof(u32)) == 0); + ASSERT(pc < code.size() * sizeof(u32)); + return {code[pc / sizeof(u32)]}; +} + +u32 MacroInterpreterImpl::GetRegister(u32 register_id) const { + return registers.at(register_id); +} + +void MacroInterpreterImpl::SetRegister(u32 register_id, u32 value) { + // Register 0 is hardwired as the zero register. + // Ensure no writes to it actually occur. + if (register_id == 0) { + return; + } + + registers.at(register_id) = value; +} + +void MacroInterpreterImpl::SetMethodAddress(u32 address) { + method_address.raw = address; +} + +void MacroInterpreterImpl::Send(u32 value) { + maxwell3d.CallMethod(method_address.address, value, true); + // Increment the method address by the method increment. 
+ method_address.address.Assign(method_address.address.Value() + + method_address.increment.Value()); +} + +u32 MacroInterpreterImpl::Read(u32 method) const { + return maxwell3d.GetRegisterValue(method); +} + +u32 MacroInterpreterImpl::FetchParameter() { + ASSERT(next_parameter_index < num_parameters); + return parameters[next_parameter_index++]; +} +} // Anonymous namespace + +#ifdef ARCHITECTURE_x86_64 +namespace { +constexpr Xbyak::Reg64 STATE = Xbyak::util::rbx; +constexpr Xbyak::Reg32 RESULT = Xbyak::util::r10d; +constexpr Xbyak::Reg64 MAX_PARAMETER = Xbyak::util::r11; +constexpr Xbyak::Reg64 PARAMETERS = Xbyak::util::r12; +constexpr Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; +constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; + +constexpr std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ + STATE, + RESULT, + MAX_PARAMETER, + PARAMETERS, + METHOD_ADDRESS, + BRANCH_HOLDER, +}); + +// Arbitrarily chosen based on current booting games. +constexpr size_t MAX_CODE_SIZE = 0x10000; + +std::bitset<32> PersistentCallerSavedRegs() { + return PERSISTENT_REGISTERS & Common::X64::ABI_ALL_CALLER_SAVED; +} + +/// @brief Must enforce W^X constraints, as we don't yet have a global "NO_EXECUTE" support flag +/// the speed loss is minimal, and in fact may be negligible, however for your peace of mind +/// I simply included known OSes which had W^X issues +#if defined(__OpenBSD__) || defined(__NetBSD__) || defined(__DragonFly__) +static const auto default_cg_mode = Xbyak::DontSetProtectRWE; +#else +static const auto default_cg_mode = nullptr; //Allow RWE +#endif + +class MacroJITx64Impl final : public Xbyak::CodeGenerator, public CachedMacro {
+public: + explicit MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector& code_) + : Xbyak::CodeGenerator(MAX_CODE_SIZE, default_cg_mode) + , CachedMacro(maxwell3d_) + , code{code_} + { + Compile(); + } + + void Execute(const std::vector& parameters, u32 method) override; + + void
Compile_ALU(Macro::Opcode opcode); + void Compile_AddImmediate(Macro::Opcode opcode); + void Compile_ExtractInsert(Macro::Opcode opcode); + void Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode); + void Compile_ExtractShiftLeftRegister(Macro::Opcode opcode); + void Compile_Read(Macro::Opcode opcode); + void Compile_Branch(Macro::Opcode opcode); + +private: + void Optimizer_ScanFlags(); + + void Compile(); + bool Compile_NextInstruction(); + + Xbyak::Reg32 Compile_FetchParameter(); + Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst); + + void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg); + void Compile_Send(Xbyak::Reg32 value); + + Macro::Opcode GetOpCode() const; + + struct JITState { + Engines::Maxwell3D* maxwell3d{}; + std::array registers{}; + u32 carry_flag{}; + }; + static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0"); + using ProgramType = void (*)(JITState*, const u32*, const u32*); + + struct OptimizerState { + bool can_skip_carry{}; + bool has_delayed_pc{}; + bool zero_reg_skip{}; + bool skip_dummy_addimmediate{}; + bool optimize_for_method_move{}; + bool enable_asserts{}; + }; + OptimizerState optimizer{}; + + std::optional next_opcode{}; + ProgramType program{nullptr}; + + std::array labels; + std::array delay_skip; + Xbyak::Label end_of_code{}; + + bool is_delay_slot{}; + u32 pc{}; + + const std::vector& code; +}; + +void MacroJITx64Impl::Execute(const std::vector& parameters, u32 method) { + ASSERT_OR_EXECUTE(program != nullptr, { return; }); + JITState state{}; + state.maxwell3d = &maxwell3d; + state.registers = {}; + program(&state, parameters.data(), parameters.data() + parameters.size()); +} + +void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) { + const bool is_a_zero = opcode.src_a == 0; + const bool is_b_zero = opcode.src_b == 0; + const bool valid_operation = !is_a_zero && !is_b_zero; + [[maybe_unused]] const bool is_move_operation = !is_a_zero && is_b_zero; + const bool 
has_zero_register = is_a_zero || is_b_zero; + const bool no_zero_reg_skip = opcode.alu_operation == Macro::ALUOperation::AddWithCarry || + opcode.alu_operation == Macro::ALUOperation::SubtractWithBorrow; + + Xbyak::Reg32 src_a; + Xbyak::Reg32 src_b; + + if (!optimizer.zero_reg_skip || no_zero_reg_skip) { + src_a = Compile_GetRegister(opcode.src_a, RESULT); + src_b = Compile_GetRegister(opcode.src_b, eax); + } else { + if (!is_a_zero) { + src_a = Compile_GetRegister(opcode.src_a, RESULT); + } + if (!is_b_zero) { + src_b = Compile_GetRegister(opcode.src_b, eax); + } + } + + bool has_emitted = false; + + switch (opcode.alu_operation) { + case Macro::ALUOperation::Add: + if (optimizer.zero_reg_skip) { + if (valid_operation) { + add(src_a, src_b); + } + } else { + add(src_a, src_b); + } + + if (!optimizer.can_skip_carry) { + setc(byte[STATE + offsetof(JITState, carry_flag)]); + } + break; + case Macro::ALUOperation::AddWithCarry: + bt(dword[STATE + offsetof(JITState, carry_flag)], 0); + adc(src_a, src_b); + setc(byte[STATE + offsetof(JITState, carry_flag)]); + break; + case Macro::ALUOperation::Subtract: + if (optimizer.zero_reg_skip) { + if (valid_operation) { + sub(src_a, src_b); + has_emitted = true; + } + } else { + sub(src_a, src_b); + has_emitted = true; + } + if (!optimizer.can_skip_carry && has_emitted) { + setc(byte[STATE + offsetof(JITState, carry_flag)]); + } + break; + case Macro::ALUOperation::SubtractWithBorrow: + bt(dword[STATE + offsetof(JITState, carry_flag)], 0); + sbb(src_a, src_b); + setc(byte[STATE + offsetof(JITState, carry_flag)]); + break; + case Macro::ALUOperation::Xor: + if (optimizer.zero_reg_skip) { + if (valid_operation) { + xor_(src_a, src_b); + } + } else { + xor_(src_a, src_b); + } + break; + case Macro::ALUOperation::Or: + if (optimizer.zero_reg_skip) { + if (valid_operation) { + or_(src_a, src_b); + } + } else { + or_(src_a, src_b); + } + break; + case Macro::ALUOperation::And: + if (optimizer.zero_reg_skip) { + if (!has_zero_register) 
{ + and_(src_a, src_b); + } + } else { + and_(src_a, src_b); + } + break; + case Macro::ALUOperation::AndNot: + if (optimizer.zero_reg_skip) { + if (!is_a_zero) { + not_(src_b); + and_(src_a, src_b); + } + } else { + not_(src_b); + and_(src_a, src_b); + } + break; + case Macro::ALUOperation::Nand: + if (optimizer.zero_reg_skip) { + if (!is_a_zero) { + and_(src_a, src_b); + not_(src_a); + } + } else { + and_(src_a, src_b); + not_(src_a); + } + break; + default: + UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", opcode.alu_operation.Value()); + break; + } + Compile_ProcessResult(opcode.result_operation, opcode.dst); +} + +void MacroJITx64Impl::Compile_AddImmediate(Macro::Opcode opcode) { + if (optimizer.skip_dummy_addimmediate) { + // Games tend to use this as an exit instruction placeholder. It's to encode an instruction + // without doing anything. In our case we can just not emit anything. + if (opcode.result_operation == Macro::ResultOperation::Move && opcode.dst == 0) { + return; + } + } + // Check for redundant moves + if (optimizer.optimize_for_method_move && + opcode.result_operation == Macro::ResultOperation::MoveAndSetMethod) { + if (next_opcode.has_value()) { + const auto next = *next_opcode; + if (next.result_operation == Macro::ResultOperation::MoveAndSetMethod && + opcode.dst == next.dst) { + return; + } + } + } + if (optimizer.zero_reg_skip && opcode.src_a == 0) { + if (opcode.immediate == 0) { + xor_(RESULT, RESULT); + } else { + mov(RESULT, opcode.immediate); + } + } else { + auto result = Compile_GetRegister(opcode.src_a, RESULT); + if (opcode.immediate > 2) { + add(result, opcode.immediate); + } else if (opcode.immediate == 1) { + inc(result); + } else if (opcode.immediate < 0) { + sub(result, opcode.immediate * -1); + } + } + Compile_ProcessResult(opcode.result_operation, opcode.dst); +} + +void MacroJITx64Impl::Compile_ExtractInsert(Macro::Opcode opcode) { + auto dst = Compile_GetRegister(opcode.src_a, RESULT); + auto src = 
Compile_GetRegister(opcode.src_b, eax); + + const u32 mask = ~(opcode.GetBitfieldMask() << opcode.bf_dst_bit); + and_(dst, mask); + shr(src, opcode.bf_src_bit); + and_(src, opcode.GetBitfieldMask()); + shl(src, opcode.bf_dst_bit); + or_(dst, src); + + Compile_ProcessResult(opcode.result_operation, opcode.dst); +} + +void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) { + const auto dst = Compile_GetRegister(opcode.src_a, ecx); + const auto src = Compile_GetRegister(opcode.src_b, RESULT); + + shr(src, dst.cvt8()); + and_(src, opcode.GetBitfieldMask()); + shl(src, opcode.bf_dst_bit); + + Compile_ProcessResult(opcode.result_operation, opcode.dst); +} + +void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) { + const auto dst = Compile_GetRegister(opcode.src_a, ecx); + const auto src = Compile_GetRegister(opcode.src_b, RESULT); + + shr(src, opcode.bf_src_bit); + and_(src, opcode.GetBitfieldMask()); + shl(src, dst.cvt8()); + + Compile_ProcessResult(opcode.result_operation, opcode.dst); +} + +void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) { + if (optimizer.zero_reg_skip && opcode.src_a == 0) { + if (opcode.immediate == 0) { + xor_(RESULT, RESULT); + } else { + mov(RESULT, opcode.immediate); + } + } else { + auto result = Compile_GetRegister(opcode.src_a, RESULT); + if (opcode.immediate > 2) { + add(result, opcode.immediate); + } else if (opcode.immediate == 1) { + inc(result); + } else if (opcode.immediate < 0) { + sub(result, opcode.immediate * -1); + } + } + + // Equivalent to Engines::Maxwell3D::GetRegisterValue: + if (optimizer.enable_asserts) { + Xbyak::Label pass_range_check; + cmp(RESULT, static_cast(Engines::Maxwell3D::Regs::NUM_REGS)); + jb(pass_range_check); + int3(); + L(pass_range_check); + } + mov(rax, qword[STATE]); + mov(RESULT, + dword[rax + offsetof(Engines::Maxwell3D, regs) + + offsetof(Engines::Maxwell3D::Regs, reg_array) + RESULT.cvt64() * sizeof(u32)]); + + 
Compile_ProcessResult(opcode.result_operation, opcode.dst); +} + +void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) { + maxwell3d->CallMethod(method_address.address, value, true); +} + +void MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { + Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); + mov(Common::X64::ABI_PARAM1, qword[STATE]); + mov(Common::X64::ABI_PARAM2.cvt32(), METHOD_ADDRESS); + mov(Common::X64::ABI_PARAM3.cvt32(), value); + Common::X64::CallFarFunction(*this, &Send); + Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); + + Xbyak::Label dont_process{}; + // Get increment + test(METHOD_ADDRESS, 0x3f000); + // If zero, method address doesn't update + je(dont_process); + + mov(ecx, METHOD_ADDRESS); + and_(METHOD_ADDRESS, 0xfff); + shr(ecx, 12); + and_(ecx, 0x3f); + lea(eax, ptr[rcx + METHOD_ADDRESS.cvt64()]); + sal(ecx, 12); + or_(eax, ecx); + + mov(METHOD_ADDRESS, eax); + + L(dont_process); +} + +void MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) { + ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid"); + const s32 jump_address = + static_cast(pc) + static_cast(opcode.GetBranchTarget() / sizeof(s32)); + + Xbyak::Label end; + auto value = Compile_GetRegister(opcode.src_a, eax); + cmp(value, 0); // test(value, value); + if (optimizer.has_delayed_pc) { + switch (opcode.branch_condition) { + case Macro::BranchCondition::Zero: + jne(end, T_NEAR); + break; + case Macro::BranchCondition::NotZero: + je(end, T_NEAR); + break; + } + + if (opcode.branch_annul) { + xor_(BRANCH_HOLDER, BRANCH_HOLDER); + jmp(labels[jump_address], T_NEAR); + } else { + Xbyak::Label handle_post_exit{}; + Xbyak::Label skip{}; + jmp(skip, T_NEAR); + + L(handle_post_exit); + xor_(BRANCH_HOLDER, BRANCH_HOLDER); + jmp(labels[jump_address], T_NEAR); + + L(skip); + mov(BRANCH_HOLDER, handle_post_exit); + jmp(delay_skip[pc], T_NEAR); + } + } else { + 
switch (opcode.branch_condition) { + case Macro::BranchCondition::Zero: + je(labels[jump_address], T_NEAR); + break; + case Macro::BranchCondition::NotZero: + jne(labels[jump_address], T_NEAR); + break; + } + } + + L(end); +} + +void MacroJITx64Impl::Optimizer_ScanFlags() { + optimizer.can_skip_carry = true; + optimizer.has_delayed_pc = false; + for (auto raw_op : code) { + Macro::Opcode op{}; + op.raw = raw_op; + + if (op.operation == Macro::Operation::ALU) { + // Scan for any ALU operations which actually use the carry flag, if they don't exist in + // our current code we can skip emitting the carry flag handling operations + if (op.alu_operation == Macro::ALUOperation::AddWithCarry || + op.alu_operation == Macro::ALUOperation::SubtractWithBorrow) { + optimizer.can_skip_carry = false; + } + } + + if (op.operation == Macro::Operation::Branch) { + if (!op.branch_annul) { + optimizer.has_delayed_pc = true; + } + } + } +} + +void MacroJITx64Impl::Compile() { + labels.fill(Xbyak::Label()); + + Common::X64::ABI_PushRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); + // JIT state + mov(STATE, Common::X64::ABI_PARAM1); + mov(PARAMETERS, Common::X64::ABI_PARAM2); + mov(MAX_PARAMETER, Common::X64::ABI_PARAM3); + xor_(RESULT, RESULT); + xor_(METHOD_ADDRESS, METHOD_ADDRESS); + xor_(BRANCH_HOLDER, BRANCH_HOLDER); + + mov(dword[STATE + offsetof(JITState, registers) + 4], Compile_FetchParameter()); + + // Track get register for zero registers and mark it as no-op + optimizer.zero_reg_skip = true; + + // AddImmediate tends to be used as a NOP instruction, if we detect this we can + // completely skip the entire code path and no emit anything + optimizer.skip_dummy_addimmediate = true; + + // SMO tends to emit a lot of unnecessary method moves, we can mitigate this by only emitting + // one if our register isn't "dirty" + optimizer.optimize_for_method_move = true; + + // Enable run-time assertions in JITted code + optimizer.enable_asserts = false; + + // Check 
to see if we can skip emitting certain instructions + Optimizer_ScanFlags(); + + const u32 op_count = static_cast(code.size()); + for (u32 i = 0; i < op_count; i++) { + if (i < op_count - 1) { + pc = i + 1; + next_opcode = GetOpCode(); + } else { + next_opcode = {}; + } + pc = i; + Compile_NextInstruction(); + } + + L(end_of_code); + + Common::X64::ABI_PopRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); + ret(); + ready(); + program = getCode(); +} + +bool MacroJITx64Impl::Compile_NextInstruction() { + const auto opcode = GetOpCode(); + if (labels[pc].getAddress()) { + return false; + } + + L(labels[pc]); + + switch (opcode.operation) { + case Macro::Operation::ALU: + Compile_ALU(opcode); + break; + case Macro::Operation::AddImmediate: + Compile_AddImmediate(opcode); + break; + case Macro::Operation::ExtractInsert: + Compile_ExtractInsert(opcode); + break; + case Macro::Operation::ExtractShiftLeftImmediate: + Compile_ExtractShiftLeftImmediate(opcode); + break; + case Macro::Operation::ExtractShiftLeftRegister: + Compile_ExtractShiftLeftRegister(opcode); + break; + case Macro::Operation::Read: + Compile_Read(opcode); + break; + case Macro::Operation::Branch: + Compile_Branch(opcode); + break; + default: + UNIMPLEMENTED_MSG("Unimplemented opcode {}", opcode.operation.Value()); + break; + } + + if (optimizer.has_delayed_pc) { + if (opcode.is_exit) { + mov(rax, end_of_code); + test(BRANCH_HOLDER, BRANCH_HOLDER); + cmove(BRANCH_HOLDER, rax); + // Jump to next instruction to skip delay slot check + je(labels[pc + 1], T_NEAR); + } else { + // TODO(ogniK): Optimize delay slot branching + Xbyak::Label no_delay_slot{}; + test(BRANCH_HOLDER, BRANCH_HOLDER); + je(no_delay_slot, T_NEAR); + mov(rax, BRANCH_HOLDER); + xor_(BRANCH_HOLDER, BRANCH_HOLDER); + jmp(rax); + L(no_delay_slot); + } + L(delay_skip[pc]); + if (opcode.is_exit) { + return false; + } + } else { + test(BRANCH_HOLDER, BRANCH_HOLDER); + jne(end_of_code, T_NEAR); + if (opcode.is_exit) { + 
inc(BRANCH_HOLDER); + return false; + } + } + return true; +} + +static void WarnInvalidParameter(uintptr_t parameter, uintptr_t max_parameter) { + LOG_CRITICAL(HW_GPU, + "Macro JIT: invalid parameter access 0x{:x} (0x{:x} is the last parameter)", + parameter, max_parameter - sizeof(u32)); +} + +Xbyak::Reg32 MacroJITx64Impl::Compile_FetchParameter() { + Xbyak::Label parameter_ok{}; + cmp(PARAMETERS, MAX_PARAMETER); + jb(parameter_ok, T_NEAR); + Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); + mov(Common::X64::ABI_PARAM1, PARAMETERS); + mov(Common::X64::ABI_PARAM2, MAX_PARAMETER); + Common::X64::CallFarFunction(*this, &WarnInvalidParameter); + Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); + L(parameter_ok); + mov(eax, dword[PARAMETERS]); + add(PARAMETERS, sizeof(u32)); + return eax; +} + +Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) { + if (index == 0) { + // Register 0 is always zero + xor_(dst, dst); + } else { + mov(dst, dword[STATE + offsetof(JITState, registers) + index * sizeof(u32)]); + } + + return dst; +} + +void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) { + const auto SetRegister = [this](u32 reg_index, const Xbyak::Reg32& result) { + // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero + // register. 
+ if (reg_index == 0) { + return; + } + mov(dword[STATE + offsetof(JITState, registers) + reg_index * sizeof(u32)], result); + }; + const auto SetMethodAddress = [this](const Xbyak::Reg32& reg32) { mov(METHOD_ADDRESS, reg32); }; + + switch (operation) { + case Macro::ResultOperation::IgnoreAndFetch: + SetRegister(reg, Compile_FetchParameter()); + break; + case Macro::ResultOperation::Move: + SetRegister(reg, RESULT); + break; + case Macro::ResultOperation::MoveAndSetMethod: + SetRegister(reg, RESULT); + SetMethodAddress(RESULT); + break; + case Macro::ResultOperation::FetchAndSend: + // Fetch parameter and send result. + SetRegister(reg, Compile_FetchParameter()); + Compile_Send(RESULT); + break; + case Macro::ResultOperation::MoveAndSend: + // Move and send result. + SetRegister(reg, RESULT); + Compile_Send(RESULT); + break; + case Macro::ResultOperation::FetchAndSetMethod: + // Fetch parameter and use result as Method Address. + SetRegister(reg, Compile_FetchParameter()); + SetMethodAddress(RESULT); + break; + case Macro::ResultOperation::MoveAndSetMethodFetchAndSend: + // Move result and use as Method Address, then fetch and send parameter. + SetRegister(reg, RESULT); + SetMethodAddress(RESULT); + Compile_Send(Compile_FetchParameter()); + break; + case Macro::ResultOperation::MoveAndSetMethodSend: + // Move result and use as Method Address, then send bits 12:17 of result. 
+ SetRegister(reg, RESULT); + SetMethodAddress(RESULT); + shr(RESULT, 12); + and_(RESULT, 0b111111); + Compile_Send(RESULT); + break; + default: + UNIMPLEMENTED_MSG("Unimplemented macro operation {}", operation); + break; + } +} + +Macro::Opcode MacroJITx64Impl::GetOpCode() const { + ASSERT(pc < code.size()); + return {code[pc]}; +} +} // Anonymous namespace +#endif + +static void Dump(u64 hash, std::span code, bool decompiled = false) { + const auto base_dir{Common::FS::GetEdenPath(Common::FS::EdenPath::DumpDir)}; + const auto macro_dir{base_dir / "macros"}; + if (!Common::FS::CreateDir(base_dir) || !Common::FS::CreateDir(macro_dir)) { + LOG_ERROR(Common_Filesystem, "Failed to create macro dump directories"); + return; + } + auto name{macro_dir / fmt::format("{:016x}.macro", hash)}; + + if (decompiled) { + auto new_name{macro_dir / fmt::format("decompiled_{:016x}.macro", hash)}; + if (Common::FS::Exists(name)) { + (void)Common::FS::RenameFile(name, new_name); + return; + } + name = new_name; + } + + std::fstream macro_file(name, std::ios::out | std::ios::binary); + if (!macro_file) { + LOG_ERROR(Common_Filesystem, "Unable to open or create file at {}", Common::FS::PathToUTF8String(name)); + return; + } + macro_file.write(reinterpret_cast(code.data()), code.size_bytes()); +} + +MacroEngine::MacroEngine(Engines::Maxwell3D& maxwell3d_, bool is_interpreted_) + : hle_macros{std::make_optional(maxwell3d_)} + , maxwell3d{maxwell3d_} + , is_interpreted{is_interpreted_} +{} + +MacroEngine::~MacroEngine() = default; + +void MacroEngine::AddCode(u32 method, u32 data) { + uploaded_macro_code[method].push_back(data); +} + +void MacroEngine::ClearCode(u32 method) { + macro_cache.erase(method); + uploaded_macro_code.erase(method); +} + +void MacroEngine::Execute(u32 method, const std::vector& parameters) { + auto compiled_macro = macro_cache.find(method); + if (compiled_macro != macro_cache.end()) { + const auto& cache_info = compiled_macro->second; + if 
(cache_info.has_hle_program) { + cache_info.hle_program->Execute(parameters, method); + } else { + maxwell3d.RefreshParameters(); + cache_info.lle_program->Execute(parameters, method); + } + } else { + // Macro not compiled, check if it's uploaded and if so, compile it + std::optional mid_method; + const auto macro_code = uploaded_macro_code.find(method); + if (macro_code == uploaded_macro_code.end()) { + for (const auto& [method_base, code] : uploaded_macro_code) { + if (method >= method_base && (method - method_base) < code.size()) { + mid_method = method_base; + break; + } + } + if (!mid_method.has_value()) { + ASSERT_MSG(false, "Macro 0x{0:x} was not uploaded", method); + return; + } + } + auto& cache_info = macro_cache[method]; + + if (!mid_method.has_value()) { + cache_info.lle_program = Compile(macro_code->second); + cache_info.hash = Common::HashValue(macro_code->second); + } else { + const auto& macro_cached = uploaded_macro_code[mid_method.value()]; + const auto rebased_method = method - mid_method.value(); + auto& code = uploaded_macro_code[method]; + code.resize(macro_cached.size() - rebased_method); + std::memcpy(code.data(), macro_cached.data() + rebased_method, code.size() * sizeof(u32)); + cache_info.hash = Common::HashValue(code); + cache_info.lle_program = Compile(code); + } + + auto hle_program = hle_macros->GetHLEProgram(cache_info.hash); + if (!hle_program || Settings::values.disable_macro_hle) { + maxwell3d.RefreshParameters(); + cache_info.lle_program->Execute(parameters, method); + } else { + cache_info.has_hle_program = true; + cache_info.hle_program = std::move(hle_program); + cache_info.hle_program->Execute(parameters, method); + } + + if (Settings::values.dump_macros) { + Dump(cache_info.hash, macro_code->second, cache_info.has_hle_program); + } + } +} + +std::unique_ptr MacroEngine::Compile(const std::vector& code) { +#ifdef ARCHITECTURE_x86_64 + if (!is_interpreted) + return std::make_unique(maxwell3d, code); +#endif + return 
std::make_unique(maxwell3d, code); +} + +std::optional GetMacroEngine(Engines::Maxwell3D& maxwell3d) { +#ifdef ARCHITECTURE_x86_64 + return std::make_optional(maxwell3d, bool(Settings::values.disable_macro_jit)); +#else + return std::make_optional(maxwell3d, true); +#endif +} + +} // namespace Tegra diff --git a/src/video_core/macro/macro.h b/src/video_core/macro.h similarity index 74% rename from src/video_core/macro/macro.h rename to src/video_core/macro.h index 737ced9a45..685097a693 100644 --- a/src/video_core/macro/macro.h +++ b/src/video_core/macro.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -95,24 +98,34 @@ union MethodAddress { } // namespace Macro -class HLEMacro; - class CachedMacro { public: + CachedMacro(Engines::Maxwell3D& maxwell3d_) + : maxwell3d{maxwell3d_} + {} virtual ~CachedMacro() = default; - /** - * Executes the macro code with the specified input parameters. - * - * @param parameters The parameters of the macro - * @param method The method to execute - */ + /// Executes the macro code with the specified input parameters. + /// @param parameters The parameters of the macro + /// @param method The method to execute virtual void Execute(const std::vector& parameters, u32 method) = 0; + Engines::Maxwell3D& maxwell3d; +}; + +class HLEMacro { +public: + explicit HLEMacro(Engines::Maxwell3D& maxwell3d_); + ~HLEMacro(); + // Allocates and returns a cached macro if the hash matches a known function. + // Returns nullptr otherwise. 
+ [[nodiscard]] std::unique_ptr GetHLEProgram(u64 hash) const; +private: + Engines::Maxwell3D& maxwell3d; }; class MacroEngine { public: - explicit MacroEngine(Engines::Maxwell3D& maxwell3d); - virtual ~MacroEngine(); + explicit MacroEngine(Engines::Maxwell3D& maxwell3d, bool is_interpreted); + ~MacroEngine(); // Store the uploaded macro code to compile them when they're called. void AddCode(u32 method, u32 data); @@ -124,7 +137,7 @@ public: void Execute(u32 method, const std::vector& parameters); protected: - virtual std::unique_ptr Compile(const std::vector& code) = 0; + std::unique_ptr Compile(const std::vector& code); private: struct CacheInfo { @@ -136,10 +149,11 @@ private: std::unordered_map macro_cache; std::unordered_map> uploaded_macro_code; - std::unique_ptr hle_macros; + std::optional hle_macros; Engines::Maxwell3D& maxwell3d; + bool is_interpreted; }; -std::unique_ptr GetMacroEngine(Engines::Maxwell3D& maxwell3d); +std::optional GetMacroEngine(Engines::Maxwell3D& maxwell3d); } // namespace Tegra diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp deleted file mode 100644 index 2ff5e21c5e..0000000000 --- a/src/video_core/macro/macro.cpp +++ /dev/null @@ -1,140 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#include -#include -#include -#include - -#include "common/container_hash.h" - -#include -#include "common/assert.h" -#include "common/fs/fs.h" -#include "common/fs/path_util.h" -#include "common/settings.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/macro/macro.h" -#include "video_core/macro/macro_hle.h" -#include "video_core/macro/macro_interpreter.h" - -#ifdef ARCHITECTURE_x86_64 -#include "video_core/macro/macro_jit_x64.h" -#endif - -namespace Tegra { - -static void Dump(u64 hash, std::span code, bool decompiled = 
false) { - const auto base_dir{Common::FS::GetEdenPath(Common::FS::EdenPath::DumpDir)}; - const auto macro_dir{base_dir / "macros"}; - if (!Common::FS::CreateDir(base_dir) || !Common::FS::CreateDir(macro_dir)) { - LOG_ERROR(Common_Filesystem, "Failed to create macro dump directories"); - return; - } - auto name{macro_dir / fmt::format("{:016x}.macro", hash)}; - - if (decompiled) { - auto new_name{macro_dir / fmt::format("decompiled_{:016x}.macro", hash)}; - if (Common::FS::Exists(name)) { - (void)Common::FS::RenameFile(name, new_name); - return; - } - name = new_name; - } - - std::fstream macro_file(name, std::ios::out | std::ios::binary); - if (!macro_file) { - LOG_ERROR(Common_Filesystem, "Unable to open or create file at {}", - Common::FS::PathToUTF8String(name)); - return; - } - macro_file.write(reinterpret_cast(code.data()), code.size_bytes()); -} - -MacroEngine::MacroEngine(Engines::Maxwell3D& maxwell3d_) - : hle_macros{std::make_unique(maxwell3d_)}, maxwell3d{maxwell3d_} {} - -MacroEngine::~MacroEngine() = default; - -void MacroEngine::AddCode(u32 method, u32 data) { - uploaded_macro_code[method].push_back(data); -} - -void MacroEngine::ClearCode(u32 method) { - macro_cache.erase(method); - uploaded_macro_code.erase(method); -} - -void MacroEngine::Execute(u32 method, const std::vector& parameters) { - auto compiled_macro = macro_cache.find(method); - if (compiled_macro != macro_cache.end()) { - const auto& cache_info = compiled_macro->second; - if (cache_info.has_hle_program) { - cache_info.hle_program->Execute(parameters, method); - } else { - maxwell3d.RefreshParameters(); - cache_info.lle_program->Execute(parameters, method); - } - } else { - // Macro not compiled, check if it's uploaded and if so, compile it - std::optional mid_method; - const auto macro_code = uploaded_macro_code.find(method); - if (macro_code == uploaded_macro_code.end()) { - for (const auto& [method_base, code] : uploaded_macro_code) { - if (method >= method_base && (method - 
method_base) < code.size()) { - mid_method = method_base; - break; - } - } - if (!mid_method.has_value()) { - ASSERT_MSG(false, "Macro 0x{0:x} was not uploaded", method); - return; - } - } - auto& cache_info = macro_cache[method]; - - if (!mid_method.has_value()) { - cache_info.lle_program = Compile(macro_code->second); - cache_info.hash = Common::HashValue(macro_code->second); - } else { - const auto& macro_cached = uploaded_macro_code[mid_method.value()]; - const auto rebased_method = method - mid_method.value(); - auto& code = uploaded_macro_code[method]; - code.resize(macro_cached.size() - rebased_method); - std::memcpy(code.data(), macro_cached.data() + rebased_method, - code.size() * sizeof(u32)); - cache_info.hash = Common::HashValue(code); - cache_info.lle_program = Compile(code); - } - - auto hle_program = hle_macros->GetHLEProgram(cache_info.hash); - if (!hle_program || Settings::values.disable_macro_hle) { - maxwell3d.RefreshParameters(); - cache_info.lle_program->Execute(parameters, method); - } else { - cache_info.has_hle_program = true; - cache_info.hle_program = std::move(hle_program); - cache_info.hle_program->Execute(parameters, method); - } - - if (Settings::values.dump_macros) { - Dump(cache_info.hash, macro_code->second, cache_info.has_hle_program); - } - } -} - -std::unique_ptr GetMacroEngine(Engines::Maxwell3D& maxwell3d) { - if (Settings::values.disable_macro_jit) { - return std::make_unique(maxwell3d); - } -#ifdef ARCHITECTURE_x86_64 - return std::make_unique(maxwell3d); -#else - return std::make_unique(maxwell3d); -#endif -} - -} // namespace Tegra diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp deleted file mode 100644 index 2f41e806c2..0000000000 --- a/src/video_core/macro/macro_hle.cpp +++ /dev/null @@ -1,606 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project -// 
SPDX-License-Identifier: GPL-3.0-or-later - -#include -#include -#include "common/assert.h" -#include "common/scope_exit.h" -#include "video_core/dirty_flags.h" -#include "video_core/engines/draw_manager.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/macro/macro.h" -#include "video_core/macro/macro_hle.h" -#include "video_core/memory_manager.h" -#include "video_core/rasterizer_interface.h" - -namespace Tegra { - -using Maxwell3D = Engines::Maxwell3D; - -namespace { - -bool IsTopologySafe(Maxwell3D::Regs::PrimitiveTopology topology) { - switch (topology) { - case Maxwell3D::Regs::PrimitiveTopology::Points: - case Maxwell3D::Regs::PrimitiveTopology::Lines: - case Maxwell3D::Regs::PrimitiveTopology::LineLoop: - case Maxwell3D::Regs::PrimitiveTopology::LineStrip: - case Maxwell3D::Regs::PrimitiveTopology::Triangles: - case Maxwell3D::Regs::PrimitiveTopology::TriangleStrip: - case Maxwell3D::Regs::PrimitiveTopology::TriangleFan: - case Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency: - case Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency: - case Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency: - case Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency: - case Maxwell3D::Regs::PrimitiveTopology::Patches: - return true; - case Maxwell3D::Regs::PrimitiveTopology::Quads: - case Maxwell3D::Regs::PrimitiveTopology::QuadStrip: - case Maxwell3D::Regs::PrimitiveTopology::Polygon: - default: - return false; - } -} - -class HLEMacroImpl : public CachedMacro { -public: - explicit HLEMacroImpl(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {} - -protected: - Maxwell3D& maxwell3d; -}; - -/* - * @note: these macros have two versions, a normal and extended version, with the extended version - * also assigning the base vertex/instance. 
- */ -template -class HLE_DrawArraysIndirect final : public HLEMacroImpl { -public: - explicit HLE_DrawArraysIndirect(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} - - void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { - auto topology = static_cast(parameters[0]); - if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) { - Fallback(parameters); - return; - } - - auto& params = maxwell3d.draw_manager->GetIndirectParams(); - params.is_byte_count = false; - params.is_indexed = false; - params.include_count = false; - params.count_start_address = 0; - params.indirect_start_address = maxwell3d.GetMacroAddress(1); - params.buffer_size = 4 * sizeof(u32); - params.max_draw_counts = 1; - params.stride = 0; - - if constexpr (extended) { - maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; - maxwell3d.SetHLEReplacementAttributeType( - 0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseInstance); - } - - maxwell3d.draw_manager->DrawArrayIndirect(topology); - - if constexpr (extended) { - maxwell3d.engine_state = Maxwell3D::EngineHint::None; - maxwell3d.replace_table.clear(); - } - } - -private: - void Fallback(const std::vector& parameters) { - SCOPE_EXIT { - if (extended) { - maxwell3d.engine_state = Maxwell3D::EngineHint::None; - maxwell3d.replace_table.clear(); - } - }; - maxwell3d.RefreshParameters(); - const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); - - auto topology = static_cast(parameters[0]); - const u32 vertex_first = parameters[3]; - const u32 vertex_count = parameters[1]; - - if (!IsTopologySafe(topology) && - static_cast(maxwell3d.GetMaxCurrentVertices()) < - static_cast(vertex_first) + static_cast(vertex_count)) { - ASSERT_MSG(false, "Faulty draw!"); - return; - } - - const u32 base_instance = parameters[4]; - if constexpr (extended) { - maxwell3d.regs.global_base_instance_index = base_instance; - maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; - 
maxwell3d.SetHLEReplacementAttributeType( - 0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseInstance); - } - - maxwell3d.draw_manager->DrawArray(topology, vertex_first, vertex_count, base_instance, - instance_count); - - if constexpr (extended) { - maxwell3d.regs.global_base_instance_index = 0; - maxwell3d.engine_state = Maxwell3D::EngineHint::None; - maxwell3d.replace_table.clear(); - } - } -}; - -/* - * @note: these macros have two versions, a normal and extended version, with the extended version - * also assigning the base vertex/instance. - */ -template -class HLE_DrawIndexedIndirect final : public HLEMacroImpl { -public: - explicit HLE_DrawIndexedIndirect(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} - - void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { - auto topology = static_cast(parameters[0]); - if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) { - Fallback(parameters); - return; - } - - const u32 estimate = static_cast(maxwell3d.EstimateIndexBufferSize()); - const u32 element_base = parameters[4]; - const u32 base_instance = parameters[5]; - maxwell3d.regs.vertex_id_base = element_base; - maxwell3d.regs.global_base_vertex_index = element_base; - maxwell3d.regs.global_base_instance_index = base_instance; - maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - if constexpr (extended) { - maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; - maxwell3d.SetHLEReplacementAttributeType( - 0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex); - maxwell3d.SetHLEReplacementAttributeType( - 0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance); - } - auto& params = maxwell3d.draw_manager->GetIndirectParams(); - params.is_byte_count = false; - params.is_indexed = true; - params.include_count = false; - params.count_start_address = 0; - params.indirect_start_address = maxwell3d.GetMacroAddress(1); - params.buffer_size = 5 * sizeof(u32); - params.max_draw_counts = 1; 
- params.stride = 0; - maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, estimate); - maxwell3d.regs.vertex_id_base = 0x0; - maxwell3d.regs.global_base_vertex_index = 0x0; - maxwell3d.regs.global_base_instance_index = 0x0; - if constexpr (extended) { - maxwell3d.engine_state = Maxwell3D::EngineHint::None; - maxwell3d.replace_table.clear(); - } - } - -private: - void Fallback(const std::vector& parameters) { - maxwell3d.RefreshParameters(); - const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); - const u32 element_base = parameters[4]; - const u32 base_instance = parameters[5]; - maxwell3d.regs.vertex_id_base = element_base; - maxwell3d.regs.global_base_vertex_index = element_base; - maxwell3d.regs.global_base_instance_index = base_instance; - maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - if constexpr (extended) { - maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; - maxwell3d.SetHLEReplacementAttributeType( - 0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex); - maxwell3d.SetHLEReplacementAttributeType( - 0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance); - } - - maxwell3d.draw_manager->DrawIndex( - static_cast(parameters[0]), parameters[3], - parameters[1], element_base, base_instance, instance_count); - - maxwell3d.regs.vertex_id_base = 0x0; - maxwell3d.regs.global_base_vertex_index = 0x0; - maxwell3d.regs.global_base_instance_index = 0x0; - if constexpr (extended) { - maxwell3d.engine_state = Maxwell3D::EngineHint::None; - maxwell3d.replace_table.clear(); - } - } -}; - -class HLE_MultiLayerClear final : public HLEMacroImpl { -public: - explicit HLE_MultiLayerClear(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} - - void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { - maxwell3d.RefreshParameters(); - ASSERT(parameters.size() == 1); - - const Maxwell3D::Regs::ClearSurface 
clear_params{parameters[0]}; - const u32 rt_index = clear_params.RT; - const u32 num_layers = maxwell3d.regs.rt[rt_index].depth; - ASSERT(clear_params.layer == 0); - - maxwell3d.regs.clear_surface.raw = clear_params.raw; - maxwell3d.draw_manager->Clear(num_layers); - } -}; - -class HLE_MultiDrawIndexedIndirectCount final : public HLEMacroImpl { -public: - explicit HLE_MultiDrawIndexedIndirectCount(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} - - void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { - const auto topology = static_cast(parameters[2]); - if (!IsTopologySafe(topology)) { - Fallback(parameters); - return; - } - - const u32 start_indirect = parameters[0]; - const u32 end_indirect = parameters[1]; - if (start_indirect >= end_indirect) { - // Nothing to do. - return; - } - - const u32 padding = parameters[3]; // padding is in words - - // size of each indirect segment - const u32 indirect_words = 5 + padding; - const u32 stride = indirect_words * sizeof(u32); - const std::size_t draw_count = end_indirect - start_indirect; - const u32 estimate = static_cast(maxwell3d.EstimateIndexBufferSize()); - maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - auto& params = maxwell3d.draw_manager->GetIndirectParams(); - params.is_byte_count = false; - params.is_indexed = true; - params.include_count = true; - params.count_start_address = maxwell3d.GetMacroAddress(4); - params.indirect_start_address = maxwell3d.GetMacroAddress(5); - params.buffer_size = stride * draw_count; - params.max_draw_counts = draw_count; - params.stride = stride; - maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; - maxwell3d.SetHLEReplacementAttributeType( - 0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex); - maxwell3d.SetHLEReplacementAttributeType( - 0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance); - 
maxwell3d.SetHLEReplacementAttributeType(0, 0x648, - Maxwell3D::HLEReplacementAttributeType::DrawID); - maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, estimate); - maxwell3d.engine_state = Maxwell3D::EngineHint::None; - maxwell3d.replace_table.clear(); - } - -private: - void Fallback(const std::vector& parameters) { - SCOPE_EXIT { - // Clean everything. - maxwell3d.regs.vertex_id_base = 0x0; - maxwell3d.engine_state = Maxwell3D::EngineHint::None; - maxwell3d.replace_table.clear(); - }; - maxwell3d.RefreshParameters(); - const u32 start_indirect = parameters[0]; - const u32 end_indirect = parameters[1]; - if (start_indirect >= end_indirect) { - // Nothing to do. - return; - } - const auto topology = static_cast(parameters[2]); - const u32 padding = parameters[3]; - const std::size_t max_draws = parameters[4]; - - const u32 indirect_words = 5 + padding; - const std::size_t first_draw = start_indirect; - const std::size_t effective_draws = end_indirect - start_indirect; - const std::size_t last_draw = start_indirect + (std::min)(effective_draws, max_draws); - - for (std::size_t index = first_draw; index < last_draw; index++) { - const std::size_t base = index * indirect_words + 5; - const u32 base_vertex = parameters[base + 3]; - const u32 base_instance = parameters[base + 4]; - maxwell3d.regs.vertex_id_base = base_vertex; - maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; - maxwell3d.SetHLEReplacementAttributeType( - 0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex); - maxwell3d.SetHLEReplacementAttributeType( - 0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance); - maxwell3d.CallMethod(0x8e3, 0x648, true); - maxwell3d.CallMethod(0x8e4, static_cast(index), true); - maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - maxwell3d.draw_manager->DrawIndex(topology, parameters[base + 2], parameters[base], - base_vertex, base_instance, parameters[base + 1]); - } - } -}; - -class HLE_DrawIndirectByteCount final : 
public HLEMacroImpl { -public: - explicit HLE_DrawIndirectByteCount(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} - - void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { - const bool force = maxwell3d.Rasterizer().HasDrawTransformFeedback(); - - auto topology = static_cast(parameters[0] & 0xFFFFU); - if (!force && (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology))) { - Fallback(parameters); - return; - } - auto& params = maxwell3d.draw_manager->GetIndirectParams(); - params.is_byte_count = true; - params.is_indexed = false; - params.include_count = false; - params.count_start_address = 0; - params.indirect_start_address = maxwell3d.GetMacroAddress(2); - params.buffer_size = 4; - params.max_draw_counts = 1; - params.stride = parameters[1]; - maxwell3d.regs.draw.begin = parameters[0]; - maxwell3d.regs.draw_auto_stride = parameters[1]; - maxwell3d.regs.draw_auto_byte_count = parameters[2]; - - maxwell3d.draw_manager->DrawArrayIndirect(topology); - } - -private: - void Fallback(const std::vector& parameters) { - maxwell3d.RefreshParameters(); - - maxwell3d.regs.draw.begin = parameters[0]; - maxwell3d.regs.draw_auto_stride = parameters[1]; - maxwell3d.regs.draw_auto_byte_count = parameters[2]; - - maxwell3d.draw_manager->DrawArray( - maxwell3d.regs.draw.topology, 0, - maxwell3d.regs.draw_auto_byte_count / maxwell3d.regs.draw_auto_stride, 0, 1); - } -}; - -class HLE_C713C83D8F63CCF3 final : public HLEMacroImpl { -public: - explicit HLE_C713C83D8F63CCF3(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} - - void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { - maxwell3d.RefreshParameters(); - const u32 offset = (parameters[0] & 0x3FFFFFFF) << 2; - const u32 address = maxwell3d.regs.shadow_scratch[24]; - auto& const_buffer = maxwell3d.regs.const_buffer; - const_buffer.size = 0x7000; - const_buffer.address_high = (address >> 24) & 0xFF; - const_buffer.address_low = address << 8; - 
const_buffer.offset = offset; - } -}; - -class HLE_D7333D26E0A93EDE final : public HLEMacroImpl { -public: - explicit HLE_D7333D26E0A93EDE(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} - - void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { - maxwell3d.RefreshParameters(); - const size_t index = parameters[0]; - const u32 address = maxwell3d.regs.shadow_scratch[42 + index]; - const u32 size = maxwell3d.regs.shadow_scratch[47 + index]; - auto& const_buffer = maxwell3d.regs.const_buffer; - const_buffer.size = size; - const_buffer.address_high = (address >> 24) & 0xFF; - const_buffer.address_low = address << 8; - } -}; - -class HLE_BindShader final : public HLEMacroImpl { -public: - explicit HLE_BindShader(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} - - void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { - maxwell3d.RefreshParameters(); - auto& regs = maxwell3d.regs; - const u32 index = parameters[0]; - if ((parameters[1] - regs.shadow_scratch[28 + index]) == 0) { - return; - } - - regs.pipelines[index & 0xF].offset = parameters[2]; - maxwell3d.dirty.flags[VideoCommon::Dirty::Shaders] = true; - regs.shadow_scratch[28 + index] = parameters[1]; - regs.shadow_scratch[34 + index] = parameters[2]; - - const u32 address = parameters[4]; - auto& const_buffer = regs.const_buffer; - const_buffer.size = 0x10000; - const_buffer.address_high = (address >> 24) & 0xFF; - const_buffer.address_low = address << 8; - - const size_t bind_group_id = parameters[3] & 0x7F; - auto& bind_group = regs.bind_groups[bind_group_id]; - bind_group.raw_config = 0x11; - maxwell3d.ProcessCBBind(bind_group_id); - } -}; - -class HLE_SetRasterBoundingBox final : public HLEMacroImpl { -public: - explicit HLE_SetRasterBoundingBox(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} - - void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { - maxwell3d.RefreshParameters(); - const u32 raster_mode = 
parameters[0]; - auto& regs = maxwell3d.regs; - const u32 raster_enabled = maxwell3d.regs.conservative_raster_enable; - const u32 scratch_data = maxwell3d.regs.shadow_scratch[52]; - regs.raster_bounding_box.raw = raster_mode & 0xFFFFF00F; - regs.raster_bounding_box.pad.Assign(scratch_data & raster_enabled); - } -}; - -template -class HLE_ClearConstBuffer final : public HLEMacroImpl { -public: - explicit HLE_ClearConstBuffer(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} - - void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { - maxwell3d.RefreshParameters(); - static constexpr std::array zeroes{}; - auto& regs = maxwell3d.regs; - regs.const_buffer.size = static_cast(base_size); - regs.const_buffer.address_high = parameters[0]; - regs.const_buffer.address_low = parameters[1]; - regs.const_buffer.offset = 0; - maxwell3d.ProcessCBMultiData(zeroes.data(), parameters[2] * 4); - } -}; - -class HLE_ClearMemory final : public HLEMacroImpl { -public: - explicit HLE_ClearMemory(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} - - void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { - maxwell3d.RefreshParameters(); - - const u32 needed_memory = parameters[2] / sizeof(u32); - if (needed_memory > zero_memory.size()) { - zero_memory.resize(needed_memory, 0); - } - auto& regs = maxwell3d.regs; - regs.upload.line_length_in = parameters[2]; - regs.upload.line_count = 1; - regs.upload.dest.address_high = parameters[0]; - regs.upload.dest.address_low = parameters[1]; - maxwell3d.CallMethod(static_cast(MAXWELL3D_REG_INDEX(launch_dma)), 0x1011, true); - maxwell3d.CallMultiMethod(static_cast(MAXWELL3D_REG_INDEX(inline_data)), - zero_memory.data(), needed_memory, needed_memory); - } - -private: - std::vector zero_memory; -}; - -class HLE_TransformFeedbackSetup final : public HLEMacroImpl { -public: - explicit HLE_TransformFeedbackSetup(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} - - void Execute(const 
std::vector& parameters, [[maybe_unused]] u32 method) override { - maxwell3d.RefreshParameters(); - - auto& regs = maxwell3d.regs; - regs.transform_feedback_enabled = 1; - regs.transform_feedback.buffers[0].start_offset = 0; - regs.transform_feedback.buffers[1].start_offset = 0; - regs.transform_feedback.buffers[2].start_offset = 0; - regs.transform_feedback.buffers[3].start_offset = 0; - - regs.upload.line_length_in = 4; - regs.upload.line_count = 1; - regs.upload.dest.address_high = parameters[0]; - regs.upload.dest.address_low = parameters[1]; - maxwell3d.CallMethod(static_cast(MAXWELL3D_REG_INDEX(launch_dma)), 0x1011, true); - maxwell3d.CallMethod(static_cast(MAXWELL3D_REG_INDEX(inline_data)), - regs.transform_feedback.controls[0].stride, true); - - maxwell3d.Rasterizer().RegisterTransformFeedback(regs.upload.dest.Address()); - } -}; - -} // Anonymous namespace - -HLEMacro::HLEMacro(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} { - builders.emplace(0x0D61FC9FAAC9FCADULL, - std::function(Maxwell3D&)>( - [](Maxwell3D& maxwell3d__) -> std::unique_ptr { - return std::make_unique>(maxwell3d__); - })); - builders.emplace(0x8A4D173EB99A8603ULL, - std::function(Maxwell3D&)>( - [](Maxwell3D& maxwell3d__) -> std::unique_ptr { - return std::make_unique>(maxwell3d__); - })); - builders.emplace(0x771BB18C62444DA0ULL, - std::function(Maxwell3D&)>( - [](Maxwell3D& maxwell3d__) -> std::unique_ptr { - return std::make_unique>(maxwell3d__); - })); - builders.emplace(0x0217920100488FF7ULL, - std::function(Maxwell3D&)>( - [](Maxwell3D& maxwell3d__) -> std::unique_ptr { - return std::make_unique>(maxwell3d__); - })); - builders.emplace(0x3F5E74B9C9A50164ULL, - std::function(Maxwell3D&)>( - [](Maxwell3D& maxwell3d__) -> std::unique_ptr { - return std::make_unique( - maxwell3d__); - })); - builders.emplace(0xEAD26C3E2109B06BULL, - std::function(Maxwell3D&)>( - [](Maxwell3D& maxwell3d__) -> std::unique_ptr { - return std::make_unique(maxwell3d__); - })); - 
builders.emplace(0xC713C83D8F63CCF3ULL, - std::function(Maxwell3D&)>( - [](Maxwell3D& maxwell3d__) -> std::unique_ptr { - return std::make_unique(maxwell3d__); - })); - builders.emplace(0xD7333D26E0A93EDEULL, - std::function(Maxwell3D&)>( - [](Maxwell3D& maxwell3d__) -> std::unique_ptr { - return std::make_unique(maxwell3d__); - })); - builders.emplace(0xEB29B2A09AA06D38ULL, - std::function(Maxwell3D&)>( - [](Maxwell3D& maxwell3d__) -> std::unique_ptr { - return std::make_unique(maxwell3d__); - })); - builders.emplace(0xDB1341DBEB4C8AF7ULL, - std::function(Maxwell3D&)>( - [](Maxwell3D& maxwell3d__) -> std::unique_ptr { - return std::make_unique(maxwell3d__); - })); - builders.emplace(0x6C97861D891EDf7EULL, - std::function(Maxwell3D&)>( - [](Maxwell3D& maxwell3d__) -> std::unique_ptr { - return std::make_unique>(maxwell3d__); - })); - builders.emplace(0xD246FDDF3A6173D7ULL, - std::function(Maxwell3D&)>( - [](Maxwell3D& maxwell3d__) -> std::unique_ptr { - return std::make_unique>(maxwell3d__); - })); - builders.emplace(0xEE4D0004BEC8ECF4ULL, - std::function(Maxwell3D&)>( - [](Maxwell3D& maxwell3d__) -> std::unique_ptr { - return std::make_unique(maxwell3d__); - })); - builders.emplace(0xFC0CF27F5FFAA661ULL, - std::function(Maxwell3D&)>( - [](Maxwell3D& maxwell3d__) -> std::unique_ptr { - return std::make_unique(maxwell3d__); - })); - builders.emplace(0xB5F74EDB717278ECULL, - std::function(Maxwell3D&)>( - [](Maxwell3D& maxwell3d__) -> std::unique_ptr { - return std::make_unique(maxwell3d__); - })); -} - -HLEMacro::~HLEMacro() = default; - -std::unique_ptr HLEMacro::GetHLEProgram(u64 hash) const { - const auto it = builders.find(hash); - if (it == builders.end()) { - return nullptr; - } - return it->second(maxwell3d); -} - -} // namespace Tegra diff --git a/src/video_core/macro/macro_hle.h b/src/video_core/macro/macro_hle.h deleted file mode 100644 index 33f92fab16..0000000000 --- a/src/video_core/macro/macro_hle.h +++ /dev/null @@ -1,33 +0,0 @@ -// 
SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include -#include -#include - -#include "common/common_types.h" - -namespace Tegra { - -namespace Engines { -class Maxwell3D; -} - -class HLEMacro { -public: - explicit HLEMacro(Engines::Maxwell3D& maxwell3d_); - ~HLEMacro(); - - // Allocates and returns a cached macro if the hash matches a known function. - // Returns nullptr otherwise. - [[nodiscard]] std::unique_ptr GetHLEProgram(u64 hash) const; - -private: - Engines::Maxwell3D& maxwell3d; - std::unordered_map(Engines::Maxwell3D&)>> - builders; -}; - -} // namespace Tegra diff --git a/src/video_core/macro/macro_interpreter.cpp b/src/video_core/macro/macro_interpreter.cpp deleted file mode 100644 index f9befce676..0000000000 --- a/src/video_core/macro/macro_interpreter.cpp +++ /dev/null @@ -1,362 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#include -#include - -#include "common/assert.h" -#include "common/logging/log.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/macro/macro_interpreter.h" - -namespace Tegra { -namespace { -class MacroInterpreterImpl final : public CachedMacro { -public: - explicit MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_, const std::vector& code_) - : maxwell3d{maxwell3d_}, code{code_} {} - - void Execute(const std::vector& params, u32 method) override; - -private: - /// Resets the execution engine state, zeroing registers, etc. - void Reset(); - - /** - * Executes a single macro instruction located at the current program counter. Returns whether - * the interpreter should keep running. - * - * @param is_delay_slot Whether the current step is being executed due to a delay slot in a - * previous instruction. 
- */ - bool Step(bool is_delay_slot); - - /// Calculates the result of an ALU operation. src_a OP src_b; - u32 GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b); - - /// Performs the result operation on the input result and stores it in the specified register - /// (if necessary). - void ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result); - - /// Evaluates the branch condition and returns whether the branch should be taken or not. - bool EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const; - - /// Reads an opcode at the current program counter location. - Macro::Opcode GetOpcode() const; - - /// Returns the specified register's value. Register 0 is hardcoded to always return 0. - u32 GetRegister(u32 register_id) const; - - /// Sets the register to the input value. - void SetRegister(u32 register_id, u32 value); - - /// Sets the method address to use for the next Send instruction. - void SetMethodAddress(u32 address); - - /// Calls a GPU Engine method with the input parameter. - void Send(u32 value); - - /// Reads a GPU register located at the method address. - u32 Read(u32 method) const; - - /// Returns the next parameter in the parameter queue. - u32 FetchParameter(); - - Engines::Maxwell3D& maxwell3d; - - /// Current program counter - u32 pc{}; - /// Program counter to execute at after the delay slot is executed. - std::optional delayed_pc; - - /// General purpose macro registers. - std::array registers = {}; - - /// Method address to use for the next Send instruction. - Macro::MethodAddress method_address = {}; - - /// Input parameters of the current macro. - std::unique_ptr parameters; - std::size_t num_parameters = 0; - std::size_t parameters_capacity = 0; - /// Index of the next parameter that will be fetched by the 'parm' instruction. 
- u32 next_parameter_index = 0; - - bool carry_flag = false; - const std::vector& code; -}; - -void MacroInterpreterImpl::Execute(const std::vector& params, u32 method) { - Reset(); - - registers[1] = params[0]; - num_parameters = params.size(); - - if (num_parameters > parameters_capacity) { - parameters_capacity = num_parameters; - parameters = std::make_unique(num_parameters); - } - std::memcpy(parameters.get(), params.data(), num_parameters * sizeof(u32)); - - // Execute the code until we hit an exit condition. - bool keep_executing = true; - while (keep_executing) { - keep_executing = Step(false); - } - - // Assert the the macro used all the input parameters - ASSERT(next_parameter_index == num_parameters); -} - -void MacroInterpreterImpl::Reset() { - registers = {}; - pc = 0; - delayed_pc = {}; - method_address.raw = 0; - num_parameters = 0; - // The next parameter index starts at 1, because $r1 already has the value of the first - // parameter. - next_parameter_index = 1; - carry_flag = false; -} - -bool MacroInterpreterImpl::Step(bool is_delay_slot) { - u32 base_address = pc; - - Macro::Opcode opcode = GetOpcode(); - pc += 4; - - // Update the program counter if we were delayed - if (delayed_pc) { - ASSERT(is_delay_slot); - pc = *delayed_pc; - delayed_pc = {}; - } - - switch (opcode.operation) { - case Macro::Operation::ALU: { - u32 result = GetALUResult(opcode.alu_operation, GetRegister(opcode.src_a), - GetRegister(opcode.src_b)); - ProcessResult(opcode.result_operation, opcode.dst, result); - break; - } - case Macro::Operation::AddImmediate: { - ProcessResult(opcode.result_operation, opcode.dst, - GetRegister(opcode.src_a) + opcode.immediate); - break; - } - case Macro::Operation::ExtractInsert: { - u32 dst = GetRegister(opcode.src_a); - u32 src = GetRegister(opcode.src_b); - - src = (src >> opcode.bf_src_bit) & opcode.GetBitfieldMask(); - dst &= ~(opcode.GetBitfieldMask() << opcode.bf_dst_bit); - dst |= src << opcode.bf_dst_bit; - 
ProcessResult(opcode.result_operation, opcode.dst, dst); - break; - } - case Macro::Operation::ExtractShiftLeftImmediate: { - u32 dst = GetRegister(opcode.src_a); - u32 src = GetRegister(opcode.src_b); - - u32 result = ((src >> dst) & opcode.GetBitfieldMask()) << opcode.bf_dst_bit; - - ProcessResult(opcode.result_operation, opcode.dst, result); - break; - } - case Macro::Operation::ExtractShiftLeftRegister: { - u32 dst = GetRegister(opcode.src_a); - u32 src = GetRegister(opcode.src_b); - - u32 result = ((src >> opcode.bf_src_bit) & opcode.GetBitfieldMask()) << dst; - - ProcessResult(opcode.result_operation, opcode.dst, result); - break; - } - case Macro::Operation::Read: { - u32 result = Read(GetRegister(opcode.src_a) + opcode.immediate); - ProcessResult(opcode.result_operation, opcode.dst, result); - break; - } - case Macro::Operation::Branch: { - ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid"); - u32 value = GetRegister(opcode.src_a); - bool taken = EvaluateBranchCondition(opcode.branch_condition, value); - if (taken) { - // Ignore the delay slot if the branch has the annul bit. - if (opcode.branch_annul) { - pc = base_address + opcode.GetBranchTarget(); - return true; - } - - delayed_pc = base_address + opcode.GetBranchTarget(); - // Execute one more instruction due to the delay slot. - return Step(true); - } - break; - } - default: - UNIMPLEMENTED_MSG("Unimplemented macro operation {}", opcode.operation.Value()); - break; - } - - // An instruction with the Exit flag will not actually - // cause an exit if it's executed inside a delay slot. 
- if (opcode.is_exit && !is_delay_slot) { - // Exit has a delay slot, execute the next instruction - Step(true); - return false; - } - - return true; -} - -u32 MacroInterpreterImpl::GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b) { - switch (operation) { - case Macro::ALUOperation::Add: { - const u64 result{static_cast(src_a) + src_b}; - carry_flag = result > 0xffffffff; - return static_cast(result); - } - case Macro::ALUOperation::AddWithCarry: { - const u64 result{static_cast(src_a) + src_b + (carry_flag ? 1ULL : 0ULL)}; - carry_flag = result > 0xffffffff; - return static_cast(result); - } - case Macro::ALUOperation::Subtract: { - const u64 result{static_cast(src_a) - src_b}; - carry_flag = result < 0x100000000; - return static_cast(result); - } - case Macro::ALUOperation::SubtractWithBorrow: { - const u64 result{static_cast(src_a) - src_b - (carry_flag ? 0ULL : 1ULL)}; - carry_flag = result < 0x100000000; - return static_cast(result); - } - case Macro::ALUOperation::Xor: - return src_a ^ src_b; - case Macro::ALUOperation::Or: - return src_a | src_b; - case Macro::ALUOperation::And: - return src_a & src_b; - case Macro::ALUOperation::AndNot: - return src_a & ~src_b; - case Macro::ALUOperation::Nand: - return ~(src_a & src_b); - - default: - UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", operation); - return 0; - } -} - -void MacroInterpreterImpl::ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result) { - switch (operation) { - case Macro::ResultOperation::IgnoreAndFetch: - // Fetch parameter and ignore result. - SetRegister(reg, FetchParameter()); - break; - case Macro::ResultOperation::Move: - // Move result. - SetRegister(reg, result); - break; - case Macro::ResultOperation::MoveAndSetMethod: - // Move result and use as Method Address. - SetRegister(reg, result); - SetMethodAddress(result); - break; - case Macro::ResultOperation::FetchAndSend: - // Fetch parameter and send result. 
- SetRegister(reg, FetchParameter()); - Send(result); - break; - case Macro::ResultOperation::MoveAndSend: - // Move and send result. - SetRegister(reg, result); - Send(result); - break; - case Macro::ResultOperation::FetchAndSetMethod: - // Fetch parameter and use result as Method Address. - SetRegister(reg, FetchParameter()); - SetMethodAddress(result); - break; - case Macro::ResultOperation::MoveAndSetMethodFetchAndSend: - // Move result and use as Method Address, then fetch and send parameter. - SetRegister(reg, result); - SetMethodAddress(result); - Send(FetchParameter()); - break; - case Macro::ResultOperation::MoveAndSetMethodSend: - // Move result and use as Method Address, then send bits 12:17 of result. - SetRegister(reg, result); - SetMethodAddress(result); - Send((result >> 12) & 0b111111); - break; - default: - UNIMPLEMENTED_MSG("Unimplemented result operation {}", operation); - break; - } -} - -bool MacroInterpreterImpl::EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const { - switch (cond) { - case Macro::BranchCondition::Zero: - return value == 0; - case Macro::BranchCondition::NotZero: - return value != 0; - } - UNREACHABLE(); -} - -Macro::Opcode MacroInterpreterImpl::GetOpcode() const { - ASSERT((pc % sizeof(u32)) == 0); - ASSERT(pc < code.size() * sizeof(u32)); - return {code[pc / sizeof(u32)]}; -} - -u32 MacroInterpreterImpl::GetRegister(u32 register_id) const { - return registers.at(register_id); -} - -void MacroInterpreterImpl::SetRegister(u32 register_id, u32 value) { - // Register 0 is hardwired as the zero register. - // Ensure no writes to it actually occur. - if (register_id == 0) { - return; - } - - registers.at(register_id) = value; -} - -void MacroInterpreterImpl::SetMethodAddress(u32 address) { - method_address.raw = address; -} - -void MacroInterpreterImpl::Send(u32 value) { - maxwell3d.CallMethod(method_address.address, value, true); - // Increment the method address by the method increment. 
- method_address.address.Assign(method_address.address.Value() + - method_address.increment.Value()); -} - -u32 MacroInterpreterImpl::Read(u32 method) const { - return maxwell3d.GetRegisterValue(method); -} - -u32 MacroInterpreterImpl::FetchParameter() { - ASSERT(next_parameter_index < num_parameters); - return parameters[next_parameter_index++]; -} -} // Anonymous namespace - -MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d_) - : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {} - -std::unique_ptr MacroInterpreter::Compile(const std::vector& code) { - return std::make_unique(maxwell3d, code); -} - -} // namespace Tegra diff --git a/src/video_core/macro/macro_interpreter.h b/src/video_core/macro/macro_interpreter.h deleted file mode 100644 index f5eeb0b76f..0000000000 --- a/src/video_core/macro/macro_interpreter.h +++ /dev/null @@ -1,27 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include - -#include "common/common_types.h" -#include "video_core/macro/macro.h" - -namespace Tegra { -namespace Engines { -class Maxwell3D; -} - -class MacroInterpreter final : public MacroEngine { -public: - explicit MacroInterpreter(Engines::Maxwell3D& maxwell3d_); - -protected: - std::unique_ptr Compile(const std::vector& code) override; - -private: - Engines::Maxwell3D& maxwell3d; -}; - -} // namespace Tegra diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp deleted file mode 100644 index 65935f6c62..0000000000 --- a/src/video_core/macro/macro_jit_x64.cpp +++ /dev/null @@ -1,678 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#include -#include -#include - -#include - -#include "common/assert.h" -#include "common/bit_field.h" -#include 
"common/logging/log.h" -#include "common/x64/xbyak_abi.h" -#include "common/x64/xbyak_util.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/macro/macro_interpreter.h" -#include "video_core/macro/macro_jit_x64.h" - -namespace Tegra { -namespace { -constexpr Xbyak::Reg64 STATE = Xbyak::util::rbx; -constexpr Xbyak::Reg32 RESULT = Xbyak::util::r10d; -constexpr Xbyak::Reg64 MAX_PARAMETER = Xbyak::util::r11; -constexpr Xbyak::Reg64 PARAMETERS = Xbyak::util::r12; -constexpr Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; -constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; - -constexpr std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ - STATE, - RESULT, - MAX_PARAMETER, - PARAMETERS, - METHOD_ADDRESS, - BRANCH_HOLDER, -}); - -// Arbitrarily chosen based on current booting games. -constexpr size_t MAX_CODE_SIZE = 0x10000; - -std::bitset<32> PersistentCallerSavedRegs() { - return PERSISTENT_REGISTERS & Common::X64::ABI_ALL_CALLER_SAVED; -} - -/// @brief Must enforce W^X constraints, as we yet don't havea global "NO_EXECUTE" support flag -/// the speed loss is minimal, and in fact may be negligible, however for your peace of mind -/// I simply included known OSes whom had W^X issues -#if defined(__OpenBSD__) || defined(__NetBSD__) || defined(__DragonFly__) -static const auto default_cg_mode = Xbyak::DontSetProtectRWE; -#else -static const auto default_cg_mode = nullptr; //Allow RWE -#endif - -class MacroJITx64Impl final : public Xbyak::CodeGenerator, public CachedMacro { -public: - explicit MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector& code_) - : Xbyak::CodeGenerator(MAX_CODE_SIZE, default_cg_mode) - , code{code_}, maxwell3d{maxwell3d_} { - Compile(); - } - - void Execute(const std::vector& parameters, u32 method) override; - - void Compile_ALU(Macro::Opcode opcode); - void Compile_AddImmediate(Macro::Opcode opcode); - void Compile_ExtractInsert(Macro::Opcode opcode); - void 
Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode); - void Compile_ExtractShiftLeftRegister(Macro::Opcode opcode); - void Compile_Read(Macro::Opcode opcode); - void Compile_Branch(Macro::Opcode opcode); - -private: - void Optimizer_ScanFlags(); - - void Compile(); - bool Compile_NextInstruction(); - - Xbyak::Reg32 Compile_FetchParameter(); - Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst); - - void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg); - void Compile_Send(Xbyak::Reg32 value); - - Macro::Opcode GetOpCode() const; - - struct JITState { - Engines::Maxwell3D* maxwell3d{}; - std::array registers{}; - u32 carry_flag{}; - }; - static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0"); - using ProgramType = void (*)(JITState*, const u32*, const u32*); - - struct OptimizerState { - bool can_skip_carry{}; - bool has_delayed_pc{}; - bool zero_reg_skip{}; - bool skip_dummy_addimmediate{}; - bool optimize_for_method_move{}; - bool enable_asserts{}; - }; - OptimizerState optimizer{}; - - std::optional next_opcode{}; - ProgramType program{nullptr}; - - std::array labels; - std::array delay_skip; - Xbyak::Label end_of_code{}; - - bool is_delay_slot{}; - u32 pc{}; - - const std::vector& code; - Engines::Maxwell3D& maxwell3d; -}; - -void MacroJITx64Impl::Execute(const std::vector& parameters, u32 method) { - ASSERT_OR_EXECUTE(program != nullptr, { return; }); - JITState state{}; - state.maxwell3d = &maxwell3d; - state.registers = {}; - program(&state, parameters.data(), parameters.data() + parameters.size()); -} - -void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) { - const bool is_a_zero = opcode.src_a == 0; - const bool is_b_zero = opcode.src_b == 0; - const bool valid_operation = !is_a_zero && !is_b_zero; - [[maybe_unused]] const bool is_move_operation = !is_a_zero && is_b_zero; - const bool has_zero_register = is_a_zero || is_b_zero; - const bool no_zero_reg_skip = opcode.alu_operation == 
Macro::ALUOperation::AddWithCarry || - opcode.alu_operation == Macro::ALUOperation::SubtractWithBorrow; - - Xbyak::Reg32 src_a; - Xbyak::Reg32 src_b; - - if (!optimizer.zero_reg_skip || no_zero_reg_skip) { - src_a = Compile_GetRegister(opcode.src_a, RESULT); - src_b = Compile_GetRegister(opcode.src_b, eax); - } else { - if (!is_a_zero) { - src_a = Compile_GetRegister(opcode.src_a, RESULT); - } - if (!is_b_zero) { - src_b = Compile_GetRegister(opcode.src_b, eax); - } - } - - bool has_emitted = false; - - switch (opcode.alu_operation) { - case Macro::ALUOperation::Add: - if (optimizer.zero_reg_skip) { - if (valid_operation) { - add(src_a, src_b); - } - } else { - add(src_a, src_b); - } - - if (!optimizer.can_skip_carry) { - setc(byte[STATE + offsetof(JITState, carry_flag)]); - } - break; - case Macro::ALUOperation::AddWithCarry: - bt(dword[STATE + offsetof(JITState, carry_flag)], 0); - adc(src_a, src_b); - setc(byte[STATE + offsetof(JITState, carry_flag)]); - break; - case Macro::ALUOperation::Subtract: - if (optimizer.zero_reg_skip) { - if (valid_operation) { - sub(src_a, src_b); - has_emitted = true; - } - } else { - sub(src_a, src_b); - has_emitted = true; - } - if (!optimizer.can_skip_carry && has_emitted) { - setc(byte[STATE + offsetof(JITState, carry_flag)]); - } - break; - case Macro::ALUOperation::SubtractWithBorrow: - bt(dword[STATE + offsetof(JITState, carry_flag)], 0); - sbb(src_a, src_b); - setc(byte[STATE + offsetof(JITState, carry_flag)]); - break; - case Macro::ALUOperation::Xor: - if (optimizer.zero_reg_skip) { - if (valid_operation) { - xor_(src_a, src_b); - } - } else { - xor_(src_a, src_b); - } - break; - case Macro::ALUOperation::Or: - if (optimizer.zero_reg_skip) { - if (valid_operation) { - or_(src_a, src_b); - } - } else { - or_(src_a, src_b); - } - break; - case Macro::ALUOperation::And: - if (optimizer.zero_reg_skip) { - if (!has_zero_register) { - and_(src_a, src_b); - } - } else { - and_(src_a, src_b); - } - break; - case 
Macro::ALUOperation::AndNot: - if (optimizer.zero_reg_skip) { - if (!is_a_zero) { - not_(src_b); - and_(src_a, src_b); - } - } else { - not_(src_b); - and_(src_a, src_b); - } - break; - case Macro::ALUOperation::Nand: - if (optimizer.zero_reg_skip) { - if (!is_a_zero) { - and_(src_a, src_b); - not_(src_a); - } - } else { - and_(src_a, src_b); - not_(src_a); - } - break; - default: - UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", opcode.alu_operation.Value()); - break; - } - Compile_ProcessResult(opcode.result_operation, opcode.dst); -} - -void MacroJITx64Impl::Compile_AddImmediate(Macro::Opcode opcode) { - if (optimizer.skip_dummy_addimmediate) { - // Games tend to use this as an exit instruction placeholder. It's to encode an instruction - // without doing anything. In our case we can just not emit anything. - if (opcode.result_operation == Macro::ResultOperation::Move && opcode.dst == 0) { - return; - } - } - // Check for redundant moves - if (optimizer.optimize_for_method_move && - opcode.result_operation == Macro::ResultOperation::MoveAndSetMethod) { - if (next_opcode.has_value()) { - const auto next = *next_opcode; - if (next.result_operation == Macro::ResultOperation::MoveAndSetMethod && - opcode.dst == next.dst) { - return; - } - } - } - if (optimizer.zero_reg_skip && opcode.src_a == 0) { - if (opcode.immediate == 0) { - xor_(RESULT, RESULT); - } else { - mov(RESULT, opcode.immediate); - } - } else { - auto result = Compile_GetRegister(opcode.src_a, RESULT); - if (opcode.immediate > 2) { - add(result, opcode.immediate); - } else if (opcode.immediate == 1) { - inc(result); - } else if (opcode.immediate < 0) { - sub(result, opcode.immediate * -1); - } - } - Compile_ProcessResult(opcode.result_operation, opcode.dst); -} - -void MacroJITx64Impl::Compile_ExtractInsert(Macro::Opcode opcode) { - auto dst = Compile_GetRegister(opcode.src_a, RESULT); - auto src = Compile_GetRegister(opcode.src_b, eax); - - const u32 mask = ~(opcode.GetBitfieldMask() << 
opcode.bf_dst_bit); - and_(dst, mask); - shr(src, opcode.bf_src_bit); - and_(src, opcode.GetBitfieldMask()); - shl(src, opcode.bf_dst_bit); - or_(dst, src); - - Compile_ProcessResult(opcode.result_operation, opcode.dst); -} - -void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) { - const auto dst = Compile_GetRegister(opcode.src_a, ecx); - const auto src = Compile_GetRegister(opcode.src_b, RESULT); - - shr(src, dst.cvt8()); - and_(src, opcode.GetBitfieldMask()); - shl(src, opcode.bf_dst_bit); - - Compile_ProcessResult(opcode.result_operation, opcode.dst); -} - -void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) { - const auto dst = Compile_GetRegister(opcode.src_a, ecx); - const auto src = Compile_GetRegister(opcode.src_b, RESULT); - - shr(src, opcode.bf_src_bit); - and_(src, opcode.GetBitfieldMask()); - shl(src, dst.cvt8()); - - Compile_ProcessResult(opcode.result_operation, opcode.dst); -} - -void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) { - if (optimizer.zero_reg_skip && opcode.src_a == 0) { - if (opcode.immediate == 0) { - xor_(RESULT, RESULT); - } else { - mov(RESULT, opcode.immediate); - } - } else { - auto result = Compile_GetRegister(opcode.src_a, RESULT); - if (opcode.immediate > 2) { - add(result, opcode.immediate); - } else if (opcode.immediate == 1) { - inc(result); - } else if (opcode.immediate < 0) { - sub(result, opcode.immediate * -1); - } - } - - // Equivalent to Engines::Maxwell3D::GetRegisterValue: - if (optimizer.enable_asserts) { - Xbyak::Label pass_range_check; - cmp(RESULT, static_cast(Engines::Maxwell3D::Regs::NUM_REGS)); - jb(pass_range_check); - int3(); - L(pass_range_check); - } - mov(rax, qword[STATE]); - mov(RESULT, - dword[rax + offsetof(Engines::Maxwell3D, regs) + - offsetof(Engines::Maxwell3D::Regs, reg_array) + RESULT.cvt64() * sizeof(u32)]); - - Compile_ProcessResult(opcode.result_operation, opcode.dst); -} - -void Send(Engines::Maxwell3D* maxwell3d, 
Macro::MethodAddress method_address, u32 value) { - maxwell3d->CallMethod(method_address.address, value, true); -} - -void MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { - Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); - mov(Common::X64::ABI_PARAM1, qword[STATE]); - mov(Common::X64::ABI_PARAM2.cvt32(), METHOD_ADDRESS); - mov(Common::X64::ABI_PARAM3.cvt32(), value); - Common::X64::CallFarFunction(*this, &Send); - Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); - - Xbyak::Label dont_process{}; - // Get increment - test(METHOD_ADDRESS, 0x3f000); - // If zero, method address doesn't update - je(dont_process); - - mov(ecx, METHOD_ADDRESS); - and_(METHOD_ADDRESS, 0xfff); - shr(ecx, 12); - and_(ecx, 0x3f); - lea(eax, ptr[rcx + METHOD_ADDRESS.cvt64()]); - sal(ecx, 12); - or_(eax, ecx); - - mov(METHOD_ADDRESS, eax); - - L(dont_process); -} - -void MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) { - ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid"); - const s32 jump_address = - static_cast(pc) + static_cast(opcode.GetBranchTarget() / sizeof(s32)); - - Xbyak::Label end; - auto value = Compile_GetRegister(opcode.src_a, eax); - cmp(value, 0); // test(value, value); - if (optimizer.has_delayed_pc) { - switch (opcode.branch_condition) { - case Macro::BranchCondition::Zero: - jne(end, T_NEAR); - break; - case Macro::BranchCondition::NotZero: - je(end, T_NEAR); - break; - } - - if (opcode.branch_annul) { - xor_(BRANCH_HOLDER, BRANCH_HOLDER); - jmp(labels[jump_address], T_NEAR); - } else { - Xbyak::Label handle_post_exit{}; - Xbyak::Label skip{}; - jmp(skip, T_NEAR); - - L(handle_post_exit); - xor_(BRANCH_HOLDER, BRANCH_HOLDER); - jmp(labels[jump_address], T_NEAR); - - L(skip); - mov(BRANCH_HOLDER, handle_post_exit); - jmp(delay_skip[pc], T_NEAR); - } - } else { - switch (opcode.branch_condition) { - case Macro::BranchCondition::Zero: - je(labels[jump_address], T_NEAR); - 
break; - case Macro::BranchCondition::NotZero: - jne(labels[jump_address], T_NEAR); - break; - } - } - - L(end); -} - -void MacroJITx64Impl::Optimizer_ScanFlags() { - optimizer.can_skip_carry = true; - optimizer.has_delayed_pc = false; - for (auto raw_op : code) { - Macro::Opcode op{}; - op.raw = raw_op; - - if (op.operation == Macro::Operation::ALU) { - // Scan for any ALU operations which actually use the carry flag, if they don't exist in - // our current code we can skip emitting the carry flag handling operations - if (op.alu_operation == Macro::ALUOperation::AddWithCarry || - op.alu_operation == Macro::ALUOperation::SubtractWithBorrow) { - optimizer.can_skip_carry = false; - } - } - - if (op.operation == Macro::Operation::Branch) { - if (!op.branch_annul) { - optimizer.has_delayed_pc = true; - } - } - } -} - -void MacroJITx64Impl::Compile() { - labels.fill(Xbyak::Label()); - - Common::X64::ABI_PushRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); - // JIT state - mov(STATE, Common::X64::ABI_PARAM1); - mov(PARAMETERS, Common::X64::ABI_PARAM2); - mov(MAX_PARAMETER, Common::X64::ABI_PARAM3); - xor_(RESULT, RESULT); - xor_(METHOD_ADDRESS, METHOD_ADDRESS); - xor_(BRANCH_HOLDER, BRANCH_HOLDER); - - mov(dword[STATE + offsetof(JITState, registers) + 4], Compile_FetchParameter()); - - // Track get register for zero registers and mark it as no-op - optimizer.zero_reg_skip = true; - - // AddImmediate tends to be used as a NOP instruction, if we detect this we can - // completely skip the entire code path and no emit anything - optimizer.skip_dummy_addimmediate = true; - - // SMO tends to emit a lot of unnecessary method moves, we can mitigate this by only emitting - // one if our register isn't "dirty" - optimizer.optimize_for_method_move = true; - - // Enable run-time assertions in JITted code - optimizer.enable_asserts = false; - - // Check to see if we can skip emitting certain instructions - Optimizer_ScanFlags(); - - const u32 op_count = 
static_cast(code.size()); - for (u32 i = 0; i < op_count; i++) { - if (i < op_count - 1) { - pc = i + 1; - next_opcode = GetOpCode(); - } else { - next_opcode = {}; - } - pc = i; - Compile_NextInstruction(); - } - - L(end_of_code); - - Common::X64::ABI_PopRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); - ret(); - ready(); - program = getCode(); -} - -bool MacroJITx64Impl::Compile_NextInstruction() { - const auto opcode = GetOpCode(); - if (labels[pc].getAddress()) { - return false; - } - - L(labels[pc]); - - switch (opcode.operation) { - case Macro::Operation::ALU: - Compile_ALU(opcode); - break; - case Macro::Operation::AddImmediate: - Compile_AddImmediate(opcode); - break; - case Macro::Operation::ExtractInsert: - Compile_ExtractInsert(opcode); - break; - case Macro::Operation::ExtractShiftLeftImmediate: - Compile_ExtractShiftLeftImmediate(opcode); - break; - case Macro::Operation::ExtractShiftLeftRegister: - Compile_ExtractShiftLeftRegister(opcode); - break; - case Macro::Operation::Read: - Compile_Read(opcode); - break; - case Macro::Operation::Branch: - Compile_Branch(opcode); - break; - default: - UNIMPLEMENTED_MSG("Unimplemented opcode {}", opcode.operation.Value()); - break; - } - - if (optimizer.has_delayed_pc) { - if (opcode.is_exit) { - mov(rax, end_of_code); - test(BRANCH_HOLDER, BRANCH_HOLDER); - cmove(BRANCH_HOLDER, rax); - // Jump to next instruction to skip delay slot check - je(labels[pc + 1], T_NEAR); - } else { - // TODO(ogniK): Optimize delay slot branching - Xbyak::Label no_delay_slot{}; - test(BRANCH_HOLDER, BRANCH_HOLDER); - je(no_delay_slot, T_NEAR); - mov(rax, BRANCH_HOLDER); - xor_(BRANCH_HOLDER, BRANCH_HOLDER); - jmp(rax); - L(no_delay_slot); - } - L(delay_skip[pc]); - if (opcode.is_exit) { - return false; - } - } else { - test(BRANCH_HOLDER, BRANCH_HOLDER); - jne(end_of_code, T_NEAR); - if (opcode.is_exit) { - inc(BRANCH_HOLDER); - return false; - } - } - return true; -} - -static void WarnInvalidParameter(uintptr_t 
parameter, uintptr_t max_parameter) { - LOG_CRITICAL(HW_GPU, - "Macro JIT: invalid parameter access 0x{:x} (0x{:x} is the last parameter)", - parameter, max_parameter - sizeof(u32)); -} - -Xbyak::Reg32 MacroJITx64Impl::Compile_FetchParameter() { - Xbyak::Label parameter_ok{}; - cmp(PARAMETERS, MAX_PARAMETER); - jb(parameter_ok, T_NEAR); - Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); - mov(Common::X64::ABI_PARAM1, PARAMETERS); - mov(Common::X64::ABI_PARAM2, MAX_PARAMETER); - Common::X64::CallFarFunction(*this, &WarnInvalidParameter); - Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); - L(parameter_ok); - mov(eax, dword[PARAMETERS]); - add(PARAMETERS, sizeof(u32)); - return eax; -} - -Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) { - if (index == 0) { - // Register 0 is always zero - xor_(dst, dst); - } else { - mov(dst, dword[STATE + offsetof(JITState, registers) + index * sizeof(u32)]); - } - - return dst; -} - -void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) { - const auto SetRegister = [this](u32 reg_index, const Xbyak::Reg32& result) { - // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero - // register. - if (reg_index == 0) { - return; - } - mov(dword[STATE + offsetof(JITState, registers) + reg_index * sizeof(u32)], result); - }; - const auto SetMethodAddress = [this](const Xbyak::Reg32& reg32) { mov(METHOD_ADDRESS, reg32); }; - - switch (operation) { - case Macro::ResultOperation::IgnoreAndFetch: - SetRegister(reg, Compile_FetchParameter()); - break; - case Macro::ResultOperation::Move: - SetRegister(reg, RESULT); - break; - case Macro::ResultOperation::MoveAndSetMethod: - SetRegister(reg, RESULT); - SetMethodAddress(RESULT); - break; - case Macro::ResultOperation::FetchAndSend: - // Fetch parameter and send result. 
- SetRegister(reg, Compile_FetchParameter()); - Compile_Send(RESULT); - break; - case Macro::ResultOperation::MoveAndSend: - // Move and send result. - SetRegister(reg, RESULT); - Compile_Send(RESULT); - break; - case Macro::ResultOperation::FetchAndSetMethod: - // Fetch parameter and use result as Method Address. - SetRegister(reg, Compile_FetchParameter()); - SetMethodAddress(RESULT); - break; - case Macro::ResultOperation::MoveAndSetMethodFetchAndSend: - // Move result and use as Method Address, then fetch and send parameter. - SetRegister(reg, RESULT); - SetMethodAddress(RESULT); - Compile_Send(Compile_FetchParameter()); - break; - case Macro::ResultOperation::MoveAndSetMethodSend: - // Move result and use as Method Address, then send bits 12:17 of result. - SetRegister(reg, RESULT); - SetMethodAddress(RESULT); - shr(RESULT, 12); - and_(RESULT, 0b111111); - Compile_Send(RESULT); - break; - default: - UNIMPLEMENTED_MSG("Unimplemented macro operation {}", operation); - break; - } -} - -Macro::Opcode MacroJITx64Impl::GetOpCode() const { - ASSERT(pc < code.size()); - return {code[pc]}; -} -} // Anonymous namespace - -MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d_) - : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {} - -std::unique_ptr MacroJITx64::Compile(const std::vector& code) { - return std::make_unique(maxwell3d, code); -} -} // namespace Tegra diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h deleted file mode 100644 index 99ee1b9e68..0000000000 --- a/src/video_core/macro/macro_jit_x64.h +++ /dev/null @@ -1,26 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include "common/common_types.h" -#include "video_core/macro/macro.h" - -namespace Tegra { - -namespace Engines { -class Maxwell3D; -} - -class MacroJITx64 final : public MacroEngine { -public: - explicit MacroJITx64(Engines::Maxwell3D& maxwell3d_); - -protected: - 
std::unique_ptr Compile(const std::vector& code) override; - -private: - Engines::Maxwell3D& maxwell3d; -}; - -} // namespace Tegra diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 75254049a6..14ab5dd967 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -1214,19 +1214,16 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageV ImageView::~ImageView() = default; GLuint ImageView::StorageView(Shader::TextureType texture_type, Shader::ImageFormat image_format) { - if (image_format == Shader::ImageFormat::Typeless) { + if (image_format == Shader::ImageFormat::Typeless) return Handle(texture_type); - } - const bool is_signed{image_format == Shader::ImageFormat::R8_SINT || - image_format == Shader::ImageFormat::R16_SINT}; - if (!storage_views) { - storage_views = std::make_unique(); - } + const bool is_signed = image_format == Shader::ImageFormat::R8_SINT + || image_format == Shader::ImageFormat::R16_SINT; + if (!storage_views) + storage_views.emplace(); auto& type_views{is_signed ? 
storage_views->signeds : storage_views->unsigneds}; - GLuint& view{type_views[static_cast(texture_type)]}; - if (view == 0) { + GLuint& view{type_views[size_t(texture_type)]}; + if (view == 0) view = MakeView(texture_type, ShaderFormat(image_format)); - } return view; } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 3de24508fe..e2a2022cb2 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -302,7 +302,7 @@ private: std::array views{}; std::vector stored_views; - std::unique_ptr storage_views; + std::optional storage_views; GLenum internal_format = GL_NONE; GLuint default_handle = 0; u32 buffer_size = 0; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 101a884fd7..c3a5ed391b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -376,7 +376,6 @@ void RasterizerVulkan::DrawTexture() { } void RasterizerVulkan::Clear(u32 layer_count) { - FlushWork(); gpu_memory->FlushCaching(); @@ -396,9 +395,7 @@ void RasterizerVulkan::Clear(u32 layer_count) { scheduler.RequestRenderpass(framebuffer); query_cache.NotifySegment(true); - query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, - maxwell3d->regs.zpass_pixel_count_enable); - + query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, maxwell3d->regs.zpass_pixel_count_enable); u32 up_scale = 1; u32 down_shift = 0; if (texture_cache.IsRescaling()) { @@ -443,14 +440,14 @@ void RasterizerVulkan::Clear(u32 layer_count) { offset = 0; return; } - if (offset >= static_cast(limit)) { - offset = static_cast(limit); + if (offset >= s32(limit)) { + offset = s32(limit); extent = 0; return; } - const u64 end_coord = static_cast(offset) + extent; + const u64 end_coord = u64(offset) + extent; if (end_coord > limit) { - extent = limit - 
static_cast(offset); + extent = limit - u32(offset); } }; @@ -464,30 +461,22 @@ void RasterizerVulkan::Clear(u32 layer_count) { const u32 color_attachment = regs.clear_surface.RT; if (use_color && framebuffer->HasAspectColorBit(color_attachment)) { - const auto format = - VideoCore::Surface::PixelFormatFromRenderTargetFormat(regs.rt[color_attachment].format); + const auto format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(regs.rt[color_attachment].format); bool is_integer = IsPixelFormatInteger(format); bool is_signed = IsPixelFormatSignedInteger(format); size_t int_size = PixelComponentSizeBitsInteger(format); VkClearValue clear_value{}; if (!is_integer) { - std::memcpy(clear_value.color.float32, regs.clear_color.data(), - regs.clear_color.size() * sizeof(f32)); + std::memcpy(clear_value.color.float32, regs.clear_color.data(), regs.clear_color.size() * sizeof(f32)); } else if (!is_signed) { - for (size_t i = 0; i < 4; i++) { - clear_value.color.uint32[i] = static_cast( - static_cast(static_cast(int_size) << 1U) * regs.clear_color[i]); - } + for (size_t i = 0; i < 4; i++) + clear_value.color.uint32[i] = u32(f32(u64(int_size) << 1U) * regs.clear_color[i]); } else { - for (size_t i = 0; i < 4; i++) { - clear_value.color.int32[i] = - static_cast(static_cast(static_cast(int_size - 1) << 1) * - (regs.clear_color[i] - 0.5f)); - } + for (size_t i = 0; i < 4; i++) + clear_value.color.int32[i] = s32(f32(s64(int_size - 1) << 1) * (regs.clear_color[i] - 0.5f)); } - if (regs.clear_surface.R && regs.clear_surface.G && regs.clear_surface.B && - regs.clear_surface.A) { + if (regs.clear_surface.R && regs.clear_surface.G && regs.clear_surface.B && regs.clear_surface.A) { scheduler.Record([color_attachment, clear_value, clear_rect](vk::CommandBuffer cmdbuf) { const VkClearAttachment attachment{ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, @@ -497,14 +486,11 @@ void RasterizerVulkan::Clear(u32 layer_count) { cmdbuf.ClearAttachments(attachment, clear_rect); }); } else { - u8 
color_mask = static_cast(regs.clear_surface.R | regs.clear_surface.G << 1 | - regs.clear_surface.B << 2 | regs.clear_surface.A << 3); + u8 color_mask = u8(regs.clear_surface.R | regs.clear_surface.G << 1 | regs.clear_surface.B << 2 | regs.clear_surface.A << 3); Region2D dst_region = { Offset2D{.x = clear_rect.rect.offset.x, .y = clear_rect.rect.offset.y}, - Offset2D{.x = clear_rect.rect.offset.x + - static_cast(clear_rect.rect.extent.width), - .y = clear_rect.rect.offset.y + - static_cast(clear_rect.rect.extent.height)}}; + Offset2D{.x = clear_rect.rect.offset.x + s32(clear_rect.rect.extent.width), + .y = clear_rect.rect.offset.y + s32(clear_rect.rect.extent.height)}}; blit_image.ClearColor(framebuffer, color_mask, regs.clear_color, dst_region); } } @@ -527,11 +513,10 @@ void RasterizerVulkan::Clear(u32 layer_count) { regs.stencil_front_mask != 0) { Region2D dst_region = { Offset2D{.x = clear_rect.rect.offset.x, .y = clear_rect.rect.offset.y}, - Offset2D{.x = clear_rect.rect.offset.x + static_cast(clear_rect.rect.extent.width), - .y = clear_rect.rect.offset.y + - static_cast(clear_rect.rect.extent.height)}}; + Offset2D{.x = clear_rect.rect.offset.x + s32(clear_rect.rect.extent.width), + .y = clear_rect.rect.offset.y + s32(clear_rect.rect.extent.height)}}; blit_image.ClearDepthStencil(framebuffer, use_depth, regs.clear_depth, - static_cast(regs.stencil_front_mask), regs.clear_stencil, + u8(regs.stencil_front_mask), regs.clear_stencil, regs.stencil_front_func_mask, dst_region); } else { scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil, diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index cff7a73903..a950ffed7a 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -860,8 +860,7 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, Scheduler& sched compute_pass_descriptor_queue, 
memory_allocator); } if (device.IsStorageImageMultisampleSupported()) { - msaa_copy_pass = std::make_unique( - device, scheduler, descriptor_pool, staging_buffer_pool, compute_pass_descriptor_queue); + msaa_copy_pass.emplace(device, scheduler, descriptor_pool, staging_buffer_pool, compute_pass_descriptor_queue); } if (!device.IsKhrImageFormatListSupported()) { return; @@ -1675,10 +1674,10 @@ void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset, // CHANGE: Gate the MSAA path more strictly and only use it for color, when the pass and device // support are available. Avoid running the MSAA path when prerequisites aren't met, // preventing validation and runtime issues. - const bool wants_msaa_upload = info.num_samples > 1 && - (aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) != 0 && - runtime->CanUploadMSAA() && runtime->msaa_copy_pass != nullptr && - runtime->device.IsStorageImageMultisampleSupported(); + const bool wants_msaa_upload = info.num_samples > 1 + && (aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) != 0 + && runtime->CanUploadMSAA() && runtime->msaa_copy_pass.has_value() + && runtime->device.IsStorageImageMultisampleSupported(); if (wants_msaa_upload) { // Create a temporary non-MSAA image to upload the data first @@ -2047,8 +2046,7 @@ bool Image::BlitScaleHelper(bool scale_up) { const u32 scaled_width = resolution.ScaleUp(info.size.width); const u32 scaled_height = is_2d ? resolution.ScaleUp(info.size.height) : info.size.height; std::unique_ptr& blit_view = scale_up ? scale_view : normal_view; - std::unique_ptr& blit_framebuffer = - scale_up ? scale_framebuffer : normal_framebuffer; + std::optional& blit_framebuffer = scale_up ? scale_framebuffer : normal_framebuffer; if (!blit_view) { const auto view_info = ImageViewInfo(ImageViewType::e2D, info.format); blit_view = std::make_unique(*runtime, view_info, NULL_IMAGE_ID, *this); @@ -2060,11 +2058,11 @@ bool Image::BlitScaleHelper(bool scale_up) { const u32 dst_height = scale_up ? 
scaled_height : info.size.height; const Region2D src_region{ .start = {0, 0}, - .end = {static_cast(src_width), static_cast(src_height)}, + .end = {s32(src_width), s32(src_height)}, }; const Region2D dst_region{ .start = {0, 0}, - .end = {static_cast(dst_width), static_cast(dst_height)}, + .end = {s32(dst_width), s32(dst_height)}, }; const VkExtent2D extent{ .width = (std::max)(scaled_width, info.size.width), @@ -2073,21 +2071,15 @@ bool Image::BlitScaleHelper(bool scale_up) { auto* view_ptr = blit_view.get(); if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) { - if (!blit_framebuffer) { - blit_framebuffer = - std::make_unique(*runtime, view_ptr, nullptr, extent, scale_up); - } - - runtime->blit_image_helper.BlitColor(blit_framebuffer.get(), *blit_view, dst_region, - src_region, operation, BLIT_OPERATION); + if (!blit_framebuffer) + blit_framebuffer.emplace(*runtime, view_ptr, nullptr, extent, scale_up); + runtime->blit_image_helper.BlitColor(&*blit_framebuffer, *blit_view, + dst_region, src_region, operation, BLIT_OPERATION); } else if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { - if (!blit_framebuffer) { - blit_framebuffer = - std::make_unique(*runtime, nullptr, view_ptr, extent, scale_up); - } - runtime->blit_image_helper.BlitDepthStencil(blit_framebuffer.get(), *blit_view, - dst_region, src_region, operation, - BLIT_OPERATION); + if (!blit_framebuffer) + blit_framebuffer.emplace(*runtime, nullptr, view_ptr, extent, scale_up); + runtime->blit_image_helper.BlitDepthStencil(&*blit_framebuffer, *blit_view, + dst_region, src_region, operation, BLIT_OPERATION); } else { // TODO: Use helper blits where applicable flags &= ~ImageFlagBits::Rescaled; @@ -2200,9 +2192,9 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI } } -ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, - ImageId image_id_, Image& image, const SlotVector& slot_imgs) - : ImageView{runtime, info, 
image_id_, image} { +ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, ImageId image_id_, Image& image, const SlotVector& slot_imgs) + : ImageView{runtime, info, image_id_, image} +{ slot_images = &slot_imgs; } @@ -2267,33 +2259,25 @@ VkImageView ImageView::ColorView() { VkImageView ImageView::StorageView(Shader::TextureType texture_type, Shader::ImageFormat image_format) { - if (!image_handle) { - return VK_NULL_HANDLE; - } - if (image_format == Shader::ImageFormat::Typeless) { - return Handle(texture_type); - } - const bool is_signed{image_format == Shader::ImageFormat::R8_SINT || - image_format == Shader::ImageFormat::R16_SINT}; - if (!storage_views) { - storage_views = std::make_unique(); - } - auto& views{is_signed ? storage_views->signeds : storage_views->unsigneds}; - auto& view{views[static_cast(texture_type)]}; - if (view) { + if (image_handle) { + if (image_format == Shader::ImageFormat::Typeless) { + return Handle(texture_type); + } + const bool is_signed = image_format == Shader::ImageFormat::R8_SINT + || image_format == Shader::ImageFormat::R16_SINT; + if (!storage_views) + storage_views.emplace(); + auto& views{is_signed ? 
storage_views->signeds : storage_views->unsigneds}; + auto& view{views[size_t(texture_type)]}; + if (!view) + view = MakeView(Format(image_format), VK_IMAGE_ASPECT_COLOR_BIT); return *view; } - view = MakeView(Format(image_format), VK_IMAGE_ASPECT_COLOR_BIT); - return *view; + return VK_NULL_HANDLE; } bool ImageView::IsRescaled() const noexcept { - if (!slot_images) { - return false; - } - const auto& slots = *slot_images; - const auto& src_image = slots[image_id]; - return src_image.IsRescaled(); + return (*slot_images)[image_id].IsRescaled(); } vk::ImageView ImageView::MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask) { diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index dcc835f05e..4bb9687ab0 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -133,7 +133,7 @@ public: vk::Buffer swizzle_table_buffer; VkDeviceSize swizzle_table_size = 0; - std::unique_ptr msaa_copy_pass; + std::optional msaa_copy_pass; const Settings::ResolutionScalingInfo& resolution; std::array, VideoCore::Surface::MaxPixelFormat> view_formats; @@ -141,6 +141,89 @@ public: std::array buffers{}; }; +class Framebuffer { +public: + explicit Framebuffer(TextureCacheRuntime& runtime, std::span color_buffers, + ImageView* depth_buffer, const VideoCommon::RenderTargets& key); + + explicit Framebuffer(TextureCacheRuntime& runtime, ImageView* color_buffer, + ImageView* depth_buffer, VkExtent2D extent, bool is_rescaled); + + ~Framebuffer(); + + Framebuffer(const Framebuffer&) = delete; + Framebuffer& operator=(const Framebuffer&) = delete; + + Framebuffer(Framebuffer&&) = default; + Framebuffer& operator=(Framebuffer&&) = default; + + void CreateFramebuffer(TextureCacheRuntime& runtime, + std::span color_buffers, ImageView* depth_buffer, + bool is_rescaled = false); + + [[nodiscard]] VkFramebuffer Handle() const noexcept { + return *framebuffer; + } + + 
[[nodiscard]] VkRenderPass RenderPass() const noexcept { + return renderpass; + } + + [[nodiscard]] VkExtent2D RenderArea() const noexcept { + return render_area; + } + + [[nodiscard]] VkSampleCountFlagBits Samples() const noexcept { + return samples; + } + + [[nodiscard]] u32 NumColorBuffers() const noexcept { + return num_color_buffers; + } + + [[nodiscard]] u32 NumImages() const noexcept { + return num_images; + } + + [[nodiscard]] const std::array& Images() const noexcept { + return images; + } + + [[nodiscard]] const std::array& ImageRanges() const noexcept { + return image_ranges; + } + + [[nodiscard]] bool HasAspectColorBit(size_t index) const noexcept { + return (image_ranges.at(rt_map[index]).aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) != 0; + } + + [[nodiscard]] bool HasAspectDepthBit() const noexcept { + return has_depth; + } + + [[nodiscard]] bool HasAspectStencilBit() const noexcept { + return has_stencil; + } + + [[nodiscard]] bool IsRescaled() const noexcept { + return is_rescaled; + } + +private: + vk::Framebuffer framebuffer; + VkRenderPass renderpass{}; + VkExtent2D render_area{}; + VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; + u32 num_color_buffers = 0; + u32 num_images = 0; + std::array images{}; + std::array image_ranges{}; + std::array rt_map{}; + bool has_depth{}; + bool has_stencil{}; + bool is_rescaled{}; +}; + class Image : public VideoCommon::ImageBase { public: explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, @@ -226,10 +309,9 @@ private: VkImageAspectFlags aspect_mask = 0; bool initialized = false; - std::unique_ptr scale_framebuffer; + std::optional scale_framebuffer; + std::optional normal_framebuffer; std::unique_ptr scale_view; - - std::unique_ptr normal_framebuffer; std::unique_ptr normal_view; }; @@ -297,7 +379,7 @@ private: const SlotVector* slot_images = nullptr; std::array image_views; - std::unique_ptr storage_views; + std::optional storage_views; vk::ImageView depth_view; 
vk::ImageView stencil_view; vk::ImageView color_view; @@ -331,89 +413,6 @@ private: vk::Sampler sampler_default_anisotropy; }; -class Framebuffer { -public: - explicit Framebuffer(TextureCacheRuntime& runtime, std::span color_buffers, - ImageView* depth_buffer, const VideoCommon::RenderTargets& key); - - explicit Framebuffer(TextureCacheRuntime& runtime, ImageView* color_buffer, - ImageView* depth_buffer, VkExtent2D extent, bool is_rescaled); - - ~Framebuffer(); - - Framebuffer(const Framebuffer&) = delete; - Framebuffer& operator=(const Framebuffer&) = delete; - - Framebuffer(Framebuffer&&) = default; - Framebuffer& operator=(Framebuffer&&) = default; - - void CreateFramebuffer(TextureCacheRuntime& runtime, - std::span color_buffers, ImageView* depth_buffer, - bool is_rescaled = false); - - [[nodiscard]] VkFramebuffer Handle() const noexcept { - return *framebuffer; - } - - [[nodiscard]] VkRenderPass RenderPass() const noexcept { - return renderpass; - } - - [[nodiscard]] VkExtent2D RenderArea() const noexcept { - return render_area; - } - - [[nodiscard]] VkSampleCountFlagBits Samples() const noexcept { - return samples; - } - - [[nodiscard]] u32 NumColorBuffers() const noexcept { - return num_color_buffers; - } - - [[nodiscard]] u32 NumImages() const noexcept { - return num_images; - } - - [[nodiscard]] const std::array& Images() const noexcept { - return images; - } - - [[nodiscard]] const std::array& ImageRanges() const noexcept { - return image_ranges; - } - - [[nodiscard]] bool HasAspectColorBit(size_t index) const noexcept { - return (image_ranges.at(rt_map[index]).aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) != 0; - } - - [[nodiscard]] bool HasAspectDepthBit() const noexcept { - return has_depth; - } - - [[nodiscard]] bool HasAspectStencilBit() const noexcept { - return has_stencil; - } - - [[nodiscard]] bool IsRescaled() const noexcept { - return is_rescaled; - } - -private: - vk::Framebuffer framebuffer; - VkRenderPass renderpass{}; - VkExtent2D render_area{}; 
- VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; - u32 num_color_buffers = 0; - u32 num_images = 0; - std::array images{}; - std::array image_ranges{}; - std::array rt_map{}; - bool has_depth{}; - bool has_stencil{}; - bool is_rescaled{}; -}; - struct TextureCacheParams { static constexpr bool ENABLE_VALIDATION = true; static constexpr bool FRAMEBUFFER_BLITS = false; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 425c8e23de..53fb57317f 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -596,10 +596,10 @@ FramebufferId TextureCache

::GetFramebufferId(const RenderTargets& key) { return framebuffer_id; } std::array<ImageView*, NUM_RT> color_buffers; - std::ranges::transform(key.color_buffer_ids, color_buffers.begin(), - [this](ImageViewId id) { return id ? &slot_image_views[id] : nullptr; }); - ImageView* const depth_buffer = - key.depth_buffer_id ? &slot_image_views[key.depth_buffer_id] : nullptr; + std::ranges::transform(key.color_buffer_ids, color_buffers.begin(), [this](ImageViewId id) { + return id ? &slot_image_views[id] : nullptr; + }); + ImageView* const depth_buffer = key.depth_buffer_id ? &slot_image_views[key.depth_buffer_id] : nullptr; framebuffer_id = slot_framebuffers.insert(runtime, color_buffers, depth_buffer, key); return framebuffer_id; }