diff --git a/src/audio_core/adsp/adsp.cpp b/src/audio_core/adsp/adsp.cpp index 48f0a63d4a..a578461f7c 100644 --- a/src/audio_core/adsp/adsp.cpp +++ b/src/audio_core/adsp/adsp.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -7,8 +10,8 @@ namespace AudioCore::ADSP { ADSP::ADSP(Core::System& system, Sink::Sink& sink) { - audio_renderer = std::make_unique(system, sink); - opus_decoder = std::make_unique(system); + audio_renderer.emplace(system, sink); + opus_decoder.emplace(system); opus_decoder->Send(Direction::DSP, OpusDecoder::Message::Start); if (opus_decoder->Receive(Direction::Host) != OpusDecoder::Message::StartOK) { LOG_ERROR(Service_Audio, "OpusDecoder failed to initialize."); @@ -17,11 +20,11 @@ ADSP::ADSP(Core::System& system, Sink::Sink& sink) { } AudioRenderer::AudioRenderer& ADSP::AudioRenderer() { - return *audio_renderer.get(); + return *audio_renderer; } OpusDecoder::OpusDecoder& ADSP::OpusDecoder() { - return *opus_decoder.get(); + return *opus_decoder; } } // namespace AudioCore::ADSP diff --git a/src/audio_core/adsp/adsp.h b/src/audio_core/adsp/adsp.h index a0c24a16a2..028d87939d 100644 --- a/src/audio_core/adsp/adsp.h +++ b/src/audio_core/adsp/adsp.h @@ -1,8 +1,13 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #pragma once +#include + #include "audio_core/adsp/apps/audio_renderer/audio_renderer.h" #include "audio_core/adsp/apps/opus/opus_decoder.h" #include "common/common_types.h" @@ -45,8 +50,8 @@ public: private: /// AudioRenderer app - std::unique_ptr audio_renderer{}; - std::unique_ptr opus_decoder{}; + std::optional audio_renderer{}; + std::optional opus_decoder{}; }; } // 
namespace ADSP diff --git a/src/audio_core/opus/decoder.cpp b/src/audio_core/opus/decoder.cpp index e60a7d48d4..7d0cce74db 100644 --- a/src/audio_core/opus/decoder.cpp +++ b/src/audio_core/opus/decoder.cpp @@ -27,33 +27,31 @@ OpusDecoder::OpusDecoder(Core::System& system_, HardwareOpus& hardware_opus_) OpusDecoder::~OpusDecoder() { if (decode_object_initialized) { - hardware_opus.ShutdownDecodeObject(shared_buffer.get(), shared_buffer_size); + hardware_opus.ShutdownDecodeObject(shared_buffer.data(), shared_buffer.size()); } } -Result OpusDecoder::Initialize(const OpusParametersEx& params, - Kernel::KTransferMemory* transfer_memory, u64 transfer_memory_size) { +Result OpusDecoder::Initialize(const OpusParametersEx& params, Kernel::KTransferMemory* transfer_memory, u64 transfer_memory_size) { auto frame_size{params.use_large_frame_size ? 5760 : 1920}; - shared_buffer_size = transfer_memory_size; - shared_buffer = std::make_unique(shared_buffer_size); + shared_buffer.resize(transfer_memory_size); shared_memory_mapped = true; buffer_size = Common::AlignUp((frame_size * params.channel_count) / (48'000 / params.sample_rate), 16); - out_data = {shared_buffer.get() + shared_buffer_size - buffer_size, buffer_size}; + out_data = {shared_buffer.data() + shared_buffer.size() - buffer_size, buffer_size}; size_t in_data_size{0x600u}; in_data = {out_data.data() - in_data_size, in_data_size}; ON_RESULT_FAILURE { if (shared_memory_mapped) { shared_memory_mapped = false; - ASSERT(R_SUCCEEDED(hardware_opus.UnmapMemory(shared_buffer.get(), shared_buffer_size))); + ASSERT(R_SUCCEEDED(hardware_opus.UnmapMemory(shared_buffer.data(), shared_buffer.size()))); } }; R_TRY(hardware_opus.InitializeDecodeObject(params.sample_rate, params.channel_count, - shared_buffer.get(), shared_buffer_size)); + shared_buffer.data(), shared_buffer.size())); sample_rate = params.sample_rate; channel_count = params.channel_count; @@ -62,31 +60,29 @@ Result OpusDecoder::Initialize(const OpusParametersEx& 
params, R_SUCCEED(); } -Result OpusDecoder::Initialize(const OpusMultiStreamParametersEx& params, - Kernel::KTransferMemory* transfer_memory, u64 transfer_memory_size) { +Result OpusDecoder::Initialize(const OpusMultiStreamParametersEx& params, Kernel::KTransferMemory* transfer_memory, u64 transfer_memory_size) { auto frame_size{params.use_large_frame_size ? 5760 : 1920}; - shared_buffer_size = transfer_memory_size; - shared_buffer = std::make_unique(shared_buffer_size); + shared_buffer.resize(transfer_memory_size, 0); shared_memory_mapped = true; buffer_size = Common::AlignUp((frame_size * params.channel_count) / (48'000 / params.sample_rate), 16); - out_data = {shared_buffer.get() + shared_buffer_size - buffer_size, buffer_size}; + out_data = {shared_buffer.data() + shared_buffer.size() - buffer_size, buffer_size}; size_t in_data_size{Common::AlignUp(1500ull * params.total_stream_count, 64u)}; in_data = {out_data.data() - in_data_size, in_data_size}; ON_RESULT_FAILURE { if (shared_memory_mapped) { shared_memory_mapped = false; - ASSERT(R_SUCCEEDED(hardware_opus.UnmapMemory(shared_buffer.get(), shared_buffer_size))); + ASSERT(R_SUCCEEDED(hardware_opus.UnmapMemory(shared_buffer.data(), shared_buffer.size()))); } }; R_TRY(hardware_opus.InitializeMultiStreamDecodeObject( params.sample_rate, params.channel_count, params.total_stream_count, - params.stereo_stream_count, params.mappings.data(), shared_buffer.get(), - shared_buffer_size)); + params.stereo_stream_count, params.mappings.data(), shared_buffer.data(), + shared_buffer.size())); sample_rate = params.sample_rate; channel_count = params.channel_count; @@ -113,7 +109,7 @@ Result OpusDecoder::DecodeInterleaved(u32* out_data_size, u64* out_time_taken, ResultBufferTooSmall); if (!shared_memory_mapped) { - R_TRY(hardware_opus.MapMemory(shared_buffer.get(), shared_buffer_size)); + R_TRY(hardware_opus.MapMemory(shared_buffer.data(), shared_buffer.size())); shared_memory_mapped = true; } @@ -121,7 +117,7 @@ Result 
OpusDecoder::DecodeInterleaved(u32* out_data_size, u64* out_time_taken, R_TRY(hardware_opus.DecodeInterleaved(out_samples, out_data.data(), out_data.size_bytes(), channel_count, in_data.data(), header.size, - shared_buffer.get(), time_taken, reset)); + shared_buffer.data(), time_taken, reset)); std::memcpy(output_data.data(), out_data.data(), out_samples * channel_count * sizeof(s16)); @@ -136,7 +132,7 @@ Result OpusDecoder::DecodeInterleaved(u32* out_data_size, u64* out_time_taken, Result OpusDecoder::SetContext([[maybe_unused]] std::span context) { R_SUCCEED_IF(shared_memory_mapped); shared_memory_mapped = true; - R_RETURN(hardware_opus.MapMemory(shared_buffer.get(), shared_buffer_size)); + R_RETURN(hardware_opus.MapMemory(shared_buffer.data(), shared_buffer.size())); } Result OpusDecoder::DecodeInterleavedForMultiStream(u32* out_data_size, u64* out_time_taken, @@ -159,7 +155,7 @@ Result OpusDecoder::DecodeInterleavedForMultiStream(u32* out_data_size, u64* out ResultBufferTooSmall); if (!shared_memory_mapped) { - R_TRY(hardware_opus.MapMemory(shared_buffer.get(), shared_buffer_size)); + R_TRY(hardware_opus.MapMemory(shared_buffer.data(), shared_buffer.size())); shared_memory_mapped = true; } @@ -167,7 +163,7 @@ Result OpusDecoder::DecodeInterleavedForMultiStream(u32* out_data_size, u64* out R_TRY(hardware_opus.DecodeInterleavedForMultiStream( out_samples, out_data.data(), out_data.size_bytes(), channel_count, in_data.data(), - header.size, shared_buffer.get(), time_taken, reset)); + header.size, shared_buffer.data(), time_taken, reset)); std::memcpy(output_data.data(), out_data.data(), out_samples * channel_count * sizeof(s16)); diff --git a/src/audio_core/opus/decoder.h b/src/audio_core/opus/decoder.h index 1b8c257d43..33bf88e349 100644 --- a/src/audio_core/opus/decoder.h +++ b/src/audio_core/opus/decoder.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: 
Copyright 2023 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -36,8 +39,7 @@ public: private: Core::System& system; HardwareOpus& hardware_opus; - std::unique_ptr shared_buffer{}; - u64 shared_buffer_size; + std::vector shared_buffer{}; std::span in_data{}; std::span out_data{}; u64 buffer_size{}; diff --git a/src/common/fiber.cpp b/src/common/fiber.cpp index 4f0f2b6430..ea3da3d053 100644 --- a/src/common/fiber.cpp +++ b/src/common/fiber.cpp @@ -4,6 +4,7 @@ // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include #include #include "common/assert.h" @@ -14,100 +15,70 @@ namespace Common { -constexpr std::size_t default_stack_size = 512 * 1024; +constexpr size_t DEFAULT_STACK_SIZE = 128 * 4096; +constexpr u32 CANARY_VALUE = 0xDEADBEEF; struct Fiber::FiberImpl { - FiberImpl() : stack{default_stack_size}, rewind_stack{default_stack_size} {} + FiberImpl() {} - VirtualBuffer stack; - VirtualBuffer rewind_stack; + std::array stack{}; + std::array rewind_stack{}; + u32 canary = CANARY_VALUE; + + boost::context::detail::fcontext_t context{}; + boost::context::detail::fcontext_t rewind_context{}; std::mutex guard; std::function entry_point; std::function rewind_point; std::shared_ptr previous_fiber; - bool is_thread_fiber{}; - bool released{}; - u8* stack_limit{}; - u8* rewind_stack_limit{}; - boost::context::detail::fcontext_t context{}; - boost::context::detail::fcontext_t rewind_context{}; + u8* stack_limit = nullptr; + u8* rewind_stack_limit = nullptr; + bool is_thread_fiber = false; + bool released = false; }; void Fiber::SetRewindPoint(std::function&& rewind_func) { impl->rewind_point = std::move(rewind_func); } -void Fiber::Start(boost::context::detail::transfer_t& transfer) { - ASSERT(impl->previous_fiber != nullptr); - impl->previous_fiber->impl->context = transfer.fctx; - impl->previous_fiber->impl->guard.unlock(); - impl->previous_fiber.reset(); - impl->entry_point(); - 
UNREACHABLE(); -} - -void Fiber::OnRewind([[maybe_unused]] boost::context::detail::transfer_t& transfer) { - ASSERT(impl->context != nullptr); - impl->context = impl->rewind_context; - impl->rewind_context = nullptr; - u8* tmp = impl->stack_limit; - impl->stack_limit = impl->rewind_stack_limit; - impl->rewind_stack_limit = tmp; - impl->rewind_point(); - UNREACHABLE(); -} - -void Fiber::FiberStartFunc(boost::context::detail::transfer_t transfer) { - auto* fiber = static_cast(transfer.data); - fiber->Start(transfer); -} - -void Fiber::RewindStartFunc(boost::context::detail::transfer_t transfer) { - auto* fiber = static_cast(transfer.data); - fiber->OnRewind(transfer); -} - Fiber::Fiber(std::function&& entry_point_func) : impl{std::make_unique()} { impl->entry_point = std::move(entry_point_func); impl->stack_limit = impl->stack.data(); impl->rewind_stack_limit = impl->rewind_stack.data(); - u8* stack_base = impl->stack_limit + default_stack_size; - impl->context = - boost::context::detail::make_fcontext(stack_base, impl->stack.size(), FiberStartFunc); + u8* stack_base = impl->stack_limit + DEFAULT_STACK_SIZE; + impl->context = boost::context::detail::make_fcontext(stack_base, impl->stack.size(), [](boost::context::detail::transfer_t transfer) -> void { + auto* fiber = static_cast(transfer.data); + ASSERT(fiber && fiber->impl && fiber->impl->previous_fiber && fiber->impl->previous_fiber->impl); + ASSERT(fiber->impl->canary == CANARY_VALUE); + fiber->impl->previous_fiber->impl->context = transfer.fctx; + fiber->impl->previous_fiber->impl->guard.unlock(); + fiber->impl->previous_fiber.reset(); + fiber->impl->entry_point(); + UNREACHABLE(); + }); } Fiber::Fiber() : impl{std::make_unique()} {} Fiber::~Fiber() { - if (impl->released) { - return; - } - // Make sure the Fiber is not being used - const bool locked = impl->guard.try_lock(); - ASSERT_MSG(locked, "Destroying a fiber that's still running"); - if (locked) { - impl->guard.unlock(); + if (!impl->released) { + // Make 
sure the Fiber is not being used + const bool locked = impl->guard.try_lock(); + ASSERT_MSG(locked, "Destroying a fiber that's still running"); + if (locked) { + impl->guard.unlock(); + } } } void Fiber::Exit() { ASSERT_MSG(impl->is_thread_fiber, "Exiting non main thread fiber"); - if (!impl->is_thread_fiber) { - return; + if (impl->is_thread_fiber) { + impl->guard.unlock(); + impl->released = true; } - impl->guard.unlock(); - impl->released = true; -} - -void Fiber::Rewind() { - ASSERT(impl->rewind_point); - ASSERT(impl->rewind_context == nullptr); - u8* stack_base = impl->rewind_stack_limit + default_stack_size; - impl->rewind_context = - boost::context::detail::make_fcontext(stack_base, impl->stack.size(), RewindStartFunc); - boost::context::detail::jump_fcontext(impl->rewind_context, this); } void Fiber::YieldTo(std::weak_ptr weak_from, Fiber& to) { @@ -115,16 +86,15 @@ void Fiber::YieldTo(std::weak_ptr weak_from, Fiber& to) { to.impl->previous_fiber = weak_from.lock(); auto transfer = boost::context::detail::jump_fcontext(to.impl->context, &to); - // "from" might no longer be valid if the thread was killed if (auto from = weak_from.lock()) { if (from->impl->previous_fiber == nullptr) { - ASSERT_MSG(false, "previous_fiber is nullptr!"); - return; + ASSERT(false && "previous_fiber is nullptr!"); + } else { + from->impl->previous_fiber->impl->context = transfer.fctx; + from->impl->previous_fiber->impl->guard.unlock(); + from->impl->previous_fiber.reset(); } - from->impl->previous_fiber->impl->context = transfer.fctx; - from->impl->previous_fiber->impl->guard.unlock(); - from->impl->previous_fiber.reset(); } } diff --git a/src/common/fiber.h b/src/common/fiber.h index 8af6ae4d3a..eb128f4bb2 100644 --- a/src/common/fiber.h +++ b/src/common/fiber.h @@ -45,22 +45,12 @@ public: /// Fiber 'from' must be the currently running fiber. 
static void YieldTo(std::weak_ptr weak_from, Fiber& to); [[nodiscard]] static std::shared_ptr ThreadToFiber(); - void SetRewindPoint(std::function&& rewind_func); - - void Rewind(); - /// Only call from main thread's fiber void Exit(); - private: Fiber(); - - void OnRewind(boost::context::detail::transfer_t& transfer); void Start(boost::context::detail::transfer_t& transfer); - static void FiberStartFunc(boost::context::detail::transfer_t transfer); - static void RewindStartFunc(boost::context::detail::transfer_t transfer); - struct FiberImpl; std::unique_ptr impl; }; diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp index e14bf3e651..4f9c240905 100644 --- a/src/common/wall_clock.cpp +++ b/src/common/wall_clock.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -9,7 +12,6 @@ #include "common/x64/native_clock.h" #include "common/x64/rdtsc.h" #endif - #ifdef HAS_NCE #include "common/arm64/native_clock.h" #endif @@ -73,8 +75,4 @@ std::unique_ptr CreateOptimalClock() { #endif } -std::unique_ptr CreateStandardWallClock() { - return std::make_unique(); -} - } // namespace Common diff --git a/src/common/wall_clock.h b/src/common/wall_clock.h index 3a0c43909a..7ad6536930 100644 --- a/src/common/wall_clock.h +++ b/src/common/wall_clock.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -84,8 +87,6 @@ protected: using CPUTickToGPUTickRatio = std::ratio; }; -std::unique_ptr CreateOptimalClock(); - -std::unique_ptr CreateStandardWallClock(); +[[nodiscard]] std::unique_ptr CreateOptimalClock(); } // namespace Common diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.cpp 
b/src/core/arm/dynarmic/arm_dynarmic_32.cpp index b57996cb8b..0fa4ca6f06 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_32.cpp @@ -16,170 +16,160 @@ namespace Core { using namespace Common::Literals; -class DynarmicCallbacks32 : public Dynarmic::A32::UserCallbacks { -public: - explicit DynarmicCallbacks32(ArmDynarmic32& parent, Kernel::KProcess* process) - : m_parent{parent}, m_memory(process->GetMemory()), - m_process(process), m_debugger_enabled{parent.m_system.DebuggerEnabled()}, - m_check_memory_access{m_debugger_enabled || - !Settings::values.cpuopt_ignore_memory_aborts.GetValue()} {} +DynarmicCallbacks32::DynarmicCallbacks32(ArmDynarmic32& parent, Kernel::KProcess* process) + : m_parent{parent}, m_memory(process->GetMemory()) + , m_process(process), m_debugger_enabled{parent.m_system.DebuggerEnabled()} + , m_check_memory_access{m_debugger_enabled || !Settings::values.cpuopt_ignore_memory_aborts.GetValue()} +{} - u8 MemoryRead8(u32 vaddr) override { - CheckMemoryAccess(vaddr, 1, Kernel::DebugWatchpointType::Read); - return m_memory.Read8(vaddr); - } - u16 MemoryRead16(u32 vaddr) override { - CheckMemoryAccess(vaddr, 2, Kernel::DebugWatchpointType::Read); - return m_memory.Read16(vaddr); - } - u32 MemoryRead32(u32 vaddr) override { - CheckMemoryAccess(vaddr, 4, Kernel::DebugWatchpointType::Read); - return m_memory.Read32(vaddr); - } - u64 MemoryRead64(u32 vaddr) override { - CheckMemoryAccess(vaddr, 8, Kernel::DebugWatchpointType::Read); - return m_memory.Read64(vaddr); - } - std::optional MemoryReadCode(u32 vaddr) override { - if (!m_memory.IsValidVirtualAddressRange(vaddr, sizeof(u32))) { - return std::nullopt; - } - return m_memory.Read32(vaddr); - } +u8 DynarmicCallbacks32::MemoryRead8(u32 vaddr) { + CheckMemoryAccess(vaddr, 1, Kernel::DebugWatchpointType::Read); + return m_memory.Read8(vaddr); +} +u16 DynarmicCallbacks32::MemoryRead16(u32 vaddr) { + CheckMemoryAccess(vaddr, 2, 
Kernel::DebugWatchpointType::Read); + return m_memory.Read16(vaddr); +} +u32 DynarmicCallbacks32::MemoryRead32(u32 vaddr) { + CheckMemoryAccess(vaddr, 4, Kernel::DebugWatchpointType::Read); + return m_memory.Read32(vaddr); +} +u64 DynarmicCallbacks32::MemoryRead64(u32 vaddr) { + CheckMemoryAccess(vaddr, 8, Kernel::DebugWatchpointType::Read); + return m_memory.Read64(vaddr); +} +std::optional DynarmicCallbacks32::MemoryReadCode(u32 vaddr) { + if (!m_memory.IsValidVirtualAddressRange(vaddr, sizeof(u32))) + return std::nullopt; + return m_memory.Read32(vaddr); +} - void MemoryWrite8(u32 vaddr, u8 value) override { - if (CheckMemoryAccess(vaddr, 1, Kernel::DebugWatchpointType::Write)) { - m_memory.Write8(vaddr, value); - } +void DynarmicCallbacks32::MemoryWrite8(u32 vaddr, u8 value) { + if (CheckMemoryAccess(vaddr, 1, Kernel::DebugWatchpointType::Write)) { + m_memory.Write8(vaddr, value); } - void MemoryWrite16(u32 vaddr, u16 value) override { - if (CheckMemoryAccess(vaddr, 2, Kernel::DebugWatchpointType::Write)) { - m_memory.Write16(vaddr, value); - } +} +void DynarmicCallbacks32::MemoryWrite16(u32 vaddr, u16 value) { + if (CheckMemoryAccess(vaddr, 2, Kernel::DebugWatchpointType::Write)) { + m_memory.Write16(vaddr, value); } - void MemoryWrite32(u32 vaddr, u32 value) override { - if (CheckMemoryAccess(vaddr, 4, Kernel::DebugWatchpointType::Write)) { - m_memory.Write32(vaddr, value); - } +} +void DynarmicCallbacks32::MemoryWrite32(u32 vaddr, u32 value) { + if (CheckMemoryAccess(vaddr, 4, Kernel::DebugWatchpointType::Write)) { + m_memory.Write32(vaddr, value); } - void MemoryWrite64(u32 vaddr, u64 value) override { - if (CheckMemoryAccess(vaddr, 8, Kernel::DebugWatchpointType::Write)) { - m_memory.Write64(vaddr, value); - } +} +void DynarmicCallbacks32::MemoryWrite64(u32 vaddr, u64 value) { + if (CheckMemoryAccess(vaddr, 8, Kernel::DebugWatchpointType::Write)) { + m_memory.Write64(vaddr, value); } +} - bool MemoryWriteExclusive8(u32 vaddr, u8 value, u8 expected) 
override { - return CheckMemoryAccess(vaddr, 1, Kernel::DebugWatchpointType::Write) && - m_memory.WriteExclusive8(vaddr, value, expected); - } - bool MemoryWriteExclusive16(u32 vaddr, u16 value, u16 expected) override { - return CheckMemoryAccess(vaddr, 2, Kernel::DebugWatchpointType::Write) && - m_memory.WriteExclusive16(vaddr, value, expected); - } - bool MemoryWriteExclusive32(u32 vaddr, u32 value, u32 expected) override { - return CheckMemoryAccess(vaddr, 4, Kernel::DebugWatchpointType::Write) && - m_memory.WriteExclusive32(vaddr, value, expected); - } - bool MemoryWriteExclusive64(u32 vaddr, u64 value, u64 expected) override { - return CheckMemoryAccess(vaddr, 8, Kernel::DebugWatchpointType::Write) && - m_memory.WriteExclusive64(vaddr, value, expected); - } +bool DynarmicCallbacks32::MemoryWriteExclusive8(u32 vaddr, u8 value, u8 expected) { + return CheckMemoryAccess(vaddr, 1, Kernel::DebugWatchpointType::Write) && + m_memory.WriteExclusive8(vaddr, value, expected); +} +bool DynarmicCallbacks32::MemoryWriteExclusive16(u32 vaddr, u16 value, u16 expected) { + return CheckMemoryAccess(vaddr, 2, Kernel::DebugWatchpointType::Write) && + m_memory.WriteExclusive16(vaddr, value, expected); +} +bool DynarmicCallbacks32::MemoryWriteExclusive32(u32 vaddr, u32 value, u32 expected) { + return CheckMemoryAccess(vaddr, 4, Kernel::DebugWatchpointType::Write) && + m_memory.WriteExclusive32(vaddr, value, expected); +} +bool DynarmicCallbacks32::MemoryWriteExclusive64(u32 vaddr, u64 value, u64 expected) { + return CheckMemoryAccess(vaddr, 8, Kernel::DebugWatchpointType::Write) && + m_memory.WriteExclusive64(vaddr, value, expected); +} - void InterpreterFallback(u32 pc, std::size_t num_instructions) override { - m_parent.LogBacktrace(m_process); - LOG_ERROR(Core_ARM, - "Unimplemented instruction @ {:#X} for {} instructions (instr = {:08X})", pc, - num_instructions, m_memory.Read32(pc)); - } +void DynarmicCallbacks32::InterpreterFallback(u32 pc, std::size_t num_instructions) { + 
m_parent.LogBacktrace(m_process); + LOG_ERROR(Core_ARM, + "Unimplemented instruction @ {:#X} for {} instructions (instr = {:08X})", pc, + num_instructions, m_memory.Read32(pc)); +} - void ExceptionRaised(u32 pc, Dynarmic::A32::Exception exception) override { - switch (exception) { - case Dynarmic::A32::Exception::NoExecuteFault: - LOG_CRITICAL(Core_ARM, "Cannot execute instruction at unmapped address {:#08x}", pc); - ReturnException(pc, PrefetchAbort); +void DynarmicCallbacks32::ExceptionRaised(u32 pc, Dynarmic::A32::Exception exception) { + switch (exception) { + case Dynarmic::A32::Exception::NoExecuteFault: + LOG_CRITICAL(Core_ARM, "Cannot execute instruction at unmapped address {:#08x}", pc); + ReturnException(pc, PrefetchAbort); + return; + default: + if (m_debugger_enabled) { + ReturnException(pc, InstructionBreakpoint); return; - default: - if (m_debugger_enabled) { - ReturnException(pc, InstructionBreakpoint); - return; - } - - m_parent.LogBacktrace(m_process); - LOG_CRITICAL(Core_ARM, - "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X}, thumb = {})", - exception, pc, m_memory.Read32(pc), m_parent.IsInThumbMode()); } + + m_parent.LogBacktrace(m_process); + LOG_CRITICAL(Core_ARM, + "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X}, thumb = {})", + exception, pc, m_memory.Read32(pc), m_parent.IsInThumbMode()); } +} - void CallSVC(u32 swi) override { - m_parent.m_svc_swi = swi; - m_parent.m_jit->HaltExecution(SupervisorCall); - } +void DynarmicCallbacks32::CallSVC(u32 swi) { + m_parent.m_svc_swi = swi; + m_parent.m_jit->HaltExecution(SupervisorCall); +} - void AddTicks(u64 ticks) override { - ASSERT_MSG(!m_parent.m_uses_wall_clock, "Dynarmic ticking disabled"); +void DynarmicCallbacks32::AddTicks(u64 ticks) { + ASSERT_MSG(!m_parent.m_uses_wall_clock, "Dynarmic ticking disabled"); - // Divide the number of ticks by the amount of CPU cores. 
TODO(Subv): This yields only a - // rough approximation of the amount of executed ticks in the system, it may be thrown off - // if not all cores are doing a similar amount of work. Instead of doing this, we should - // device a way so that timing is consistent across all cores without increasing the ticks 4 - // times. - u64 amortized_ticks = ticks / Core::Hardware::NUM_CPU_CORES; - // Always execute at least one tick. - amortized_ticks = std::max(amortized_ticks, 1); + // Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a + // rough approximation of the amount of executed ticks in the system, it may be thrown off + // if not all cores are doing a similar amount of work. Instead of doing this, we should + // device a way so that timing is consistent across all cores without increasing the ticks 4 + // times. + u64 amortized_ticks = ticks / Core::Hardware::NUM_CPU_CORES; + // Always execute at least one tick. + amortized_ticks = std::max(amortized_ticks, 1); - m_parent.m_system.CoreTiming().AddTicks(amortized_ticks); - } + m_parent.m_system.CoreTiming().AddTicks(amortized_ticks); +} - u64 GetTicksRemaining() override { - ASSERT_MSG(!m_parent.m_uses_wall_clock, "Dynarmic ticking disabled"); +u64 DynarmicCallbacks32::GetTicksRemaining() { + ASSERT_MSG(!m_parent.m_uses_wall_clock, "Dynarmic ticking disabled"); - return std::max(m_parent.m_system.CoreTiming().GetDowncount(), 0); - } - - bool CheckMemoryAccess(u64 addr, u64 size, Kernel::DebugWatchpointType type) { - if (!m_check_memory_access) { - return true; - } - - if (!m_memory.IsValidVirtualAddressRange(addr, size)) { - LOG_CRITICAL(Core_ARM, "Stopping execution due to unmapped memory access at {:#x}", - addr); - m_parent.m_jit->HaltExecution(PrefetchAbort); - return false; - } - - if (!m_debugger_enabled) { - return true; - } - - const auto match{m_parent.MatchingWatchpoint(addr, size, type)}; - if (match) { - m_parent.m_halted_watchpoint = match; - 
m_parent.m_jit->HaltExecution(DataAbort); - return false; - } + return std::max(m_parent.m_system.CoreTiming().GetDowncount(), 0); +} +bool DynarmicCallbacks32::CheckMemoryAccess(u64 addr, u64 size, Kernel::DebugWatchpointType type) { + if (!m_check_memory_access) { return true; } - void ReturnException(u32 pc, Dynarmic::HaltReason hr) { - m_parent.GetContext(m_parent.m_breakpoint_context); - m_parent.m_breakpoint_context.pc = pc; - m_parent.m_breakpoint_context.r[15] = pc; - m_parent.m_jit->HaltExecution(hr); + if (!m_memory.IsValidVirtualAddressRange(addr, size)) { + LOG_CRITICAL(Core_ARM, "Stopping execution due to unmapped memory access at {:#x}", + addr); + m_parent.m_jit->HaltExecution(PrefetchAbort); + return false; } - ArmDynarmic32& m_parent; - Core::Memory::Memory& m_memory; - Kernel::KProcess* m_process{}; - const bool m_debugger_enabled{}; - const bool m_check_memory_access{}; -}; + if (!m_debugger_enabled) { + return true; + } -std::shared_ptr ArmDynarmic32::MakeJit(Common::PageTable* page_table) const { + const auto match{m_parent.MatchingWatchpoint(addr, size, type)}; + if (match) { + m_parent.m_halted_watchpoint = match; + m_parent.m_jit->HaltExecution(DataAbort); + return false; + } + + return true; +} + +void DynarmicCallbacks32::ReturnException(u32 pc, Dynarmic::HaltReason hr) { + m_parent.GetContext(m_parent.m_breakpoint_context); + m_parent.m_breakpoint_context.pc = pc; + m_parent.m_breakpoint_context.r[15] = pc; + m_parent.m_jit->HaltExecution(hr); +} + +void ArmDynarmic32::MakeJit(Common::PageTable* page_table) { Dynarmic::A32::UserConfig config; - config.callbacks = m_cb.get(); + config.callbacks = std::addressof(*m_cb); config.coprocessors[15] = m_cp15; config.define_unpredictable_behaviour = true; @@ -315,7 +305,7 @@ std::shared_ptr ArmDynarmic32::MakeJit(Common::PageTable* pa default: break; } - return std::make_unique(config); + m_jit.emplace(config); } static std::pair FpscrToFpsrFpcr(u32 fpscr) { @@ -360,21 +350,17 @@ u32 
ArmDynarmic32::GetSvcNumber() const { } void ArmDynarmic32::GetSvcArguments(std::span args) const { - Dynarmic::A32::Jit& j = *m_jit; + Dynarmic::A32::Jit const& j = *m_jit; auto& gpr = j.Regs(); - - for (size_t i = 0; i < 8; i++) { + for (size_t i = 0; i < 8; i++) args[i] = gpr[i]; - } } void ArmDynarmic32::SetSvcArguments(std::span args) { Dynarmic::A32::Jit& j = *m_jit; auto& gpr = j.Regs(); - - for (size_t i = 0; i < 8; i++) { - gpr[i] = static_cast(args[i]); - } + for (size_t i = 0; i < 8; i++) + gpr[i] = u32(args[i]); } const Kernel::DebugWatchpoint* ArmDynarmic32::HaltedWatchpoint() const { @@ -387,11 +373,12 @@ void ArmDynarmic32::RewindBreakpointInstruction() { ArmDynarmic32::ArmDynarmic32(System& system, bool uses_wall_clock, Kernel::KProcess* process, DynarmicExclusiveMonitor& exclusive_monitor, std::size_t core_index) - : ArmInterface{uses_wall_clock}, m_system{system}, m_exclusive_monitor{exclusive_monitor}, - m_cb(std::make_unique(*this, process)), - m_cp15(std::make_shared(*this)), m_core_index{core_index} { + : ArmInterface{uses_wall_clock}, m_system{system}, m_exclusive_monitor{exclusive_monitor} + , m_cb(std::make_optional(*this, process)) + , m_cp15(std::make_shared(*this)), m_core_index{core_index} +{ auto& page_table_impl = process->GetPageTable().GetBasePageTable().GetImpl(); - m_jit = MakeJit(&page_table_impl); + MakeJit(&page_table_impl); } ArmDynarmic32::~ArmDynarmic32() = default; @@ -401,23 +388,18 @@ void ArmDynarmic32::SetTpidrroEl0(u64 value) { } void ArmDynarmic32::GetContext(Kernel::Svc::ThreadContext& ctx) const { - Dynarmic::A32::Jit& j = *m_jit; + Dynarmic::A32::Jit const& j = *m_jit; auto& gpr = j.Regs(); auto& fpr = j.ExtRegs(); - - for (size_t i = 0; i < 16; i++) { + for (size_t i = 0; i < 16; i++) ctx.r[i] = gpr[i]; - } - ctx.fp = gpr[11]; ctx.sp = gpr[13]; ctx.lr = gpr[14]; ctx.pc = gpr[15]; ctx.pstate = j.Cpsr(); - static_assert(sizeof(fpr) <= sizeof(ctx.v)); std::memcpy(ctx.v.data(), &fpr, sizeof(fpr)); - auto [fpsr, fpcr] 
= FpscrToFpsrFpcr(j.Fpscr()); ctx.fpcr = fpcr; ctx.fpsr = fpsr; @@ -428,16 +410,11 @@ void ArmDynarmic32::SetContext(const Kernel::Svc::ThreadContext& ctx) { Dynarmic::A32::Jit& j = *m_jit; auto& gpr = j.Regs(); auto& fpr = j.ExtRegs(); - - for (size_t i = 0; i < 16; i++) { - gpr[i] = static_cast(ctx.r[i]); - } - + for (size_t i = 0; i < 16; i++) + gpr[i] = u32(ctx.r[i]); j.SetCpsr(ctx.pstate); - static_assert(sizeof(fpr) <= sizeof(ctx.v)); std::memcpy(&fpr, ctx.v.data(), sizeof(fpr)); - j.SetFpscr(FpsrFpcrToFpscr(ctx.fpsr, ctx.fpcr)); m_cp15->uprw = static_cast(ctx.tpidr); } diff --git a/src/core/arm/dynarmic/arm_dynarmic_32.h b/src/core/arm/dynarmic/arm_dynarmic_32.h index b580efe615..1934934bd9 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_32.h +++ b/src/core/arm/dynarmic/arm_dynarmic_32.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -12,16 +15,50 @@ namespace Core::Memory { class Memory; } +namespace Kernel { +enum class DebugWatchpointType : u8; +class KPRocess; +} + namespace Core { -class DynarmicCallbacks32; +class ArmDynarmic32; class DynarmicCP15; class System; +class DynarmicCallbacks32 : public Dynarmic::A32::UserCallbacks { +public: + explicit DynarmicCallbacks32(ArmDynarmic32& parent, Kernel::KProcess* process); + u8 MemoryRead8(u32 vaddr) override; + u16 MemoryRead16(u32 vaddr) override; + u32 MemoryRead32(u32 vaddr) override; + u64 MemoryRead64(u32 vaddr) override; + std::optional MemoryReadCode(u32 vaddr) override; + void MemoryWrite8(u32 vaddr, u8 value) override; + void MemoryWrite16(u32 vaddr, u16 value) override; + void MemoryWrite32(u32 vaddr, u32 value) override; + void MemoryWrite64(u32 vaddr, u64 value) override; + bool MemoryWriteExclusive8(u32 vaddr, u8 value, u8 expected) override; + bool MemoryWriteExclusive16(u32 vaddr, u16 value, u16 
expected) override; + bool MemoryWriteExclusive32(u32 vaddr, u32 value, u32 expected) override; + bool MemoryWriteExclusive64(u32 vaddr, u64 value, u64 expected) override; + void InterpreterFallback(u32 pc, std::size_t num_instructions) override; + void ExceptionRaised(u32 pc, Dynarmic::A32::Exception exception) override; + void CallSVC(u32 swi) override; + void AddTicks(u64 ticks) override; + u64 GetTicksRemaining() override; + bool CheckMemoryAccess(u64 addr, u64 size, Kernel::DebugWatchpointType type); + void ReturnException(u32 pc, Dynarmic::HaltReason hr); + ArmDynarmic32& m_parent; + Core::Memory::Memory& m_memory; + Kernel::KProcess* m_process{}; + const bool m_debugger_enabled{}; + const bool m_check_memory_access{}; +}; + class ArmDynarmic32 final : public ArmInterface { public: - ArmDynarmic32(System& system, bool uses_wall_clock, Kernel::KProcess* process, - DynarmicExclusiveMonitor& exclusive_monitor, std::size_t core_index); + ArmDynarmic32(System& system, bool uses_wall_clock, Kernel::KProcess* process, DynarmicExclusiveMonitor& exclusive_monitor, std::size_t core_index); ~ArmDynarmic32() override; Architecture GetArchitecture() const override { @@ -57,13 +94,13 @@ private: friend class DynarmicCallbacks32; friend class DynarmicCP15; - std::shared_ptr MakeJit(Common::PageTable* page_table) const; + void MakeJit(Common::PageTable* page_table); - std::unique_ptr m_cb{}; + std::optional m_cb{}; std::shared_ptr m_cp15{}; std::size_t m_core_index{}; - std::shared_ptr m_jit{}; + std::optional m_jit{}; // SVC callback u32 m_svc_swi{}; diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.cpp b/src/core/arm/dynarmic/arm_dynarmic_64.cpp index ba6178c1e4..92e1a70458 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic_64.cpp @@ -13,223 +13,203 @@ namespace Core { -using Vector = Dynarmic::A64::Vector; using namespace Common::Literals; -class DynarmicCallbacks64 : public Dynarmic::A64::UserCallbacks { -public: - explicit 
DynarmicCallbacks64(ArmDynarmic64& parent, Kernel::KProcess* process) - : m_parent{parent}, m_memory(process->GetMemory()), - m_process(process), m_debugger_enabled{parent.m_system.DebuggerEnabled()}, - m_check_memory_access{m_debugger_enabled || - !Settings::values.cpuopt_ignore_memory_aborts.GetValue()} {} +DynarmicCallbacks64::DynarmicCallbacks64(ArmDynarmic64& parent, Kernel::KProcess* process) + : m_parent{parent}, m_memory(process->GetMemory()) + , m_process(process), m_debugger_enabled{parent.m_system.DebuggerEnabled()} + , m_check_memory_access{m_debugger_enabled || !Settings::values.cpuopt_ignore_memory_aborts.GetValue()} +{} - u8 MemoryRead8(u64 vaddr) override { - CheckMemoryAccess(vaddr, 1, Kernel::DebugWatchpointType::Read); - return m_memory.Read8(vaddr); - } - u16 MemoryRead16(u64 vaddr) override { - CheckMemoryAccess(vaddr, 2, Kernel::DebugWatchpointType::Read); - return m_memory.Read16(vaddr); - } - u32 MemoryRead32(u64 vaddr) override { - CheckMemoryAccess(vaddr, 4, Kernel::DebugWatchpointType::Read); - return m_memory.Read32(vaddr); - } - u64 MemoryRead64(u64 vaddr) override { - CheckMemoryAccess(vaddr, 8, Kernel::DebugWatchpointType::Read); - return m_memory.Read64(vaddr); - } - Vector MemoryRead128(u64 vaddr) override { - CheckMemoryAccess(vaddr, 16, Kernel::DebugWatchpointType::Read); - return {m_memory.Read64(vaddr), m_memory.Read64(vaddr + 8)}; - } - std::optional MemoryReadCode(u64 vaddr) override { - if (!m_memory.IsValidVirtualAddressRange(vaddr, sizeof(u32))) { - return std::nullopt; - } - return m_memory.Read32(vaddr); - } +u8 DynarmicCallbacks64::MemoryRead8(u64 vaddr) { + CheckMemoryAccess(vaddr, 1, Kernel::DebugWatchpointType::Read); + return m_memory.Read8(vaddr); +} +u16 DynarmicCallbacks64::MemoryRead16(u64 vaddr) { + CheckMemoryAccess(vaddr, 2, Kernel::DebugWatchpointType::Read); + return m_memory.Read16(vaddr); +} +u32 DynarmicCallbacks64::MemoryRead32(u64 vaddr) { + CheckMemoryAccess(vaddr, 4, 
Kernel::DebugWatchpointType::Read); + return m_memory.Read32(vaddr); +} +u64 DynarmicCallbacks64::MemoryRead64(u64 vaddr) { + CheckMemoryAccess(vaddr, 8, Kernel::DebugWatchpointType::Read); + return m_memory.Read64(vaddr); +} +Dynarmic::A64::Vector DynarmicCallbacks64::MemoryRead128(u64 vaddr) { + CheckMemoryAccess(vaddr, 16, Kernel::DebugWatchpointType::Read); + return {m_memory.Read64(vaddr), m_memory.Read64(vaddr + 8)}; +} +std::optional DynarmicCallbacks64::MemoryReadCode(u64 vaddr) { + if (!m_memory.IsValidVirtualAddressRange(vaddr, sizeof(u32))) + return std::nullopt; + return m_memory.Read32(vaddr); +} - void MemoryWrite8(u64 vaddr, u8 value) override { - if (CheckMemoryAccess(vaddr, 1, Kernel::DebugWatchpointType::Write)) { - m_memory.Write8(vaddr, value); - } +void DynarmicCallbacks64::MemoryWrite8(u64 vaddr, u8 value) { + if (CheckMemoryAccess(vaddr, 1, Kernel::DebugWatchpointType::Write)) { + m_memory.Write8(vaddr, value); } - void MemoryWrite16(u64 vaddr, u16 value) override { - if (CheckMemoryAccess(vaddr, 2, Kernel::DebugWatchpointType::Write)) { - m_memory.Write16(vaddr, value); - } +} +void DynarmicCallbacks64::MemoryWrite16(u64 vaddr, u16 value) { + if (CheckMemoryAccess(vaddr, 2, Kernel::DebugWatchpointType::Write)) { + m_memory.Write16(vaddr, value); } - void MemoryWrite32(u64 vaddr, u32 value) override { - if (CheckMemoryAccess(vaddr, 4, Kernel::DebugWatchpointType::Write)) { - m_memory.Write32(vaddr, value); - } +} +void DynarmicCallbacks64::MemoryWrite32(u64 vaddr, u32 value) { + if (CheckMemoryAccess(vaddr, 4, Kernel::DebugWatchpointType::Write)) { + m_memory.Write32(vaddr, value); } - void MemoryWrite64(u64 vaddr, u64 value) override { - if (CheckMemoryAccess(vaddr, 8, Kernel::DebugWatchpointType::Write)) { - m_memory.Write64(vaddr, value); - } +} +void DynarmicCallbacks64::MemoryWrite64(u64 vaddr, u64 value) { + if (CheckMemoryAccess(vaddr, 8, Kernel::DebugWatchpointType::Write)) { + m_memory.Write64(vaddr, value); } - void 
MemoryWrite128(u64 vaddr, Vector value) override { - if (CheckMemoryAccess(vaddr, 16, Kernel::DebugWatchpointType::Write)) { - m_memory.Write64(vaddr, value[0]); - m_memory.Write64(vaddr + 8, value[1]); - } +} +void DynarmicCallbacks64::MemoryWrite128(u64 vaddr, Dynarmic::A64::Vector value) { + if (CheckMemoryAccess(vaddr, 16, Kernel::DebugWatchpointType::Write)) { + m_memory.Write64(vaddr, value[0]); + m_memory.Write64(vaddr + 8, value[1]); } +} - bool MemoryWriteExclusive8(u64 vaddr, std::uint8_t value, std::uint8_t expected) override { - return CheckMemoryAccess(vaddr, 1, Kernel::DebugWatchpointType::Write) && - m_memory.WriteExclusive8(vaddr, value, expected); - } - bool MemoryWriteExclusive16(u64 vaddr, std::uint16_t value, std::uint16_t expected) override { - return CheckMemoryAccess(vaddr, 2, Kernel::DebugWatchpointType::Write) && - m_memory.WriteExclusive16(vaddr, value, expected); - } - bool MemoryWriteExclusive32(u64 vaddr, std::uint32_t value, std::uint32_t expected) override { - return CheckMemoryAccess(vaddr, 4, Kernel::DebugWatchpointType::Write) && - m_memory.WriteExclusive32(vaddr, value, expected); - } - bool MemoryWriteExclusive64(u64 vaddr, std::uint64_t value, std::uint64_t expected) override { - return CheckMemoryAccess(vaddr, 8, Kernel::DebugWatchpointType::Write) && - m_memory.WriteExclusive64(vaddr, value, expected); - } - bool MemoryWriteExclusive128(u64 vaddr, Vector value, Vector expected) override { - return CheckMemoryAccess(vaddr, 16, Kernel::DebugWatchpointType::Write) && - m_memory.WriteExclusive128(vaddr, value, expected); - } +bool DynarmicCallbacks64::MemoryWriteExclusive8(u64 vaddr, std::uint8_t value, std::uint8_t expected) { + return CheckMemoryAccess(vaddr, 1, Kernel::DebugWatchpointType::Write) && + m_memory.WriteExclusive8(vaddr, value, expected); +} +bool DynarmicCallbacks64::MemoryWriteExclusive16(u64 vaddr, std::uint16_t value, std::uint16_t expected) { + return CheckMemoryAccess(vaddr, 2, 
Kernel::DebugWatchpointType::Write) && + m_memory.WriteExclusive16(vaddr, value, expected); +} +bool DynarmicCallbacks64::MemoryWriteExclusive32(u64 vaddr, std::uint32_t value, std::uint32_t expected) { + return CheckMemoryAccess(vaddr, 4, Kernel::DebugWatchpointType::Write) && + m_memory.WriteExclusive32(vaddr, value, expected); +} +bool DynarmicCallbacks64::MemoryWriteExclusive64(u64 vaddr, std::uint64_t value, std::uint64_t expected) { + return CheckMemoryAccess(vaddr, 8, Kernel::DebugWatchpointType::Write) && + m_memory.WriteExclusive64(vaddr, value, expected); +} +bool DynarmicCallbacks64::MemoryWriteExclusive128(u64 vaddr, Dynarmic::A64::Vector value, Dynarmic::A64::Vector expected) { + return CheckMemoryAccess(vaddr, 16, Kernel::DebugWatchpointType::Write) && + m_memory.WriteExclusive128(vaddr, value, expected); +} - void InterpreterFallback(u64 pc, std::size_t num_instructions) override { - m_parent.LogBacktrace(m_process); - LOG_ERROR(Core_ARM, - "Unimplemented instruction @ {:#X} for {} instructions (instr = {:08X})", pc, - num_instructions, m_memory.Read32(pc)); +void DynarmicCallbacks64::InterpreterFallback(u64 pc, std::size_t num_instructions) { + m_parent.LogBacktrace(m_process); + LOG_ERROR(Core_ARM, "Unimplemented instruction @ {:#X} for {} instructions (instr = {:08X})", pc, + num_instructions, m_memory.Read32(pc)); + ReturnException(pc, PrefetchAbort); +} + +void DynarmicCallbacks64::InstructionCacheOperationRaised(Dynarmic::A64::InstructionCacheOperation op, u64 value) { + switch (op) { + case Dynarmic::A64::InstructionCacheOperation::InvalidateByVAToPoU: { + static constexpr u64 ICACHE_LINE_SIZE = 64; + const u64 cache_line_start = value & ~(ICACHE_LINE_SIZE - 1); + m_parent.InvalidateCacheRange(cache_line_start, ICACHE_LINE_SIZE); + break; + } + case Dynarmic::A64::InstructionCacheOperation::InvalidateAllToPoU: + m_parent.ClearInstructionCache(); + break; + case Dynarmic::A64::InstructionCacheOperation::InvalidateAllToPoUInnerSharable: + 
default: + LOG_DEBUG(Core_ARM, "Unprocesseed instruction cache operation: {}", op); + break; + } + m_parent.m_jit->HaltExecution(Dynarmic::HaltReason::CacheInvalidation); +} + +void DynarmicCallbacks64::ExceptionRaised(u64 pc, Dynarmic::A64::Exception exception) { + switch (exception) { + case Dynarmic::A64::Exception::WaitForInterrupt: + case Dynarmic::A64::Exception::WaitForEvent: + case Dynarmic::A64::Exception::SendEvent: + case Dynarmic::A64::Exception::SendEventLocal: + case Dynarmic::A64::Exception::Yield: + LOG_TRACE(Core_ARM, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})", static_cast(exception), pc, m_memory.Read32(pc)); + return; + case Dynarmic::A64::Exception::NoExecuteFault: + LOG_CRITICAL(Core_ARM, "Cannot execute instruction at unmapped address {:#016x}", pc); ReturnException(pc, PrefetchAbort); - } - - void InstructionCacheOperationRaised(Dynarmic::A64::InstructionCacheOperation op, - u64 value) override { - switch (op) { - case Dynarmic::A64::InstructionCacheOperation::InvalidateByVAToPoU: { - static constexpr u64 ICACHE_LINE_SIZE = 64; - - const u64 cache_line_start = value & ~(ICACHE_LINE_SIZE - 1); - m_parent.InvalidateCacheRange(cache_line_start, ICACHE_LINE_SIZE); - break; - } - case Dynarmic::A64::InstructionCacheOperation::InvalidateAllToPoU: - m_parent.ClearInstructionCache(); - break; - case Dynarmic::A64::InstructionCacheOperation::InvalidateAllToPoUInnerSharable: - default: - LOG_DEBUG(Core_ARM, "Unprocesseed instruction cache operation: {}", op); - break; - } - - m_parent.m_jit->HaltExecution(Dynarmic::HaltReason::CacheInvalidation); - } - - void ExceptionRaised(u64 pc, Dynarmic::A64::Exception exception) override { - switch (exception) { - case Dynarmic::A64::Exception::WaitForInterrupt: - case Dynarmic::A64::Exception::WaitForEvent: - case Dynarmic::A64::Exception::SendEvent: - case Dynarmic::A64::Exception::SendEventLocal: - case Dynarmic::A64::Exception::Yield: - LOG_TRACE(Core_ARM, "ExceptionRaised(exception = {}, 
pc = {:08X}, code = {:08X})", static_cast(exception), pc, m_memory.Read32(pc)); - return; - case Dynarmic::A64::Exception::NoExecuteFault: - LOG_CRITICAL(Core_ARM, "Cannot execute instruction at unmapped address {:#016x}", pc); - ReturnException(pc, PrefetchAbort); - return; - default: - if (m_debugger_enabled) { - ReturnException(pc, InstructionBreakpoint); - } else { - m_parent.LogBacktrace(m_process); - LOG_CRITICAL(Core_ARM, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})", static_cast(exception), pc, m_memory.Read32(pc)); - } + return; + default: + if (m_debugger_enabled) { + ReturnException(pc, InstructionBreakpoint); + } else { + m_parent.LogBacktrace(m_process); + LOG_CRITICAL(Core_ARM, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})", static_cast(exception), pc, m_memory.Read32(pc)); } } +} - void CallSVC(u32 svc) override { - m_parent.m_svc = svc; - m_parent.m_jit->HaltExecution(SupervisorCall); - } +void DynarmicCallbacks64::CallSVC(u32 svc) { + m_parent.m_svc = svc; + m_parent.m_jit->HaltExecution(SupervisorCall); +} - void AddTicks(u64 ticks) override { - ASSERT_MSG(!m_parent.m_uses_wall_clock, "Dynarmic ticking disabled"); +void DynarmicCallbacks64::AddTicks(u64 ticks) { + ASSERT_MSG(!m_parent.m_uses_wall_clock, "Dynarmic ticking disabled"); - // Divide the number of ticks by the amount of CPU cores. TODO(Subv): This yields only a - // rough approximation of the amount of executed ticks in the system, it may be thrown off - // if not all cores are doing a similar amount of work. Instead of doing this, we should - // device a way so that timing is consistent across all cores without increasing the ticks 4 - // times. - u64 amortized_ticks = ticks / Core::Hardware::NUM_CPU_CORES; - // Always execute at least one tick. - amortized_ticks = std::max(amortized_ticks, 1); + // Divide the number of ticks by the amount of CPU cores. 
TODO(Subv): This yields only a + // rough approximation of the amount of executed ticks in the system, it may be thrown off + // if not all cores are doing a similar amount of work. Instead of doing this, we should + // device a way so that timing is consistent across all cores without increasing the ticks 4 + // times. + u64 amortized_ticks = ticks / Core::Hardware::NUM_CPU_CORES; + // Always execute at least one tick. + amortized_ticks = std::max(amortized_ticks, 1); - m_parent.m_system.CoreTiming().AddTicks(amortized_ticks); - } + m_parent.m_system.CoreTiming().AddTicks(amortized_ticks); +} - u64 GetTicksRemaining() override { - ASSERT_MSG(!m_parent.m_uses_wall_clock, "Dynarmic ticking disabled"); +u64 DynarmicCallbacks64::GetTicksRemaining() { + ASSERT(!m_parent.m_uses_wall_clock && "Dynarmic ticking disabled"); + return std::max(m_parent.m_system.CoreTiming().GetDowncount(), 0); +} - return std::max(m_parent.m_system.CoreTiming().GetDowncount(), 0); - } - - u64 GetCNTPCT() override { - return m_parent.m_system.CoreTiming().GetClockTicks(); - } - - bool CheckMemoryAccess(u64 addr, u64 size, Kernel::DebugWatchpointType type) { - if (!m_check_memory_access) { - return true; - } - - if (!m_memory.IsValidVirtualAddressRange(addr, size)) { - LOG_CRITICAL(Core_ARM, "Stopping execution due to unmapped memory access at {:#x}", - addr); - m_parent.m_jit->HaltExecution(PrefetchAbort); - return false; - } - - if (!m_debugger_enabled) { - return true; - } - - const auto match{m_parent.MatchingWatchpoint(addr, size, type)}; - if (match) { - m_parent.m_halted_watchpoint = match; - m_parent.m_jit->HaltExecution(DataAbort); - return false; - } +u64 DynarmicCallbacks64::GetCNTPCT() { + return m_parent.m_system.CoreTiming().GetClockTicks(); +} +bool DynarmicCallbacks64::CheckMemoryAccess(u64 addr, u64 size, Kernel::DebugWatchpointType type) { + if (!m_check_memory_access) { return true; } - void ReturnException(u64 pc, Dynarmic::HaltReason hr) { - 
m_parent.GetContext(m_parent.m_breakpoint_context); - m_parent.m_breakpoint_context.pc = pc; - m_parent.m_jit->HaltExecution(hr); + if (!m_memory.IsValidVirtualAddressRange(addr, size)) { + LOG_CRITICAL(Core_ARM, "Stopping execution due to unmapped memory access at {:#x}", + addr); + m_parent.m_jit->HaltExecution(PrefetchAbort); + return false; } - ArmDynarmic64& m_parent; - Core::Memory::Memory& m_memory; - u64 m_tpidrro_el0{}; - u64 m_tpidr_el0{}; - Kernel::KProcess* m_process{}; - const bool m_debugger_enabled{}; - const bool m_check_memory_access{}; - static constexpr u64 MinimumRunCycles = 10000U; -}; + if (!m_debugger_enabled) { + return true; + } -std::shared_ptr ArmDynarmic64::MakeJit(Common::PageTable* page_table, - std::size_t address_space_bits) const { + const auto match{m_parent.MatchingWatchpoint(addr, size, type)}; + if (match) { + m_parent.m_halted_watchpoint = match; + m_parent.m_jit->HaltExecution(DataAbort); + return false; + } + + return true; +} + +void DynarmicCallbacks64::ReturnException(u64 pc, Dynarmic::HaltReason hr) { + m_parent.GetContext(m_parent.m_breakpoint_context); + m_parent.m_breakpoint_context.pc = pc; + m_parent.m_jit->HaltExecution(hr); +} + +void ArmDynarmic64::MakeJit(Common::PageTable* page_table, std::size_t address_space_bits) { Dynarmic::A64::UserConfig config; // Callbacks - config.callbacks = m_cb.get(); + config.callbacks = std::addressof(*m_cb); // Memory if (page_table) { @@ -375,7 +355,7 @@ std::shared_ptr ArmDynarmic64::MakeJit(Common::PageTable* pa default: break; } - return std::make_shared(config); + m_jit.emplace(config); } HaltReason ArmDynarmic64::RunThread(Kernel::KThread* thread) { @@ -393,19 +373,15 @@ u32 ArmDynarmic64::GetSvcNumber() const { } void ArmDynarmic64::GetSvcArguments(std::span args) const { - Dynarmic::A64::Jit& j = *m_jit; - - for (size_t i = 0; i < 8; i++) { + Dynarmic::A64::Jit const& j = *m_jit; + for (size_t i = 0; i < 8; i++) args[i] = j.GetRegister(i); - } } void 
ArmDynarmic64::SetSvcArguments(std::span args) { Dynarmic::A64::Jit& j = *m_jit; - - for (size_t i = 0; i < 8; i++) { + for (size_t i = 0; i < 8; i++) j.SetRegister(i, args[i]); - } } const Kernel::DebugWatchpoint* ArmDynarmic64::HaltedWatchpoint() const { @@ -416,13 +392,14 @@ void ArmDynarmic64::RewindBreakpointInstruction() { this->SetContext(m_breakpoint_context); } -ArmDynarmic64::ArmDynarmic64(System& system, bool uses_wall_clock, Kernel::KProcess* process, - DynarmicExclusiveMonitor& exclusive_monitor, std::size_t core_index) - : ArmInterface{uses_wall_clock}, m_system{system}, m_exclusive_monitor{exclusive_monitor}, - m_cb(std::make_unique(*this, process)), m_core_index{core_index} { +ArmDynarmic64::ArmDynarmic64(System& system, bool uses_wall_clock, Kernel::KProcess* process, DynarmicExclusiveMonitor& exclusive_monitor, std::size_t core_index) + : ArmInterface{uses_wall_clock}, m_system{system}, m_exclusive_monitor{exclusive_monitor} + , m_cb(std::make_optional(*this, process)) + , m_core_index{core_index} +{ auto& page_table = process->GetPageTable().GetBasePageTable(); auto& page_table_impl = page_table.GetImpl(); - m_jit = MakeJit(&page_table_impl, page_table.GetAddressSpaceWidth()); + MakeJit(&page_table_impl, page_table.GetAddressSpaceWidth()); } ArmDynarmic64::~ArmDynarmic64() = default; @@ -432,17 +409,14 @@ void ArmDynarmic64::SetTpidrroEl0(u64 value) { } void ArmDynarmic64::GetContext(Kernel::Svc::ThreadContext& ctx) const { - Dynarmic::A64::Jit& j = *m_jit; + Dynarmic::A64::Jit const& j = *m_jit; auto gpr = j.GetRegisters(); auto fpr = j.GetVectors(); - // TODO: this is inconvenient - for (size_t i = 0; i < 29; i++) { + for (size_t i = 0; i < 29; i++) ctx.r[i] = gpr[i]; - } ctx.fp = gpr[29]; ctx.lr = gpr[30]; - ctx.sp = j.GetSP(); ctx.pc = j.GetPC(); ctx.pstate = j.GetPstate(); @@ -454,16 +428,12 @@ void ArmDynarmic64::GetContext(Kernel::Svc::ThreadContext& ctx) const { void ArmDynarmic64::SetContext(const Kernel::Svc::ThreadContext& ctx) { 
Dynarmic::A64::Jit& j = *m_jit; - // TODO: this is inconvenient std::array gpr; - - for (size_t i = 0; i < 29; i++) { + for (size_t i = 0; i < 29; i++) gpr[i] = ctx.r[i]; - } gpr[29] = ctx.fp; gpr[30] = ctx.lr; - j.SetRegisters(gpr); j.SetSP(ctx.sp); j.SetPC(ctx.pc); diff --git a/src/core/arm/dynarmic/arm_dynarmic_64.h b/src/core/arm/dynarmic/arm_dynarmic_64.h index 08cd982b30..2ea1505ce7 100644 --- a/src/core/arm/dynarmic/arm_dynarmic_64.h +++ b/src/core/arm/dynarmic/arm_dynarmic_64.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -17,12 +20,57 @@ namespace Core::Memory { class Memory; } +namespace Kernel { +enum class DebugWatchpointType : u8; +class KPRocess; +} + namespace Core { -class DynarmicCallbacks64; +class ArmDynarmic64; class DynarmicExclusiveMonitor; class System; +class DynarmicCallbacks64 : public Dynarmic::A64::UserCallbacks { +public: + explicit DynarmicCallbacks64(ArmDynarmic64& parent, Kernel::KProcess* process); + + u8 MemoryRead8(u64 vaddr) override; + u16 MemoryRead16(u64 vaddr) override; + u32 MemoryRead32(u64 vaddr) override; + u64 MemoryRead64(u64 vaddr) override; + Dynarmic::A64::Vector MemoryRead128(u64 vaddr) override; + std::optional MemoryReadCode(u64 vaddr) override; + void MemoryWrite8(u64 vaddr, u8 value) override; + void MemoryWrite16(u64 vaddr, u16 value) override; + void MemoryWrite32(u64 vaddr, u32 value) override; + void MemoryWrite64(u64 vaddr, u64 value) override; + void MemoryWrite128(u64 vaddr, Dynarmic::A64::Vector value) override; + bool MemoryWriteExclusive8(u64 vaddr, std::uint8_t value, std::uint8_t expected) override; + bool MemoryWriteExclusive16(u64 vaddr, std::uint16_t value, std::uint16_t expected) override; + bool MemoryWriteExclusive32(u64 vaddr, std::uint32_t value, std::uint32_t expected) override; + bool 
MemoryWriteExclusive64(u64 vaddr, std::uint64_t value, std::uint64_t expected) override; + bool MemoryWriteExclusive128(u64 vaddr, Dynarmic::A64::Vector value, Dynarmic::A64::Vector expected) override; + void InterpreterFallback(u64 pc, std::size_t num_instructions) override; + void InstructionCacheOperationRaised(Dynarmic::A64::InstructionCacheOperation op, u64 value) override; + void ExceptionRaised(u64 pc, Dynarmic::A64::Exception exception) override; + void CallSVC(u32 svc) override; + void AddTicks(u64 ticks) override; + u64 GetTicksRemaining() override; + u64 GetCNTPCT() override; + bool CheckMemoryAccess(u64 addr, u64 size, Kernel::DebugWatchpointType type); + void ReturnException(u64 pc, Dynarmic::HaltReason hr); + + ArmDynarmic64& m_parent; + Core::Memory::Memory& m_memory; + u64 m_tpidrro_el0{}; + u64 m_tpidr_el0{}; + Kernel::KProcess* m_process{}; + const bool m_debugger_enabled{}; + const bool m_check_memory_access{}; + static constexpr u64 MinimumRunCycles = 10000U; +}; + class ArmDynarmic64 final : public ArmInterface { public: ArmDynarmic64(System& system, bool uses_wall_clock, Kernel::KProcess* process, @@ -59,12 +107,11 @@ private: private: friend class DynarmicCallbacks64; - std::shared_ptr MakeJit(Common::PageTable* page_table, - std::size_t address_space_bits) const; - std::unique_ptr m_cb{}; + void MakeJit(Common::PageTable* page_table, std::size_t address_space_bits); + std::optional m_cb{}; std::size_t m_core_index{}; - std::shared_ptr m_jit{}; + std::optional m_jit{}; // SVC callback u32 m_svc{}; diff --git a/src/core/core.cpp b/src/core/core.cpp index bf97184f8f..aea2b2b060 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -112,11 +112,10 @@ struct System::Impl { u64 program_id; void Initialize(System& system) { - device_memory = std::make_unique(); + device_memory.emplace(); is_multicore = Settings::values.use_multi_core.GetValue(); - extended_memory_layout = - Settings::values.memory_layout_mode.GetValue() != 
Settings::MemoryLayout::Memory_4Gb; + extended_memory_layout = Settings::values.memory_layout_mode.GetValue() != Settings::MemoryLayout::Memory_4Gb; core_timing.SetMulticore(is_multicore); core_timing.Initialize([&system]() { system.RegisterHostThread(); }); @@ -132,7 +131,7 @@ struct System::Impl { // Create default implementations of applets if one is not provided. frontend_applets.SetDefaultAppletsIfMissing(); - is_async_gpu = Settings::values.use_asynchronous_gpu_emulation.GetValue(); + auto const is_async_gpu = Settings::values.use_asynchronous_gpu_emulation.GetValue(); kernel.SetMulticore(is_multicore); cpu_manager.SetMulticore(is_multicore); @@ -254,7 +253,7 @@ struct System::Impl { } void InitializeDebugger(System& system, u16 port) { - debugger = std::make_unique(system, port); + debugger.emplace(system, port); } void InitializeKernel(System& system) { @@ -268,24 +267,22 @@ struct System::Impl { } SystemResultStatus SetupForApplicationProcess(System& system, Frontend::EmuWindow& emu_window) { - host1x_core = std::make_unique(system); + host1x_core.emplace(system); gpu_core = VideoCore::CreateGPU(emu_window, system); - if (!gpu_core) { + if (!gpu_core) return SystemResultStatus::ErrorVideoCore; - } - audio_core = std::make_unique(system); + audio_core.emplace(system); service_manager = std::make_shared(kernel); - services = - std::make_unique(service_manager, system, stop_event.get_token()); + services.emplace(service_manager, system, stop_event.get_token()); is_powered_on = true; exit_locked = false; exit_requested = false; if (Settings::values.enable_renderdoc_hotkey) { - renderdoc_api = std::make_unique(); + renderdoc_api.emplace(); } LOG_DEBUG(Core, "Initialized OK"); @@ -303,16 +300,11 @@ struct System::Impl { // Create the application process Loader::ResultStatus load_result{}; std::vector control; - auto process = - Service::AM::CreateApplicationProcess(control, app_loader, load_result, system, file, - params.program_id, params.program_index); - + 
auto process = Service::AM::CreateApplicationProcess(control, app_loader, load_result, system, file, params.program_id, params.program_index); if (load_result != Loader::ResultStatus::Success) { LOG_CRITICAL(Core, "Failed to load ROM (Error {})!", load_result); ShutdownMainProcess(); - - return static_cast( - static_cast(SystemResultStatus::ErrorLoader) + static_cast(load_result)); + return SystemResultStatus(u32(SystemResultStatus::ErrorLoader) + u32(load_result)); } if (!app_loader) { @@ -337,8 +329,7 @@ struct System::Impl { // Set up the rest of the system. SystemResultStatus init_result{SetupForApplicationProcess(system, emu_window)}; if (init_result != SystemResultStatus::Success) { - LOG_CRITICAL(Core, "Failed to initialize system (Error {})!", - static_cast(init_result)); + LOG_CRITICAL(Core, "Failed to initialize system (Error {})!", int(init_result)); ShutdownMainProcess(); return init_result; } @@ -361,24 +352,19 @@ struct System::Impl { } } - perf_stats = std::make_unique(params.program_id); + perf_stats.emplace(params.program_id); // Reset counters and set time origin to current frame GetAndResetPerfStats(); perf_stats->BeginSystemFrame(); - std::string title_version; - const FileSys::PatchManager pm(params.program_id, system.GetFileSystemController(), - system.GetContentProvider()); - const auto metadata = pm.GetControlMetadata(); - if (metadata.first != nullptr) { - title_version = metadata.first->GetVersionString(); - } + const FileSys::PatchManager pm(params.program_id, system.GetFileSystemController(), system.GetContentProvider()); + auto const metadata = pm.GetControlMetadata(); + std::string title_version = metadata.first != nullptr ? 
metadata.first->GetVersionString() : ""; if (app_loader->ReadProgramId(program_id) != Loader::ResultStatus::Success) { LOG_ERROR(Core, "Failed to find program id for ROM"); } - GameSettings::LoadOverrides(program_id, gpu_core->Renderer()); if (auto room_member = Network::GetRoomMember().lock()) { Network::GameInfo game_info; @@ -387,9 +373,7 @@ struct System::Impl { game_info.version = title_version; room_member->SendGameInfo(game_info); } - - status = SystemResultStatus::Success; - return status; + return SystemResultStatus::Success; } void ShutdownMainProcess() { @@ -448,112 +432,79 @@ struct System::Impl { } Loader::ResultStatus GetGameName(std::string& out) const { - if (app_loader == nullptr) - return Loader::ResultStatus::ErrorNotInitialized; - return app_loader->ReadTitle(out); - } - - void SetStatus(SystemResultStatus new_status, const char* details = nullptr) { - status = new_status; - if (details) { - status_details = details; - } + return app_loader ? app_loader->ReadTitle(out) : Loader::ResultStatus::ErrorNotInitialized; } PerfStatsResults GetAndResetPerfStats() { return perf_stats->GetAndResetStats(core_timing.GetGlobalTimeUs()); } - mutable std::mutex suspend_guard; - std::atomic_bool is_paused{}; - std::atomic is_shutting_down{}; - Timing::CoreTiming core_timing; Kernel::KernelCore kernel; /// RealVfsFilesystem instance FileSys::VirtualFilesystem virtual_filesystem; - /// ContentProviderUnion instance - std::unique_ptr content_provider; Service::FileSystem::FileSystemController fs_controller; - /// AppLoader used to load the current executing application - std::unique_ptr app_loader; - std::unique_ptr gpu_core; - std::unique_ptr host1x_core; - std::unique_ptr device_memory; - std::unique_ptr audio_core; Core::HID::HIDCore hid_core; - CpuManager cpu_manager; - std::atomic_bool is_powered_on{}; - bool exit_locked = false; - bool exit_requested = false; - - bool nvdec_active{}; - Reporter reporter; - std::unique_ptr cheat_engine; - std::unique_ptr 
memory_freezer; - std::array build_id{}; - - std::unique_ptr renderdoc_api; - /// Applets Service::AM::AppletManager applet_manager; Service::AM::Frontend::FrontendAppletHolder frontend_applets; - /// APM (Performance) services Service::APM::Controller apm_controller{core_timing}; - /// Service State Service::Glue::ARPManager arp_manager; Service::Account::ProfileManager profile_manager; + /// Network instance + Network::NetworkInstance network_instance; + Core::SpeedLimiter speed_limiter; + ExecuteProgramCallback execute_program_callback; + ExitCallback exit_callback; + + std::optional services; + std::optional debugger; + std::optional general_channel_context; + std::optional general_channel_event; + std::optional perf_stats; + std::optional host1x_core; + std::optional device_memory; + std::optional audio_core; + std::optional cheat_engine; + std::optional memory_freezer; + std::optional renderdoc_api; + + std::array gpu_dirty_memory_managers; + std::vector> user_channel; + std::vector> general_channel; + + std::array dynarmic_ticks{}; + std::array build_id{}; /// Service manager std::shared_ptr service_manager; - - /// Services - std::unique_ptr services; - - /// Network instance - Network::NetworkInstance network_instance; - - /// Debugger - std::unique_ptr debugger; - - SystemResultStatus status = SystemResultStatus::Success; - std::string status_details = ""; - - std::unique_ptr perf_stats; - Core::SpeedLimiter speed_limiter; - - bool is_multicore{}; - bool is_async_gpu{}; - bool extended_memory_layout{}; - - ExecuteProgramCallback execute_program_callback; - ExitCallback exit_callback; + /// ContentProviderUnion instance + std::unique_ptr content_provider; + /// AppLoader used to load the current executing application + std::unique_ptr app_loader; + std::unique_ptr gpu_core; std::stop_source stop_event; - std::array dynarmic_ticks{}; - - std::array - gpu_dirty_memory_managers; - - std::deque> user_channel; - + mutable std::mutex suspend_guard; std::mutex 
general_channel_mutex; - std::deque> general_channel; - std::unique_ptr general_channel_context; // lazy - std::unique_ptr general_channel_event; // lazy - bool general_channel_initialized{false}; + std::atomic_bool is_paused{}; + std::atomic_bool is_shutting_down{}; + std::atomic_bool is_powered_on{}; + bool is_multicore : 1 = false; + bool extended_memory_layout : 1 = false; + bool exit_locked : 1 = false; + bool exit_requested : 1 = false; + bool nvdec_active : 1 = false; void EnsureGeneralChannelInitialized(System& system) { - if (general_channel_initialized) { - return; + if (!general_channel_event) { + general_channel_context.emplace(system, "GeneralChannel"); + general_channel_event.emplace(*general_channel_context); } - general_channel_context = std::make_unique(system, "GeneralChannel"); - general_channel_event = std::make_unique(*general_channel_context); - general_channel_initialized = true; } }; @@ -776,14 +727,6 @@ Loader::ResultStatus System::GetGameName(std::string& out) const { return impl->GetGameName(out); } -void System::SetStatus(SystemResultStatus new_status, const char* details) { - impl->SetStatus(new_status, details); -} - -const std::string& System::GetStatusDetails() const { - return impl->status_details; -} - Loader::AppLoader& System::GetAppLoader() { return *impl->app_loader; } @@ -803,7 +746,7 @@ FileSys::VirtualFilesystem System::GetFilesystem() const { void System::RegisterCheatList(const std::vector& list, const std::array& build_id, u64 main_region_begin, u64 main_region_size) { - impl->cheat_engine = std::make_unique(*this, list, build_id); + impl->cheat_engine.emplace(*this, list, build_id); impl->cheat_engine->SetMainMemoryParameters(main_region_begin, main_region_size); } @@ -964,11 +907,13 @@ void System::ExecuteProgram(std::size_t program_index) { } } -std::deque>& System::GetUserChannel() { +/// @brief Gets a reference to the user channel stack. +/// It is used to transfer data between programs. 
+std::vector>& System::GetUserChannel() { return impl->user_channel; } -std::deque>& System::GetGeneralChannel() { +std::vector>& System::GetGeneralChannel() { return impl->general_channel; } @@ -984,7 +929,7 @@ void System::PushGeneralChannelData(std::vector&& data) { bool System::TryPopGeneralChannel(std::vector& out_data) { std::scoped_lock lk{impl->general_channel_mutex}; - if (!impl->general_channel_initialized || impl->general_channel.empty()) { + if (!impl->general_channel_event || impl->general_channel.empty()) { return false; } out_data = std::move(impl->general_channel.back()); diff --git a/src/core/core.h b/src/core/core.h index 60bf73d4e1..702c5cc81b 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -325,10 +325,6 @@ public: /// Gets the name of the current game [[nodiscard]] Loader::ResultStatus GetGameName(std::string& out) const; - void SetStatus(SystemResultStatus new_status, const char* details); - - [[nodiscard]] const std::string& GetStatusDetails() const; - [[nodiscard]] Loader::AppLoader& GetAppLoader(); [[nodiscard]] const Loader::AppLoader& GetAppLoader() const; @@ -424,13 +420,8 @@ public: */ void ExecuteProgram(std::size_t program_index); - /** - * Gets a reference to the user channel stack. - * It is used to transfer data between programs. 
- */ - [[nodiscard]] std::deque>& GetUserChannel(); - - [[nodiscard]] std::deque>& GetGeneralChannel(); + [[nodiscard]] std::vector>& GetUserChannel(); + [[nodiscard]] std::vector>& GetGeneralChannel(); void PushGeneralChannelData(std::vector&& data); bool TryPopGeneralChannel(std::vector& out_data); [[nodiscard]] Service::Event& GetGeneralChannelEvent(); diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index 3c847c8359..5a582c8cff 100644 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp @@ -53,13 +53,6 @@ CoreTiming::~CoreTiming() { Reset(); } -void CoreTiming::ThreadEntry(CoreTiming& instance) { - Common::SetCurrentThreadName("HostTiming"); - Common::SetCurrentThreadPriority(Common::ThreadPriority::High); - instance.on_thread_init(); - instance.ThreadLoop(); -} - void CoreTiming::Initialize(std::function&& on_thread_init_) { Reset(); on_thread_init = std::move(on_thread_init_); @@ -67,7 +60,12 @@ void CoreTiming::Initialize(std::function&& on_thread_init_) { shutting_down = false; cpu_ticks = 0; if (is_multicore) { - timer_thread = std::make_unique(ThreadEntry, std::ref(*this)); + timer_thread.emplace([](CoreTiming& instance) { + Common::SetCurrentThreadName("HostTiming"); + Common::SetCurrentThreadPriority(Common::ThreadPriority::High); + instance.on_thread_init(); + instance.ThreadLoop(); + }, std::ref(*this)); } } diff --git a/src/core/core_timing.h b/src/core/core_timing.h index 7e4dff7f3d..ae9f56d519 100644 --- a/src/core/core_timing.h +++ b/src/core/core_timing.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -140,8 +143,6 @@ public: private: struct Event; - - static void ThreadEntry(CoreTiming& instance); void ThreadLoop(); void Reset(); @@ -164,7 +165,7 @@ private: Common::Event pause_event{}; mutable std::mutex basic_lock; std::mutex 
advance_lock; - std::unique_ptr timer_thread; + std::optional timer_thread; std::atomic paused{}; std::atomic paused_set{}; std::atomic wait_set{}; diff --git a/src/core/hle/kernel/k_process.cpp b/src/core/hle/kernel/k_process.cpp index 322f971ba3..082049f957 100644 --- a/src/core/hle/kernel/k_process.cpp +++ b/src/core/hle/kernel/k_process.cpp @@ -1148,9 +1148,17 @@ Result KProcess::GetThreadList(s32* out_num_threads, KProcessAddress out_thread_ void KProcess::Switch(KProcess* cur_process, KProcess* next_process) {} KProcess::KProcess(KernelCore& kernel) - : KAutoObjectWithSlabHeapAndContainer(kernel), m_page_table{kernel}, m_state_lock{kernel}, - m_list_lock{kernel}, m_cond_var{kernel.System()}, m_address_arbiter{kernel.System()}, - m_handle_table{kernel}, m_exclusive_monitor{}, m_memory{kernel.System()} {} + : KAutoObjectWithSlabHeapAndContainer(kernel) + , m_exclusive_monitor{} + , m_memory{kernel.System()} + , m_handle_table{kernel} + , m_page_table{kernel} + , m_state_lock{kernel} + , m_list_lock{kernel} + , m_cond_var{kernel.System()} + , m_address_arbiter{kernel.System()} +{} + KProcess::~KProcess() = default; Result KProcess::LoadFromMetadata(const FileSys::ProgramMetadata& metadata, std::size_t code_size, diff --git a/src/core/hle/kernel/k_process.h b/src/core/hle/kernel/k_process.h index 92ddb1aca4..13717cc090 100644 --- a/src/core/hle/kernel/k_process.h +++ b/src/core/hle/kernel/k_process.h @@ -66,60 +66,55 @@ public: private: using SharedMemoryInfoList = Common::IntrusiveListBaseTraits::ListType; - using TLPTree = - Common::IntrusiveRedBlackTreeBaseTraits::TreeType; + using TLPTree = Common::IntrusiveRedBlackTreeBaseTraits::TreeType; using TLPIterator = TLPTree::iterator; private: - KProcessPageTable m_page_table; - std::atomic m_used_kernel_memory_size{}; - TLPTree m_fully_used_tlp_tree{}; - TLPTree m_partially_used_tlp_tree{}; - s32 m_ideal_core_id{}; - KResourceLimit* m_resource_limit{}; - KSystemResource* m_system_resource{}; - size_t 
m_memory_release_hint{}; - State m_state{}; - KLightLock m_state_lock; - KLightLock m_list_lock; - KConditionVariable m_cond_var; - KAddressArbiter m_address_arbiter; - std::array m_entropy{}; - bool m_is_signaled{}; - bool m_is_initialized{}; - u32 m_pointer_buffer_size = 0x8000; // Default pointer buffer size (can be game-specific later) - bool m_is_application{}; - bool m_is_default_application_system_resource{}; - bool m_is_hbl{}; - std::array m_name{}; - std::atomic m_num_running_threads{}; - Svc::CreateProcessFlag m_flags{}; - KMemoryManager::Pool m_memory_pool{}; - s64 m_schedule_count{}; - KCapabilities m_capabilities{}; - u64 m_program_id{}; - u64 m_process_id{}; - KProcessAddress m_code_address{}; - size_t m_code_size{}; - size_t m_main_thread_stack_size{}; - size_t m_max_process_memory{}; - u32 m_version{}; - KHandleTable m_handle_table; - KProcessAddress m_plr_address{}; - KThread* m_exception_thread{}; - ThreadList m_thread_list{}; - SharedMemoryInfoList m_shared_memory_list{}; - bool m_is_suspended{}; - bool m_is_immortal{}; - bool m_is_handle_table_initialized{}; - std::array, Core::Hardware::NUM_CPU_CORES> - m_arm_interfaces{}; + std::array, Core::Hardware::NUM_CPU_CORES> m_arm_interfaces{}; std::array m_running_threads{}; std::array m_running_thread_idle_counts{}; std::array m_running_thread_switch_counts{}; std::array m_pinned_threads{}; std::array m_watchpoints{}; std::map m_debug_page_refcounts{}; +#ifdef HAS_NCE + std::unordered_map m_post_handlers{}; +#endif + std::unique_ptr m_exclusive_monitor; + Core::Memory::Memory m_memory; + KCapabilities m_capabilities{}; + KProcessAddress m_code_address{}; + KHandleTable m_handle_table; + KProcessAddress m_plr_address{}; + ThreadList m_thread_list{}; + SharedMemoryInfoList m_shared_memory_list{}; + KProcessPageTable m_page_table; + std::atomic m_used_kernel_memory_size{}; + TLPTree m_fully_used_tlp_tree{}; + TLPTree m_partially_used_tlp_tree{}; + State m_state{}; + KLightLock m_state_lock; + KLightLock 
m_list_lock; + KConditionVariable m_cond_var; + KAddressArbiter m_address_arbiter; + std::array m_entropy{}; + u32 m_pointer_buffer_size = 0x8000; // Default pointer buffer size (can be game-specific later) + std::array m_name{}; + Svc::CreateProcessFlag m_flags{}; + KMemoryManager::Pool m_memory_pool{}; + + KResourceLimit* m_resource_limit{}; + KSystemResource* m_system_resource{}; + KThread* m_exception_thread{}; + + size_t m_code_size{}; + size_t m_main_thread_stack_size{}; + size_t m_max_process_memory{}; + size_t m_memory_release_hint{}; + s64 m_schedule_count{}; + u64 m_program_id{}; + u64 m_process_id{}; + std::atomic m_cpu_time{}; std::atomic m_num_process_switches{}; std::atomic m_num_thread_switches{}; @@ -128,11 +123,20 @@ private: std::atomic m_num_ipc_messages{}; std::atomic m_num_ipc_replies{}; std::atomic m_num_ipc_receives{}; -#ifdef HAS_NCE - std::unordered_map m_post_handlers{}; -#endif - std::unique_ptr m_exclusive_monitor; - Core::Memory::Memory m_memory; + + s32 m_ideal_core_id{}; + u32 m_version{}; + + std::atomic m_num_running_threads{}; + + bool m_is_signaled : 1 = false; + bool m_is_initialized : 1 = false; + bool m_is_application : 1 = false; + bool m_is_default_application_system_resource : 1 = false; + bool m_is_hbl : 1 = false; + bool m_is_suspended : 1 = false; + bool m_is_immortal : 1 = false; + bool m_is_handle_table_initialized : 1 = false; private: Result StartTermination(); diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 062387a29b..6986a98e35 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -88,11 +88,11 @@ struct KernelCore::Impl { } void Initialize(KernelCore& kernel) { - hardware_timer = std::make_unique(kernel); + hardware_timer.emplace(kernel); hardware_timer->Initialize(); - global_object_list_container = std::make_unique(kernel); - global_scheduler_context = std::make_unique(kernel); + global_object_list_container.emplace(kernel); + 
global_scheduler_context.emplace(kernel); // Derive the initial memory layout from the emulated board Init::InitializeSlabResourceCounts(kernel); @@ -212,10 +212,9 @@ struct KernelCore::Impl { void InitializePhysicalCores() { for (u32 i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) { - const s32 core{static_cast(i)}; - - schedulers[i] = std::make_unique(system.Kernel()); - cores[i] = std::make_unique(system.Kernel(), i); + auto const core = s32(i); + schedulers[i].emplace(system.Kernel()); + cores[i].emplace(system.Kernel(), i); auto* main_thread{Kernel::KThread::Create(system.Kernel())}; main_thread->SetCurrentCore(core); @@ -280,57 +279,56 @@ struct KernelCore::Impl { size -= rc_size; // Initialize the resource managers' shared page manager. - resource_manager_page_manager = std::make_unique(); + resource_manager_page_manager.emplace(); resource_manager_page_manager->Initialize(address, size, std::max(PageSize, KPageBufferSlabHeap::BufferSize)); // Initialize the KPageBuffer slab heap. page_buffer_slab_heap.Initialize(system); // Initialize the fixed-size slabheaps. 
- app_memory_block_heap = std::make_unique(); - sys_memory_block_heap = std::make_unique(); - block_info_heap = std::make_unique(); - app_memory_block_heap->Initialize(resource_manager_page_manager.get(), ApplicationMemoryBlockSlabHeapSize); - sys_memory_block_heap->Initialize(resource_manager_page_manager.get(), SystemMemoryBlockSlabHeapSize); - block_info_heap->Initialize(resource_manager_page_manager.get(), BlockInfoSlabHeapSize); + app_memory_block_heap.emplace(); + sys_memory_block_heap.emplace(); + block_info_heap.emplace(); + app_memory_block_heap->Initialize(std::addressof(*resource_manager_page_manager), ApplicationMemoryBlockSlabHeapSize); + sys_memory_block_heap->Initialize(std::addressof(*resource_manager_page_manager), SystemMemoryBlockSlabHeapSize); + block_info_heap->Initialize(std::addressof(*resource_manager_page_manager), BlockInfoSlabHeapSize); // Reserve all but a fixed number of remaining pages for the page table heap. const size_t num_pt_pages = resource_manager_page_manager->GetCount() - resource_manager_page_manager->GetUsed() - ReservedDynamicPageCount; - page_table_heap = std::make_unique(); + page_table_heap.emplace(); // TODO(bunnei): Pass in address once we support kernel virtual memory allocations. page_table_heap->Initialize( - resource_manager_page_manager.get(), num_pt_pages, + std::addressof(*resource_manager_page_manager), num_pt_pages, /*GetPointer(address + size)*/ nullptr); // Setup the slab managers. KDynamicPageManager* const app_dynamic_page_manager = nullptr; KDynamicPageManager* const sys_dynamic_page_manager = /*KTargetSystem::IsDynamicResourceLimitsEnabled()*/ true - ? resource_manager_page_manager.get() - : nullptr; - app_memory_block_manager = std::make_unique(); - sys_memory_block_manager = std::make_unique(); - app_block_info_manager = std::make_unique(); - sys_block_info_manager = std::make_unique(); - app_page_table_manager = std::make_unique(); - sys_page_table_manager = std::make_unique(); + ? 
std::addressof(*resource_manager_page_manager) : nullptr; + app_memory_block_manager.emplace(); + sys_memory_block_manager.emplace(); + app_block_info_manager.emplace(); + sys_block_info_manager.emplace(); + app_page_table_manager.emplace(); + sys_page_table_manager.emplace(); - app_memory_block_manager->Initialize(app_dynamic_page_manager, app_memory_block_heap.get()); - sys_memory_block_manager->Initialize(sys_dynamic_page_manager, sys_memory_block_heap.get()); + app_memory_block_manager->Initialize(app_dynamic_page_manager, std::addressof(*app_memory_block_heap)); + sys_memory_block_manager->Initialize(sys_dynamic_page_manager, std::addressof(*sys_memory_block_heap)); - app_block_info_manager->Initialize(app_dynamic_page_manager, block_info_heap.get()); - sys_block_info_manager->Initialize(sys_dynamic_page_manager, block_info_heap.get()); + app_block_info_manager->Initialize(app_dynamic_page_manager, std::addressof(*block_info_heap)); + sys_block_info_manager->Initialize(sys_dynamic_page_manager, std::addressof(*block_info_heap)); - app_page_table_manager->Initialize(app_dynamic_page_manager, page_table_heap.get()); - sys_page_table_manager->Initialize(sys_dynamic_page_manager, page_table_heap.get()); + app_page_table_manager->Initialize(app_dynamic_page_manager, std::addressof(*page_table_heap)); + sys_page_table_manager->Initialize(sys_dynamic_page_manager, std::addressof(*page_table_heap)); // Check that we have the correct number of dynamic pages available. ASSERT(resource_manager_page_manager->GetCount() - resource_manager_page_manager->GetUsed() == ReservedDynamicPageCount); // Create the system page table managers. 
- app_system_resource = std::make_unique(kernel); - sys_system_resource = std::make_unique(kernel); + app_system_resource.emplace(kernel); + sys_system_resource.emplace(kernel); KAutoObject::Create(std::addressof(*app_system_resource)); KAutoObject::Create(std::addressof(*sys_system_resource)); @@ -349,7 +347,7 @@ struct KernelCore::Impl { } void InitializeGlobalData(KernelCore& kernel) { - object_name_global_data = std::make_unique(kernel); + object_name_global_data.emplace(kernel); } void MakeApplicationProcess(KProcess* process) { @@ -431,7 +429,7 @@ struct KernelCore::Impl { } void DeriveInitialMemoryLayout() { - memory_layout = std::make_unique(); + memory_layout.emplace(); // Insert the root region for the virtual memory tree, from which all other regions will // derive. @@ -726,7 +724,7 @@ struct KernelCore::Impl { void InitializeMemoryLayout() { // Initialize the memory manager. - memory_manager = std::make_unique(system); + memory_manager.emplace(system); const auto& management_region = memory_layout->GetPoolManagementRegion(); ASSERT(management_region.GetEndAddress() != 0); memory_manager->Initialize(management_region.GetAddress(), management_region.GetSize()); @@ -774,8 +772,8 @@ struct KernelCore::Impl { std::mutex process_list_lock; std::vector process_list; KProcess* application_process{}; - std::unique_ptr global_scheduler_context; - std::unique_ptr hardware_timer; + std::optional global_scheduler_context; + std::optional hardware_timer; Init::KSlabResourceCounts slab_resource_counts{}; KResourceLimit* system_resource_limit{}; @@ -784,9 +782,9 @@ struct KernelCore::Impl { std::shared_ptr preemption_event; - std::unique_ptr global_object_list_container; + std::optional global_object_list_container; - std::unique_ptr object_name_global_data; + std::optional object_name_global_data; std::unordered_set registered_objects; std::unordered_set registered_in_use_objects; @@ -794,28 +792,28 @@ struct KernelCore::Impl { std::mutex server_lock; std::vector> 
server_managers; - std::array, Core::Hardware::NUM_CPU_CORES> cores; + std::array, Core::Hardware::NUM_CPU_CORES> cores; // Next host thead ID to use, 0-3 IDs represent core threads, >3 represent others std::atomic next_host_thread_id{Core::Hardware::NUM_CPU_CORES}; // Kernel memory management - std::unique_ptr memory_manager; + std::optional memory_manager; // Resource managers - std::unique_ptr resource_manager_page_manager; - std::unique_ptr page_table_heap; - std::unique_ptr app_memory_block_heap; - std::unique_ptr sys_memory_block_heap; - std::unique_ptr block_info_heap; - std::unique_ptr app_page_table_manager; - std::unique_ptr sys_page_table_manager; - std::unique_ptr app_memory_block_manager; - std::unique_ptr sys_memory_block_manager; - std::unique_ptr app_block_info_manager; - std::unique_ptr sys_block_info_manager; - std::unique_ptr app_system_resource; - std::unique_ptr sys_system_resource; + std::optional resource_manager_page_manager; + std::optional page_table_heap; + std::optional app_memory_block_heap; + std::optional sys_memory_block_heap; + std::optional block_info_heap; + std::optional app_page_table_manager; + std::optional sys_page_table_manager; + std::optional app_memory_block_manager; + std::optional sys_memory_block_manager; + std::optional app_block_info_manager; + std::optional sys_block_info_manager; + std::optional app_system_resource; + std::optional sys_system_resource; // Shared memory for services Kernel::KSharedMemory* hid_shared_mem{}; @@ -825,10 +823,10 @@ struct KernelCore::Impl { Kernel::KSharedMemory* hidbus_shared_mem{}; // Memory layout - std::unique_ptr memory_layout; + std::optional memory_layout; std::array shutdown_threads{}; - std::array, Core::Hardware::NUM_CPU_CORES> schedulers{}; + std::array, Core::Hardware::NUM_CPU_CORES> schedulers{}; bool is_multicore{}; std::atomic_bool is_shutting_down{}; @@ -948,12 +946,9 @@ const Kernel::PhysicalCore& KernelCore::CurrentPhysicalCore() const { } Kernel::KScheduler* 
KernelCore::CurrentScheduler() { - const u32 core_id = impl->GetCurrentHostThreadID(); - if (core_id >= Core::Hardware::NUM_CPU_CORES) { - // This is expected when called from not a guest thread - return {}; - } - return impl->schedulers[core_id].get(); + if (auto const core_id = impl->GetCurrentHostThreadID(); core_id < Core::Hardware::NUM_CPU_CORES) + return std::addressof(*impl->schedulers[core_id]); + return {}; // This is expected when called from not a guest thread } Kernel::KHardwareTimer& KernelCore::HardwareTimer() { diff --git a/src/core/hle/service/am/applet.h b/src/core/hle/service/am/applet.h index 0763a5838e..a693a47d7a 100644 --- a/src/core/hle/service/am/applet.h +++ b/src/core/hle/service/am/applet.h @@ -95,9 +95,9 @@ struct Applet { bool request_exit_to_library_applet_at_execute_next_program_enabled{}; // Channels - std::deque> user_channel_launch_parameter{}; - std::deque> preselected_user_launch_parameter{}; - std::deque> friend_invitation_storage_channel{}; + std::vector> user_channel_launch_parameter{}; + std::vector> preselected_user_launch_parameter{}; + std::vector> friend_invitation_storage_channel{}; // Context Stack std::stack> context_stack{}; diff --git a/src/core/hle/service/ns/platform_service_manager.cpp b/src/core/hle/service/ns/platform_service_manager.cpp index 293c014eae..ec9f64945d 100644 --- a/src/core/hle/service/ns/platform_service_manager.cpp +++ b/src/core/hle/service/ns/platform_service_manager.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include "common/assert.h" #include "common/common_types.h" @@ -40,96 +41,51 @@ constexpr u32 EXPECTED_MAGIC{0x36f81a1e}; // What we expect the encrypted bfttf constexpr u64 SHARED_FONT_MEM_SIZE{0x1100000}; constexpr FontRegion EMPTY_REGION{0, 0}; -static void DecryptSharedFont(const std::vector& input, Kernel::PhysicalMemory& output, - std::size_t& offset) { - ASSERT_MSG(offset + (input.size() * sizeof(u32)) < SHARED_FONT_MEM_SIZE, - "Shared fonts exceeds 17mb!"); - 
ASSERT_MSG(input[0] == EXPECTED_MAGIC, "Failed to derive key, unexpected magic number"); - +static void DecryptSharedFont(const std::span input, std::span output, std::size_t& offset) { + ASSERT(offset + (input.size() * sizeof(u32)) < SHARED_FONT_MEM_SIZE && "Shared fonts exceeds 17mb!"); + ASSERT(input[0] == EXPECTED_MAGIC && "Failed to derive key, unexpected magic number"); const u32 KEY = input[0] ^ EXPECTED_RESULT; // Derive key using an inverse xor std::vector transformed_font(input.size()); // TODO(ogniK): Figure out a better way to do this - std::transform(input.begin(), input.end(), transformed_font.begin(), - [&KEY](u32 font_data) { return Common::swap32(font_data ^ KEY); }); + std::transform(input.begin(), input.end(), transformed_font.begin(), [&KEY](u32 font_data) { return Common::swap32(font_data ^ KEY); }); transformed_font[1] = Common::swap32(transformed_font[1]) ^ KEY; // "re-encrypt" the size - std::memcpy(output.data() + offset, transformed_font.data(), - transformed_font.size() * sizeof(u32)); + std::memcpy(output.data() + offset, transformed_font.data(), transformed_font.size() * sizeof(u32)); offset += transformed_font.size() * sizeof(u32); } void DecryptSharedFontToTTF(const std::vector& input, std::vector& output) { ASSERT_MSG(input[0] == EXPECTED_MAGIC, "Failed to derive key, unexpected magic number"); - if (input.size() < 2) { LOG_ERROR(Service_NS, "Input font is empty"); return; } - const u32 KEY = input[0] ^ EXPECTED_RESULT; // Derive key using an inverse xor std::vector transformed_font(input.size()); // TODO(ogniK): Figure out a better way to do this - std::transform(input.begin(), input.end(), transformed_font.begin(), - [&KEY](u32 font_data) { return Common::swap32(font_data ^ KEY); }); - std::memcpy(output.data(), transformed_font.data() + 2, - (transformed_font.size() - 2) * sizeof(u32)); + std::transform(input.begin(), input.end(), transformed_font.begin(), [&KEY](u32 font_data) { return Common::swap32(font_data ^ KEY); }); + 
std::memcpy(output.data(), transformed_font.data() + 2, (transformed_font.size() - 2) * sizeof(u32)); } -void EncryptSharedFont(const std::vector& input, std::vector& output, - std::size_t& offset) { - ASSERT_MSG(offset + (input.size() * sizeof(u32)) < SHARED_FONT_MEM_SIZE, - "Shared fonts exceeds 17mb!"); - +void EncryptSharedFont(const std::vector& input, std::vector& output, std::size_t& offset) { + ASSERT(offset + (input.size() * sizeof(u32)) < SHARED_FONT_MEM_SIZE && "Shared fonts exceeds 17mb!"); const auto key = Common::swap32(EXPECTED_RESULT ^ EXPECTED_MAGIC); std::vector transformed_font(input.size() + 2); transformed_font[0] = Common::swap32(EXPECTED_MAGIC); transformed_font[1] = Common::swap32(static_cast(input.size() * sizeof(u32))) ^ key; - std::transform(input.begin(), input.end(), transformed_font.begin() + 2, - [key](u32 in) { return in ^ key; }); - std::memcpy(output.data() + offset, transformed_font.data(), - transformed_font.size() * sizeof(u32)); + std::transform(input.begin(), input.end(), transformed_font.begin() + 2, [key](u32 in) { return in ^ key; }); + std::memcpy(output.data() + offset, transformed_font.data(), transformed_font.size() * sizeof(u32)); offset += transformed_font.size() * sizeof(u32); } -// Helper function to make BuildSharedFontsRawRegions a bit nicer -static u32 GetU32Swapped(const u8* data) { - u32 value; - std::memcpy(&value, data, sizeof(value)); - return Common::swap32(value); -} - struct IPlatformServiceManager::Impl { const FontRegion& GetSharedFontRegion(std::size_t index) const { - if (index >= shared_font_regions.size() || shared_font_regions.empty()) { - // No font fallback - return EMPTY_REGION; - } - return shared_font_regions.at(index); + return index < shared_font_regions.size() ? 
shared_font_regions[index] : EMPTY_REGION; } - - void BuildSharedFontsRawRegions(const Kernel::PhysicalMemory& input) { - // As we can derive the xor key we can just populate the offsets - // based on the shared memory dump - unsigned cur_offset = 0; - - for (std::size_t i = 0; i < SHARED_FONTS.size(); i++) { - // Out of shared fonts/invalid font - if (GetU32Swapped(input.data() + cur_offset) != EXPECTED_RESULT) { - break; - } - - // Derive key within inverse xor - const u32 KEY = GetU32Swapped(input.data() + cur_offset) ^ EXPECTED_MAGIC; - const u32 SIZE = GetU32Swapped(input.data() + cur_offset + 4) ^ KEY; - shared_font_regions.push_back(FontRegion{cur_offset + 8, SIZE}); - cur_offset += SIZE + 8; - } - } - - /// Backing memory for the shared font data - std::shared_ptr shared_font; - // Automatically populated based on shared_fonts dump or system archives. - std::vector shared_font_regions; + // 6 builtin fonts + extra 2 for whatever may come after + boost::container::static_vector shared_font_regions; + /// Backing memory for the shared font data + std::array shared_font; }; IPlatformServiceManager::IPlatformServiceManager(Core::System& system_, const char* service_name_) @@ -162,8 +118,6 @@ IPlatformServiceManager::IPlatformServiceManager(Core::System& system_, const ch const auto* nand = fsc.GetSystemNANDContents(); std::size_t offset = 0; // Rebuild shared fonts from data ncas or synthesize - - impl->shared_font = std::make_shared(SHARED_FONT_MEM_SIZE); for (auto& font : SHARED_FONTS) { FileSys::VirtualFile romfs; const auto nca = @@ -197,9 +151,8 @@ IPlatformServiceManager::IPlatformServiceManager(Core::System& system_, const ch std::transform(font_data_u32.begin(), font_data_u32.end(), font_data_u32.begin(), Common::swap32); // Font offset and size do not account for the header - const FontRegion region{static_cast(offset + 8), - static_cast((font_data_u32.size() * sizeof(u32)) - 8)}; - DecryptSharedFont(font_data_u32, *impl->shared_font, offset); + const 
FontRegion region{u32(offset + 8), u32((font_data_u32.size() * sizeof(u32)) - 8)}; + DecryptSharedFont(font_data_u32, impl->shared_font, offset); impl->shared_font_regions.push_back(region); } } @@ -231,14 +184,12 @@ Result IPlatformServiceManager::GetSharedMemoryAddressOffset(Out out_shared R_SUCCEED(); } -Result IPlatformServiceManager::GetSharedMemoryNativeHandle( - OutCopyHandle out_shared_memory_native_handle) { +Result IPlatformServiceManager::GetSharedMemoryNativeHandle(OutCopyHandle out_shared_memory_native_handle) { // Map backing memory for the font data LOG_DEBUG(Service_NS, "called"); // Create shared font memory object - std::memcpy(kernel.GetFontSharedMem().GetPointer(), impl->shared_font->data(), - impl->shared_font->size()); + std::memcpy(kernel.GetFontSharedMem().GetPointer(), impl->shared_font.data(), impl->shared_font.size()); // FIXME: this shouldn't belong to the kernel *out_shared_memory_native_handle = &kernel.GetFontSharedMem(); diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 94599532b3..c94b66e6bc 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -87,12 +87,8 @@ add_library(video_core STATIC host1x/syncpoint_manager.h host1x/vic.cpp host1x/vic.h - macro/macro.cpp - macro/macro.h - macro/macro_hle.cpp - macro/macro_hle.h - macro/macro_interpreter.cpp - macro/macro_interpreter.h + macro.cpp + macro.h fence_manager.h gpu.cpp gpu.h @@ -375,10 +371,6 @@ else() endif() if (ARCHITECTURE_x86_64) - target_sources(video_core PRIVATE - macro/macro_jit_x64.cpp - macro/macro_jit_x64.h - ) target_link_libraries(video_core PUBLIC xbyak::xbyak) endif() diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index a67b35453b..03b2e3fdf9 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -107,35 +107,27 @@ bool DmaPusher::Step() { } void DmaPusher::ProcessCommands(std::span commands) { - for (std::size_t index = 0; index < commands.size();) { - 
const CommandHeader& command_header = commands[index]; - - if (dma_state.method_count) { - // Data word of methods command - dma_state.dma_word_offset = static_cast(index * sizeof(u32)); - if (dma_state.non_incrementing) { - const u32 max_write = static_cast( - std::min(index + dma_state.method_count, commands.size()) - index); - CallMultiMethod(&command_header.argument, max_write); - dma_state.method_count -= max_write; - dma_state.is_last_call = true; - index += max_write; - continue; - } else { - dma_state.is_last_call = dma_state.method_count <= 1; - CallMethod(command_header.argument); - } - - if (!dma_state.non_incrementing) { - dma_state.method++; - } - - if (dma_increment_once) { - dma_state.non_incrementing = true; - } - + for (size_t index = 0; index < commands.size();) { + // Data word of methods command + if (dma_state.method_count && dma_state.non_incrementing) { + auto const& command_header = commands[index]; //must ref (MUltiMethod re) + dma_state.dma_word_offset = u32(index * sizeof(u32)); + const u32 max_write = u32(std::min(index + dma_state.method_count, commands.size()) - index); + CallMultiMethod(&command_header.argument, max_write); + dma_state.method_count -= max_write; + dma_state.is_last_call = true; + index += max_write; + } else if (dma_state.method_count) { + auto const command_header = commands[index]; //can copy + dma_state.dma_word_offset = u32(index * sizeof(u32)); + dma_state.is_last_call = dma_state.method_count <= 1; + CallMethod(command_header.argument); + dma_state.method += !dma_state.non_incrementing ? 
1 : 0; + dma_state.non_incrementing |= dma_increment_once; dma_state.method_count--; + index++; } else { + auto const command_header = commands[index]; //can copy // No command active - this is the first word of a new one switch (command_header.mode) { case SubmissionMode::Increasing: @@ -151,8 +143,7 @@ void DmaPusher::ProcessCommands(std::span commands) { case SubmissionMode::Inline: dma_state.method = command_header.method; dma_state.subchannel = command_header.subchannel; - dma_state.dma_word_offset = static_cast( - -static_cast(dma_state.dma_get)); // negate to set address as 0 + dma_state.dma_word_offset = u64(-s64(dma_state.dma_get)); // negate to set address as 0 CallMethod(command_header.arg_count); dma_state.non_incrementing = true; dma_increment_once = false; @@ -165,8 +156,8 @@ void DmaPusher::ProcessCommands(std::span commands) { default: break; } + index++; } - index++; } } @@ -186,26 +177,24 @@ void DmaPusher::CallMethod(u32 argument) const { }); } else { auto subchannel = subchannels[dma_state.subchannel]; - if (!subchannel->execution_mask[dma_state.method]) [[likely]] { + if (!subchannel->execution_mask[dma_state.method]) { subchannel->method_sink.emplace_back(dma_state.method, argument); - return; + } else { + subchannel->ConsumeSink(); + subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset; + subchannel->CallMethod(dma_state.method, argument, dma_state.is_last_call); } - subchannel->ConsumeSink(); - subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset; - subchannel->CallMethod(dma_state.method, argument, dma_state.is_last_call); } } void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const { if (dma_state.method < non_puller_methods) { - puller.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods, - dma_state.method_count); + puller.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods, dma_state.method_count); } else { 
auto subchannel = subchannels[dma_state.subchannel]; subchannel->ConsumeSink(); subchannel->current_dma_segment = dma_state.dma_get + dma_state.dma_word_offset; - subchannel->CallMultiMethod(dma_state.method, base_start, num_methods, - dma_state.method_count); + subchannel->CallMultiMethod(dma_state.method, base_start, num_methods, dma_state.method_count); } } diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 77729fd5b6..d8d2ad74c6 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -27,9 +27,7 @@ constexpr u32 MacroRegistersStart = 0xE00; Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_) : draw_manager{std::make_unique(this)}, system{system_}, - memory_manager{memory_manager_}, macro_engine{GetMacroEngine(*this)}, upload_state{ - memory_manager, - regs.upload} { + memory_manager{memory_manager_}, macro_engine{GetMacroEngine(*this)}, upload_state{memory_manager, regs.upload} { dirty.flags.flip(); InitializeRegisterDefaults(); execution_mask.reset(); @@ -329,8 +327,7 @@ void Maxwell3D::ProcessDirtyRegisters(u32 method, u32 argument) { } } -void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argument, - bool is_last_call) { +void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argument, bool is_last_call) { switch (method) { case MAXWELL3D_REG_INDEX(wait_for_idle): return rasterizer->WaitForIdle(); @@ -427,9 +424,7 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { return; } - ASSERT_MSG(method < Regs::NUM_REGS, - "Invalid Maxwell3D register, increase the size of the Regs structure"); - + ASSERT(method < Regs::NUM_REGS && "Invalid Maxwell3D register, increase the size of the Regs structure"); const u32 argument = ProcessShadowRam(method, method_argument); ProcessDirtyRegisters(method, argument); ProcessMethodCall(method, argument, method_argument, is_last_call); @@ -670,7 
+665,7 @@ Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { } u32 Maxwell3D::GetRegisterValue(u32 method) const { - ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register"); + ASSERT(method < Regs::NUM_REGS && "Invalid Maxwell3D register"); return regs.reg_array[method]; } diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index ae2e7a84c4..8c50a4ea2f 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -23,7 +23,7 @@ #include "video_core/engines/engine_interface.h" #include "video_core/engines/engine_upload.h" #include "video_core/gpu.h" -#include "video_core/macro/macro.h" +#include "video_core/macro.h" #include "video_core/textures/texture.h" namespace Core { @@ -3203,7 +3203,7 @@ private: std::vector macro_params; /// Interpreter for the macro codes uploaded to the GPU. - std::unique_ptr macro_engine; + std::optional macro_engine; Upload::State upload_state; diff --git a/src/video_core/macro.cpp b/src/video_core/macro.cpp new file mode 100644 index 0000000000..3fe69be4dd --- /dev/null +++ b/src/video_core/macro.cpp @@ -0,0 +1,1667 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include +#include +#include + +#include +#ifdef ARCHITECTURE_x86_64 +// xbyak hates human beings +#ifdef __GNUC__ +#pragma GCC diagnostic ignored "-Wconversion" +#pragma GCC diagnostic ignored "-Wshadow" +#endif +#ifdef __clang__ +#pragma clang diagnostic ignored "-Wconversion" +#pragma clang diagnostic ignored "-Wshadow" +#endif +#include +#endif + +#include "common/assert.h" +#include "common/scope_exit.h" +#include "common/fs/fs.h" +#include "common/fs/path_util.h" +#include "common/settings.h" +#include "common/container_hash.h" +#include "video_core/engines/maxwell_3d.h" +#include 
"video_core/engines/draw_manager.h" +#include "video_core/dirty_flags.h" +#include "video_core/rasterizer_interface.h" +#include "video_core/macro.h" + +#include "common/assert.h" +#include "common/bit_field.h" +#include "common/logging/log.h" +#ifdef ARCHITECTURE_x86_64 +#include "common/x64/xbyak_abi.h" +#include "common/x64/xbyak_util.h" +#endif +#include "video_core/engines/maxwell_3d.h" + +namespace Tegra { + +using Maxwell3D = Engines::Maxwell3D; + +namespace { + +bool IsTopologySafe(Maxwell3D::Regs::PrimitiveTopology topology) { + switch (topology) { + case Maxwell3D::Regs::PrimitiveTopology::Points: + case Maxwell3D::Regs::PrimitiveTopology::Lines: + case Maxwell3D::Regs::PrimitiveTopology::LineLoop: + case Maxwell3D::Regs::PrimitiveTopology::LineStrip: + case Maxwell3D::Regs::PrimitiveTopology::Triangles: + case Maxwell3D::Regs::PrimitiveTopology::TriangleStrip: + case Maxwell3D::Regs::PrimitiveTopology::TriangleFan: + case Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency: + case Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency: + case Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency: + case Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency: + case Maxwell3D::Regs::PrimitiveTopology::Patches: + return true; + case Maxwell3D::Regs::PrimitiveTopology::Quads: + case Maxwell3D::Regs::PrimitiveTopology::QuadStrip: + case Maxwell3D::Regs::PrimitiveTopology::Polygon: + default: + return false; + } +} + +class HLEMacroImpl : public CachedMacro { +public: + explicit HLEMacroImpl(Maxwell3D& maxwell3d_) + : CachedMacro(maxwell3d_) + {} +}; + +/// @note: these macros have two versions, a normal and extended version, with the extended version +/// also assigning the base vertex/instance. 
+template +class HLE_DrawArraysIndirect final : public HLEMacroImpl { +public: + explicit HLE_DrawArraysIndirect(Maxwell3D& maxwell3d_) + : HLEMacroImpl(maxwell3d_) + {} + + void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { + auto topology = static_cast(parameters[0]); + if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) { + Fallback(parameters); + return; + } + + auto& params = maxwell3d.draw_manager->GetIndirectParams(); + params.is_byte_count = false; + params.is_indexed = false; + params.include_count = false; + params.count_start_address = 0; + params.indirect_start_address = maxwell3d.GetMacroAddress(1); + params.buffer_size = 4 * sizeof(u32); + params.max_draw_counts = 1; + params.stride = 0; + + if (extended) { + maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; + maxwell3d.SetHLEReplacementAttributeType(0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseInstance); + } + + maxwell3d.draw_manager->DrawArrayIndirect(topology); + + if (extended) { + maxwell3d.engine_state = Maxwell3D::EngineHint::None; + maxwell3d.replace_table.clear(); + } + } + +private: + void Fallback(const std::vector& parameters) { + SCOPE_EXIT { + if (extended) { + maxwell3d.engine_state = Maxwell3D::EngineHint::None; + maxwell3d.replace_table.clear(); + } + }; + maxwell3d.RefreshParameters(); + const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); + + auto topology = static_cast(parameters[0]); + const u32 vertex_first = parameters[3]; + const u32 vertex_count = parameters[1]; + + if (!IsTopologySafe(topology) && size_t(maxwell3d.GetMaxCurrentVertices()) < size_t(vertex_first) + size_t(vertex_count)) { + ASSERT(false && "Faulty draw!"); + return; + } + + const u32 base_instance = parameters[4]; + if (extended) { + maxwell3d.regs.global_base_instance_index = base_instance; + maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; + maxwell3d.SetHLEReplacementAttributeType( + 0, 0x640, 
Maxwell3D::HLEReplacementAttributeType::BaseInstance); + } + + maxwell3d.draw_manager->DrawArray(topology, vertex_first, vertex_count, base_instance, + instance_count); + + if (extended) { + maxwell3d.regs.global_base_instance_index = 0; + maxwell3d.engine_state = Maxwell3D::EngineHint::None; + maxwell3d.replace_table.clear(); + } + } +}; + +/* + * @note: these macros have two versions, a normal and extended version, with the extended version + * also assigning the base vertex/instance. + */ +template +class HLE_DrawIndexedIndirect final : public HLEMacroImpl { +public: + explicit HLE_DrawIndexedIndirect(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} + + void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { + auto topology = static_cast(parameters[0]); + if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) { + Fallback(parameters); + return; + } + + const u32 estimate = static_cast(maxwell3d.EstimateIndexBufferSize()); + const u32 element_base = parameters[4]; + const u32 base_instance = parameters[5]; + maxwell3d.regs.vertex_id_base = element_base; + maxwell3d.regs.global_base_vertex_index = element_base; + maxwell3d.regs.global_base_instance_index = base_instance; + maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + if (extended) { + maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; + maxwell3d.SetHLEReplacementAttributeType(0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex); + maxwell3d.SetHLEReplacementAttributeType(0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance); + } + auto& params = maxwell3d.draw_manager->GetIndirectParams(); + params.is_byte_count = false; + params.is_indexed = true; + params.include_count = false; + params.count_start_address = 0; + params.indirect_start_address = maxwell3d.GetMacroAddress(1); + params.buffer_size = 5 * sizeof(u32); + params.max_draw_counts = 1; + params.stride = 0; + maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = 
true; + maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, estimate); + maxwell3d.regs.vertex_id_base = 0x0; + maxwell3d.regs.global_base_vertex_index = 0x0; + maxwell3d.regs.global_base_instance_index = 0x0; + if (extended) { + maxwell3d.engine_state = Maxwell3D::EngineHint::None; + maxwell3d.replace_table.clear(); + } + } + +private: + void Fallback(const std::vector& parameters) { + maxwell3d.RefreshParameters(); + const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); + const u32 element_base = parameters[4]; + const u32 base_instance = parameters[5]; + maxwell3d.regs.vertex_id_base = element_base; + maxwell3d.regs.global_base_vertex_index = element_base; + maxwell3d.regs.global_base_instance_index = base_instance; + maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + if (extended) { + maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; + maxwell3d.SetHLEReplacementAttributeType(0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex); + maxwell3d.SetHLEReplacementAttributeType(0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance); + } + + maxwell3d.draw_manager->DrawIndex(Tegra::Maxwell3D::Regs::PrimitiveTopology(parameters[0]), parameters[3], parameters[1], element_base, base_instance, instance_count); + + maxwell3d.regs.vertex_id_base = 0x0; + maxwell3d.regs.global_base_vertex_index = 0x0; + maxwell3d.regs.global_base_instance_index = 0x0; + if (extended) { + maxwell3d.engine_state = Maxwell3D::EngineHint::None; + maxwell3d.replace_table.clear(); + } + } +}; + +class HLE_MultiLayerClear final : public HLEMacroImpl { +public: + explicit HLE_MultiLayerClear(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} + + void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { + maxwell3d.RefreshParameters(); + ASSERT(parameters.size() == 1); + + const Maxwell3D::Regs::ClearSurface clear_params{parameters[0]}; + const u32 rt_index = clear_params.RT; + const u32 num_layers = 
maxwell3d.regs.rt[rt_index].depth; + ASSERT(clear_params.layer == 0); + + maxwell3d.regs.clear_surface.raw = clear_params.raw; + maxwell3d.draw_manager->Clear(num_layers); + } +}; + +class HLE_MultiDrawIndexedIndirectCount final : public HLEMacroImpl { +public: + explicit HLE_MultiDrawIndexedIndirectCount(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} + + void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { + const auto topology = Maxwell3D::Regs::PrimitiveTopology(parameters[2]); + if (!IsTopologySafe(topology)) { + Fallback(parameters); + return; + } + + const u32 start_indirect = parameters[0]; + const u32 end_indirect = parameters[1]; + if (start_indirect >= end_indirect) { + // Nothing to do. + return; + } + + const u32 padding = parameters[3]; // padding is in words + + // size of each indirect segment + const u32 indirect_words = 5 + padding; + const u32 stride = indirect_words * sizeof(u32); + const std::size_t draw_count = end_indirect - start_indirect; + const u32 estimate = static_cast(maxwell3d.EstimateIndexBufferSize()); + maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + auto& params = maxwell3d.draw_manager->GetIndirectParams(); + params.is_byte_count = false; + params.is_indexed = true; + params.include_count = true; + params.count_start_address = maxwell3d.GetMacroAddress(4); + params.indirect_start_address = maxwell3d.GetMacroAddress(5); + params.buffer_size = stride * draw_count; + params.max_draw_counts = draw_count; + params.stride = stride; + maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; + maxwell3d.SetHLEReplacementAttributeType( + 0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex); + maxwell3d.SetHLEReplacementAttributeType( + 0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance); + maxwell3d.SetHLEReplacementAttributeType(0, 0x648, + Maxwell3D::HLEReplacementAttributeType::DrawID); + 
maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, estimate); + maxwell3d.engine_state = Maxwell3D::EngineHint::None; + maxwell3d.replace_table.clear(); + } + +private: + void Fallback(const std::vector& parameters) { + SCOPE_EXIT { + // Clean everything. + maxwell3d.regs.vertex_id_base = 0x0; + maxwell3d.engine_state = Maxwell3D::EngineHint::None; + maxwell3d.replace_table.clear(); + }; + maxwell3d.RefreshParameters(); + const u32 start_indirect = parameters[0]; + const u32 end_indirect = parameters[1]; + if (start_indirect >= end_indirect) { + // Nothing to do. + return; + } + const auto topology = static_cast(parameters[2]); + const u32 padding = parameters[3]; + const std::size_t max_draws = parameters[4]; + + const u32 indirect_words = 5 + padding; + const std::size_t first_draw = start_indirect; + const std::size_t effective_draws = end_indirect - start_indirect; + const std::size_t last_draw = start_indirect + (std::min)(effective_draws, max_draws); + + for (std::size_t index = first_draw; index < last_draw; index++) { + const std::size_t base = index * indirect_words + 5; + const u32 base_vertex = parameters[base + 3]; + const u32 base_instance = parameters[base + 4]; + maxwell3d.regs.vertex_id_base = base_vertex; + maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; + maxwell3d.SetHLEReplacementAttributeType( + 0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex); + maxwell3d.SetHLEReplacementAttributeType( + 0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance); + maxwell3d.CallMethod(0x8e3, 0x648, true); + maxwell3d.CallMethod(0x8e4, static_cast(index), true); + maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; + maxwell3d.draw_manager->DrawIndex(topology, parameters[base + 2], parameters[base], + base_vertex, base_instance, parameters[base + 1]); + } + } +}; + +class HLE_DrawIndirectByteCount final : public HLEMacroImpl { +public: + explicit HLE_DrawIndirectByteCount(Maxwell3D& maxwell3d_) : 
HLEMacroImpl(maxwell3d_) {} + + void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { + const bool force = maxwell3d.Rasterizer().HasDrawTransformFeedback(); + + auto topology = static_cast(parameters[0] & 0xFFFFU); + if (!force && (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology))) { + Fallback(parameters); + return; + } + auto& params = maxwell3d.draw_manager->GetIndirectParams(); + params.is_byte_count = true; + params.is_indexed = false; + params.include_count = false; + params.count_start_address = 0; + params.indirect_start_address = maxwell3d.GetMacroAddress(2); + params.buffer_size = 4; + params.max_draw_counts = 1; + params.stride = parameters[1]; + maxwell3d.regs.draw.begin = parameters[0]; + maxwell3d.regs.draw_auto_stride = parameters[1]; + maxwell3d.regs.draw_auto_byte_count = parameters[2]; + + maxwell3d.draw_manager->DrawArrayIndirect(topology); + } + +private: + void Fallback(const std::vector& parameters) { + maxwell3d.RefreshParameters(); + + maxwell3d.regs.draw.begin = parameters[0]; + maxwell3d.regs.draw_auto_stride = parameters[1]; + maxwell3d.regs.draw_auto_byte_count = parameters[2]; + + maxwell3d.draw_manager->DrawArray( + maxwell3d.regs.draw.topology, 0, + maxwell3d.regs.draw_auto_byte_count / maxwell3d.regs.draw_auto_stride, 0, 1); + } +}; + +class HLE_C713C83D8F63CCF3 final : public HLEMacroImpl { +public: + explicit HLE_C713C83D8F63CCF3(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} + + void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { + maxwell3d.RefreshParameters(); + const u32 offset = (parameters[0] & 0x3FFFFFFF) << 2; + const u32 address = maxwell3d.regs.shadow_scratch[24]; + auto& const_buffer = maxwell3d.regs.const_buffer; + const_buffer.size = 0x7000; + const_buffer.address_high = (address >> 24) & 0xFF; + const_buffer.address_low = address << 8; + const_buffer.offset = offset; + } +}; + +class HLE_D7333D26E0A93EDE final : public HLEMacroImpl { 
+public: + explicit HLE_D7333D26E0A93EDE(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} + + void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { + maxwell3d.RefreshParameters(); + const size_t index = parameters[0]; + const u32 address = maxwell3d.regs.shadow_scratch[42 + index]; + const u32 size = maxwell3d.regs.shadow_scratch[47 + index]; + auto& const_buffer = maxwell3d.regs.const_buffer; + const_buffer.size = size; + const_buffer.address_high = (address >> 24) & 0xFF; + const_buffer.address_low = address << 8; + } +}; + +class HLE_BindShader final : public HLEMacroImpl { +public: + explicit HLE_BindShader(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} + + void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { + maxwell3d.RefreshParameters(); + auto& regs = maxwell3d.regs; + const u32 index = parameters[0]; + if ((parameters[1] - regs.shadow_scratch[28 + index]) == 0) { + return; + } + + regs.pipelines[index & 0xF].offset = parameters[2]; + maxwell3d.dirty.flags[VideoCommon::Dirty::Shaders] = true; + regs.shadow_scratch[28 + index] = parameters[1]; + regs.shadow_scratch[34 + index] = parameters[2]; + + const u32 address = parameters[4]; + auto& const_buffer = regs.const_buffer; + const_buffer.size = 0x10000; + const_buffer.address_high = (address >> 24) & 0xFF; + const_buffer.address_low = address << 8; + + const size_t bind_group_id = parameters[3] & 0x7F; + auto& bind_group = regs.bind_groups[bind_group_id]; + bind_group.raw_config = 0x11; + maxwell3d.ProcessCBBind(bind_group_id); + } +}; + +class HLE_SetRasterBoundingBox final : public HLEMacroImpl { +public: + explicit HLE_SetRasterBoundingBox(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} + + void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { + maxwell3d.RefreshParameters(); + const u32 raster_mode = parameters[0]; + auto& regs = maxwell3d.regs; + const u32 raster_enabled = 
maxwell3d.regs.conservative_raster_enable; + const u32 scratch_data = maxwell3d.regs.shadow_scratch[52]; + regs.raster_bounding_box.raw = raster_mode & 0xFFFFF00F; + regs.raster_bounding_box.pad.Assign(scratch_data & raster_enabled); + } +}; + +template +class HLE_ClearConstBuffer final : public HLEMacroImpl { +public: + explicit HLE_ClearConstBuffer(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} + + void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { + maxwell3d.RefreshParameters(); + static constexpr std::array zeroes{}; + auto& regs = maxwell3d.regs; + regs.const_buffer.size = u32(base_size); + regs.const_buffer.address_high = parameters[0]; + regs.const_buffer.address_low = parameters[1]; + regs.const_buffer.offset = 0; + maxwell3d.ProcessCBMultiData(zeroes.data(), parameters[2] * 4); + } +}; + +class HLE_ClearMemory final : public HLEMacroImpl { +public: + explicit HLE_ClearMemory(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} + + void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { + maxwell3d.RefreshParameters(); + + const u32 needed_memory = parameters[2] / sizeof(u32); + if (needed_memory > zero_memory.size()) { + zero_memory.resize(needed_memory, 0); + } + auto& regs = maxwell3d.regs; + regs.upload.line_length_in = parameters[2]; + regs.upload.line_count = 1; + regs.upload.dest.address_high = parameters[0]; + regs.upload.dest.address_low = parameters[1]; + maxwell3d.CallMethod(size_t(MAXWELL3D_REG_INDEX(launch_dma)), 0x1011, true); + maxwell3d.CallMultiMethod(size_t(MAXWELL3D_REG_INDEX(inline_data)), zero_memory.data(), needed_memory, needed_memory); + } + +private: + std::vector zero_memory; +}; + +class HLE_TransformFeedbackSetup final : public HLEMacroImpl { +public: + explicit HLE_TransformFeedbackSetup(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} + + void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { + maxwell3d.RefreshParameters(); + + 
auto& regs = maxwell3d.regs; + regs.transform_feedback_enabled = 1; + regs.transform_feedback.buffers[0].start_offset = 0; + regs.transform_feedback.buffers[1].start_offset = 0; + regs.transform_feedback.buffers[2].start_offset = 0; + regs.transform_feedback.buffers[3].start_offset = 0; + + regs.upload.line_length_in = 4; + regs.upload.line_count = 1; + regs.upload.dest.address_high = parameters[0]; + regs.upload.dest.address_low = parameters[1]; + maxwell3d.CallMethod(size_t(MAXWELL3D_REG_INDEX(launch_dma)), 0x1011, true); + maxwell3d.CallMethod(size_t(MAXWELL3D_REG_INDEX(inline_data)), regs.transform_feedback.controls[0].stride, true); + + maxwell3d.Rasterizer().RegisterTransformFeedback(regs.upload.dest.Address()); + } +}; + +} // Anonymous namespace + +HLEMacro::HLEMacro(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {} + +HLEMacro::~HLEMacro() = default; + +std::unique_ptr HLEMacro::GetHLEProgram(u64 hash) const { + // Compiler will make you a GREAT job at making an ad-hoc hash table :) + switch (hash) { + case 0x0D61FC9FAAC9FCADULL: return std::make_unique>(maxwell3d); + case 0x8A4D173EB99A8603ULL: return std::make_unique>(maxwell3d); + case 0x771BB18C62444DA0ULL: return std::make_unique>(maxwell3d); + case 0x0217920100488FF7ULL: return std::make_unique>(maxwell3d); + case 0x3F5E74B9C9A50164ULL: return std::make_unique(maxwell3d); + case 0xEAD26C3E2109B06BULL: return std::make_unique(maxwell3d); + case 0xC713C83D8F63CCF3ULL: return std::make_unique(maxwell3d); + case 0xD7333D26E0A93EDEULL: return std::make_unique(maxwell3d); + case 0xEB29B2A09AA06D38ULL: return std::make_unique(maxwell3d); + case 0xDB1341DBEB4C8AF7ULL: return std::make_unique(maxwell3d); + case 0x6C97861D891EDf7EULL: return std::make_unique>(maxwell3d); + case 0xD246FDDF3A6173D7ULL: return std::make_unique>(maxwell3d); + case 0xEE4D0004BEC8ECF4ULL: return std::make_unique(maxwell3d); + case 0xFC0CF27F5FFAA661ULL: return std::make_unique(maxwell3d); + case 0xB5F74EDB717278ECULL: return 
std::make_unique(maxwell3d); + default: + return nullptr; + } +} + +namespace { +class MacroInterpreterImpl final : public CachedMacro { +public: + explicit MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_, const std::vector& code_) + : CachedMacro(maxwell3d_) + , code{code_} + {} + + void Execute(const std::vector& params, u32 method) override; + +private: + /// Resets the execution engine state, zeroing registers, etc. + void Reset(); + + /** + * Executes a single macro instruction located at the current program counter. Returns whether + * the interpreter should keep running. + * + * @param is_delay_slot Whether the current step is being executed due to a delay slot in a + * previous instruction. + */ + bool Step(bool is_delay_slot); + + /// Calculates the result of an ALU operation. src_a OP src_b; + u32 GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b); + + /// Performs the result operation on the input result and stores it in the specified register + /// (if necessary). + void ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result); + + /// Evaluates the branch condition and returns whether the branch should be taken or not. + bool EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const; + + /// Reads an opcode at the current program counter location. + Macro::Opcode GetOpcode() const; + + /// Returns the specified register's value. Register 0 is hardcoded to always return 0. + u32 GetRegister(u32 register_id) const; + + /// Sets the register to the input value. + void SetRegister(u32 register_id, u32 value); + + /// Sets the method address to use for the next Send instruction. + void SetMethodAddress(u32 address); + + /// Calls a GPU Engine method with the input parameter. + void Send(u32 value); + + /// Reads a GPU register located at the method address. + u32 Read(u32 method) const; + + /// Returns the next parameter in the parameter queue. 
+ u32 FetchParameter(); + + /// Current program counter + u32 pc{}; + /// Program counter to execute at after the delay slot is executed. + std::optional delayed_pc; + + /// General purpose macro registers. + std::array registers = {}; + + /// Method address to use for the next Send instruction. + Macro::MethodAddress method_address = {}; + + /// Input parameters of the current macro. + std::unique_ptr parameters; + std::size_t num_parameters = 0; + std::size_t parameters_capacity = 0; + /// Index of the next parameter that will be fetched by the 'parm' instruction. + u32 next_parameter_index = 0; + + bool carry_flag = false; + const std::vector& code; +}; + +void MacroInterpreterImpl::Execute(const std::vector& params, u32 method) { + Reset(); + + registers[1] = params[0]; + num_parameters = params.size(); + + if (num_parameters > parameters_capacity) { + parameters_capacity = num_parameters; + parameters = std::make_unique(num_parameters); + } + std::memcpy(parameters.get(), params.data(), num_parameters * sizeof(u32)); + + // Execute the code until we hit an exit condition. + bool keep_executing = true; + while (keep_executing) { + keep_executing = Step(false); + } + + // Assert the the macro used all the input parameters + ASSERT(next_parameter_index == num_parameters); +} + +void MacroInterpreterImpl::Reset() { + registers = {}; + pc = 0; + delayed_pc = {}; + method_address.raw = 0; + num_parameters = 0; + // The next parameter index starts at 1, because $r1 already has the value of the first + // parameter. 
+ next_parameter_index = 1; + carry_flag = false; +} + +bool MacroInterpreterImpl::Step(bool is_delay_slot) { + u32 base_address = pc; + + Macro::Opcode opcode = GetOpcode(); + pc += 4; + + // Update the program counter if we were delayed + if (delayed_pc) { + ASSERT(is_delay_slot); + pc = *delayed_pc; + delayed_pc = {}; + } + + switch (opcode.operation) { + case Macro::Operation::ALU: { + u32 result = GetALUResult(opcode.alu_operation, GetRegister(opcode.src_a), + GetRegister(opcode.src_b)); + ProcessResult(opcode.result_operation, opcode.dst, result); + break; + } + case Macro::Operation::AddImmediate: { + ProcessResult(opcode.result_operation, opcode.dst, + GetRegister(opcode.src_a) + opcode.immediate); + break; + } + case Macro::Operation::ExtractInsert: { + u32 dst = GetRegister(opcode.src_a); + u32 src = GetRegister(opcode.src_b); + + src = (src >> opcode.bf_src_bit) & opcode.GetBitfieldMask(); + dst &= ~(opcode.GetBitfieldMask() << opcode.bf_dst_bit); + dst |= src << opcode.bf_dst_bit; + ProcessResult(opcode.result_operation, opcode.dst, dst); + break; + } + case Macro::Operation::ExtractShiftLeftImmediate: { + u32 dst = GetRegister(opcode.src_a); + u32 src = GetRegister(opcode.src_b); + + u32 result = ((src >> dst) & opcode.GetBitfieldMask()) << opcode.bf_dst_bit; + + ProcessResult(opcode.result_operation, opcode.dst, result); + break; + } + case Macro::Operation::ExtractShiftLeftRegister: { + u32 dst = GetRegister(opcode.src_a); + u32 src = GetRegister(opcode.src_b); + + u32 result = ((src >> opcode.bf_src_bit) & opcode.GetBitfieldMask()) << dst; + + ProcessResult(opcode.result_operation, opcode.dst, result); + break; + } + case Macro::Operation::Read: { + u32 result = Read(GetRegister(opcode.src_a) + opcode.immediate); + ProcessResult(opcode.result_operation, opcode.dst, result); + break; + } + case Macro::Operation::Branch: { + ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid"); + u32 value = GetRegister(opcode.src_a); + bool 
taken = EvaluateBranchCondition(opcode.branch_condition, value); + if (taken) { + // Ignore the delay slot if the branch has the annul bit. + if (opcode.branch_annul) { + pc = base_address + opcode.GetBranchTarget(); + return true; + } + + delayed_pc = base_address + opcode.GetBranchTarget(); + // Execute one more instruction due to the delay slot. + return Step(true); + } + break; + } + default: + UNIMPLEMENTED_MSG("Unimplemented macro operation {}", opcode.operation.Value()); + break; + } + + // An instruction with the Exit flag will not actually + // cause an exit if it's executed inside a delay slot. + if (opcode.is_exit && !is_delay_slot) { + // Exit has a delay slot, execute the next instruction + Step(true); + return false; + } + + return true; +} + +u32 MacroInterpreterImpl::GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b) { + switch (operation) { + case Macro::ALUOperation::Add: { + const u64 result{static_cast(src_a) + src_b}; + carry_flag = result > 0xffffffff; + return static_cast(result); + } + case Macro::ALUOperation::AddWithCarry: { + const u64 result{static_cast(src_a) + src_b + (carry_flag ? 1ULL : 0ULL)}; + carry_flag = result > 0xffffffff; + return static_cast(result); + } + case Macro::ALUOperation::Subtract: { + const u64 result{static_cast(src_a) - src_b}; + carry_flag = result < 0x100000000; + return static_cast(result); + } + case Macro::ALUOperation::SubtractWithBorrow: { + const u64 result{static_cast(src_a) - src_b - (carry_flag ? 
0ULL : 1ULL)}; + carry_flag = result < 0x100000000; + return static_cast(result); + } + case Macro::ALUOperation::Xor: + return src_a ^ src_b; + case Macro::ALUOperation::Or: + return src_a | src_b; + case Macro::ALUOperation::And: + return src_a & src_b; + case Macro::ALUOperation::AndNot: + return src_a & ~src_b; + case Macro::ALUOperation::Nand: + return ~(src_a & src_b); + + default: + UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", operation); + return 0; + } +} + +void MacroInterpreterImpl::ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result) { + switch (operation) { + case Macro::ResultOperation::IgnoreAndFetch: + // Fetch parameter and ignore result. + SetRegister(reg, FetchParameter()); + break; + case Macro::ResultOperation::Move: + // Move result. + SetRegister(reg, result); + break; + case Macro::ResultOperation::MoveAndSetMethod: + // Move result and use as Method Address. + SetRegister(reg, result); + SetMethodAddress(result); + break; + case Macro::ResultOperation::FetchAndSend: + // Fetch parameter and send result. + SetRegister(reg, FetchParameter()); + Send(result); + break; + case Macro::ResultOperation::MoveAndSend: + // Move and send result. + SetRegister(reg, result); + Send(result); + break; + case Macro::ResultOperation::FetchAndSetMethod: + // Fetch parameter and use result as Method Address. + SetRegister(reg, FetchParameter()); + SetMethodAddress(result); + break; + case Macro::ResultOperation::MoveAndSetMethodFetchAndSend: + // Move result and use as Method Address, then fetch and send parameter. + SetRegister(reg, result); + SetMethodAddress(result); + Send(FetchParameter()); + break; + case Macro::ResultOperation::MoveAndSetMethodSend: + // Move result and use as Method Address, then send bits 12:17 of result. 
+ SetRegister(reg, result); + SetMethodAddress(result); + Send((result >> 12) & 0b111111); + break; + default: + UNIMPLEMENTED_MSG("Unimplemented result operation {}", operation); + break; + } +} + +bool MacroInterpreterImpl::EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const { + switch (cond) { + case Macro::BranchCondition::Zero: + return value == 0; + case Macro::BranchCondition::NotZero: + return value != 0; + } + UNREACHABLE(); +} + +Macro::Opcode MacroInterpreterImpl::GetOpcode() const { + ASSERT((pc % sizeof(u32)) == 0); + ASSERT(pc < code.size() * sizeof(u32)); + return {code[pc / sizeof(u32)]}; +} + +u32 MacroInterpreterImpl::GetRegister(u32 register_id) const { + return registers.at(register_id); +} + +void MacroInterpreterImpl::SetRegister(u32 register_id, u32 value) { + // Register 0 is hardwired as the zero register. + // Ensure no writes to it actually occur. + if (register_id == 0) { + return; + } + + registers.at(register_id) = value; +} + +void MacroInterpreterImpl::SetMethodAddress(u32 address) { + method_address.raw = address; +} + +void MacroInterpreterImpl::Send(u32 value) { + maxwell3d.CallMethod(method_address.address, value, true); + // Increment the method address by the method increment. 
+ method_address.address.Assign(method_address.address.Value() + + method_address.increment.Value()); +} + +u32 MacroInterpreterImpl::Read(u32 method) const { + return maxwell3d.GetRegisterValue(method); +} + +u32 MacroInterpreterImpl::FetchParameter() { + ASSERT(next_parameter_index < num_parameters); + return parameters[next_parameter_index++]; +} +} // Anonymous namespace + +#ifdef ARCHITECTURE_x86_64 +namespace { +constexpr Xbyak::Reg64 STATE = Xbyak::util::rbx; +constexpr Xbyak::Reg32 RESULT = Xbyak::util::r10d; +constexpr Xbyak::Reg64 MAX_PARAMETER = Xbyak::util::r11; +constexpr Xbyak::Reg64 PARAMETERS = Xbyak::util::r12; +constexpr Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; +constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; + +constexpr std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ + STATE, + RESULT, + MAX_PARAMETER, + PARAMETERS, + METHOD_ADDRESS, + BRANCH_HOLDER, +}); + +// Arbitrarily chosen based on current booting games. +constexpr size_t MAX_CODE_SIZE = 0x10000; + +std::bitset<32> PersistentCallerSavedRegs() { + return PERSISTENT_REGISTERS & Common::X64::ABI_ALL_CALLER_SAVED; +} + +/// @brief Must enforce W^X constraints, as we don't yet have a global "NO_EXECUTE" support flag +/// the speed loss is minimal, and in fact may be negligible, however for your peace of mind +/// I simply included known OSes which had W^X issues +#if defined(__OpenBSD__) || defined(__NetBSD__) || defined(__DragonFly__) +static const auto default_cg_mode = Xbyak::DontSetProtectRWE; +#else +static const auto default_cg_mode = nullptr; //Allow RWE +#endif + +class MacroJITx64Impl final : public Xbyak::CodeGenerator, public CachedMacro {
+public: + explicit MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector& code_) + : Xbyak::CodeGenerator(MAX_CODE_SIZE, default_cg_mode) + , CachedMacro(maxwell3d_) + , code{code_} + { + Compile(); + } + + void Execute(const std::vector& parameters, u32 method) override; + + void
Compile_ALU(Macro::Opcode opcode); + void Compile_AddImmediate(Macro::Opcode opcode); + void Compile_ExtractInsert(Macro::Opcode opcode); + void Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode); + void Compile_ExtractShiftLeftRegister(Macro::Opcode opcode); + void Compile_Read(Macro::Opcode opcode); + void Compile_Branch(Macro::Opcode opcode); + +private: + void Optimizer_ScanFlags(); + + void Compile(); + bool Compile_NextInstruction(); + + Xbyak::Reg32 Compile_FetchParameter(); + Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst); + + void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg); + void Compile_Send(Xbyak::Reg32 value); + + Macro::Opcode GetOpCode() const; + + struct JITState { + Engines::Maxwell3D* maxwell3d{}; + std::array registers{}; + u32 carry_flag{}; + }; + static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0"); + using ProgramType = void (*)(JITState*, const u32*, const u32*); + + struct OptimizerState { + bool can_skip_carry{}; + bool has_delayed_pc{}; + bool zero_reg_skip{}; + bool skip_dummy_addimmediate{}; + bool optimize_for_method_move{}; + bool enable_asserts{}; + }; + OptimizerState optimizer{}; + + std::optional next_opcode{}; + ProgramType program{nullptr}; + + std::array labels; + std::array delay_skip; + Xbyak::Label end_of_code{}; + + bool is_delay_slot{}; + u32 pc{}; + + const std::vector& code; +}; + +void MacroJITx64Impl::Execute(const std::vector& parameters, u32 method) { + ASSERT_OR_EXECUTE(program != nullptr, { return; }); + JITState state{}; + state.maxwell3d = &maxwell3d; + state.registers = {}; + program(&state, parameters.data(), parameters.data() + parameters.size()); +} + +void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) { + const bool is_a_zero = opcode.src_a == 0; + const bool is_b_zero = opcode.src_b == 0; + const bool valid_operation = !is_a_zero && !is_b_zero; + [[maybe_unused]] const bool is_move_operation = !is_a_zero && is_b_zero; + const bool 
has_zero_register = is_a_zero || is_b_zero; + const bool no_zero_reg_skip = opcode.alu_operation == Macro::ALUOperation::AddWithCarry || + opcode.alu_operation == Macro::ALUOperation::SubtractWithBorrow; + + Xbyak::Reg32 src_a; + Xbyak::Reg32 src_b; + + if (!optimizer.zero_reg_skip || no_zero_reg_skip) { + src_a = Compile_GetRegister(opcode.src_a, RESULT); + src_b = Compile_GetRegister(opcode.src_b, eax); + } else { + if (!is_a_zero) { + src_a = Compile_GetRegister(opcode.src_a, RESULT); + } + if (!is_b_zero) { + src_b = Compile_GetRegister(opcode.src_b, eax); + } + } + + bool has_emitted = false; + + switch (opcode.alu_operation) { + case Macro::ALUOperation::Add: + if (optimizer.zero_reg_skip) { + if (valid_operation) { + add(src_a, src_b); + } + } else { + add(src_a, src_b); + } + + if (!optimizer.can_skip_carry) { + setc(byte[STATE + offsetof(JITState, carry_flag)]); + } + break; + case Macro::ALUOperation::AddWithCarry: + bt(dword[STATE + offsetof(JITState, carry_flag)], 0); + adc(src_a, src_b); + setc(byte[STATE + offsetof(JITState, carry_flag)]); + break; + case Macro::ALUOperation::Subtract: + if (optimizer.zero_reg_skip) { + if (valid_operation) { + sub(src_a, src_b); + has_emitted = true; + } + } else { + sub(src_a, src_b); + has_emitted = true; + } + if (!optimizer.can_skip_carry && has_emitted) { + setc(byte[STATE + offsetof(JITState, carry_flag)]); + } + break; + case Macro::ALUOperation::SubtractWithBorrow: + bt(dword[STATE + offsetof(JITState, carry_flag)], 0); + sbb(src_a, src_b); + setc(byte[STATE + offsetof(JITState, carry_flag)]); + break; + case Macro::ALUOperation::Xor: + if (optimizer.zero_reg_skip) { + if (valid_operation) { + xor_(src_a, src_b); + } + } else { + xor_(src_a, src_b); + } + break; + case Macro::ALUOperation::Or: + if (optimizer.zero_reg_skip) { + if (valid_operation) { + or_(src_a, src_b); + } + } else { + or_(src_a, src_b); + } + break; + case Macro::ALUOperation::And: + if (optimizer.zero_reg_skip) { + if (!has_zero_register) 
{ + and_(src_a, src_b); + } + } else { + and_(src_a, src_b); + } + break; + case Macro::ALUOperation::AndNot: + if (optimizer.zero_reg_skip) { + if (!is_a_zero) { + not_(src_b); + and_(src_a, src_b); + } + } else { + not_(src_b); + and_(src_a, src_b); + } + break; + case Macro::ALUOperation::Nand: + if (optimizer.zero_reg_skip) { + if (!is_a_zero) { + and_(src_a, src_b); + not_(src_a); + } + } else { + and_(src_a, src_b); + not_(src_a); + } + break; + default: + UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", opcode.alu_operation.Value()); + break; + } + Compile_ProcessResult(opcode.result_operation, opcode.dst); +} + +void MacroJITx64Impl::Compile_AddImmediate(Macro::Opcode opcode) { + if (optimizer.skip_dummy_addimmediate) { + // Games tend to use this as an exit instruction placeholder. It's to encode an instruction + // without doing anything. In our case we can just not emit anything. + if (opcode.result_operation == Macro::ResultOperation::Move && opcode.dst == 0) { + return; + } + } + // Check for redundant moves + if (optimizer.optimize_for_method_move && + opcode.result_operation == Macro::ResultOperation::MoveAndSetMethod) { + if (next_opcode.has_value()) { + const auto next = *next_opcode; + if (next.result_operation == Macro::ResultOperation::MoveAndSetMethod && + opcode.dst == next.dst) { + return; + } + } + } + if (optimizer.zero_reg_skip && opcode.src_a == 0) { + if (opcode.immediate == 0) { + xor_(RESULT, RESULT); + } else { + mov(RESULT, opcode.immediate); + } + } else { + auto result = Compile_GetRegister(opcode.src_a, RESULT); + if (opcode.immediate > 2) { + add(result, opcode.immediate); + } else if (opcode.immediate == 1) { + inc(result); + } else if (opcode.immediate < 0) { + sub(result, opcode.immediate * -1); + } + } + Compile_ProcessResult(opcode.result_operation, opcode.dst); +} + +void MacroJITx64Impl::Compile_ExtractInsert(Macro::Opcode opcode) { + auto dst = Compile_GetRegister(opcode.src_a, RESULT); + auto src = 
Compile_GetRegister(opcode.src_b, eax); + + const u32 mask = ~(opcode.GetBitfieldMask() << opcode.bf_dst_bit); + and_(dst, mask); + shr(src, opcode.bf_src_bit); + and_(src, opcode.GetBitfieldMask()); + shl(src, opcode.bf_dst_bit); + or_(dst, src); + + Compile_ProcessResult(opcode.result_operation, opcode.dst); +} + +void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) { + const auto dst = Compile_GetRegister(opcode.src_a, ecx); + const auto src = Compile_GetRegister(opcode.src_b, RESULT); + + shr(src, dst.cvt8()); + and_(src, opcode.GetBitfieldMask()); + shl(src, opcode.bf_dst_bit); + + Compile_ProcessResult(opcode.result_operation, opcode.dst); +} + +void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) { + const auto dst = Compile_GetRegister(opcode.src_a, ecx); + const auto src = Compile_GetRegister(opcode.src_b, RESULT); + + shr(src, opcode.bf_src_bit); + and_(src, opcode.GetBitfieldMask()); + shl(src, dst.cvt8()); + + Compile_ProcessResult(opcode.result_operation, opcode.dst); +} + +void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) { + if (optimizer.zero_reg_skip && opcode.src_a == 0) { + if (opcode.immediate == 0) { + xor_(RESULT, RESULT); + } else { + mov(RESULT, opcode.immediate); + } + } else { + auto result = Compile_GetRegister(opcode.src_a, RESULT); + if (opcode.immediate > 2) { + add(result, opcode.immediate); + } else if (opcode.immediate == 1) { + inc(result); + } else if (opcode.immediate < 0) { + sub(result, opcode.immediate * -1); + } + } + + // Equivalent to Engines::Maxwell3D::GetRegisterValue: + if (optimizer.enable_asserts) { + Xbyak::Label pass_range_check; + cmp(RESULT, static_cast(Engines::Maxwell3D::Regs::NUM_REGS)); + jb(pass_range_check); + int3(); + L(pass_range_check); + } + mov(rax, qword[STATE]); + mov(RESULT, + dword[rax + offsetof(Engines::Maxwell3D, regs) + + offsetof(Engines::Maxwell3D::Regs, reg_array) + RESULT.cvt64() * sizeof(u32)]); + + 
Compile_ProcessResult(opcode.result_operation, opcode.dst); +} + +void Send(Engines::Maxwell3D* maxwell3d, Macro::MethodAddress method_address, u32 value) { + maxwell3d->CallMethod(method_address.address, value, true); +} + +void MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { + Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); + mov(Common::X64::ABI_PARAM1, qword[STATE]); + mov(Common::X64::ABI_PARAM2.cvt32(), METHOD_ADDRESS); + mov(Common::X64::ABI_PARAM3.cvt32(), value); + Common::X64::CallFarFunction(*this, &Send); + Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); + + Xbyak::Label dont_process{}; + // Get increment + test(METHOD_ADDRESS, 0x3f000); + // If zero, method address doesn't update + je(dont_process); + + mov(ecx, METHOD_ADDRESS); + and_(METHOD_ADDRESS, 0xfff); + shr(ecx, 12); + and_(ecx, 0x3f); + lea(eax, ptr[rcx + METHOD_ADDRESS.cvt64()]); + sal(ecx, 12); + or_(eax, ecx); + + mov(METHOD_ADDRESS, eax); + + L(dont_process); +} + +void MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) { + ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid"); + const s32 jump_address = + static_cast(pc) + static_cast(opcode.GetBranchTarget() / sizeof(s32)); + + Xbyak::Label end; + auto value = Compile_GetRegister(opcode.src_a, eax); + cmp(value, 0); // test(value, value); + if (optimizer.has_delayed_pc) { + switch (opcode.branch_condition) { + case Macro::BranchCondition::Zero: + jne(end, T_NEAR); + break; + case Macro::BranchCondition::NotZero: + je(end, T_NEAR); + break; + } + + if (opcode.branch_annul) { + xor_(BRANCH_HOLDER, BRANCH_HOLDER); + jmp(labels[jump_address], T_NEAR); + } else { + Xbyak::Label handle_post_exit{}; + Xbyak::Label skip{}; + jmp(skip, T_NEAR); + + L(handle_post_exit); + xor_(BRANCH_HOLDER, BRANCH_HOLDER); + jmp(labels[jump_address], T_NEAR); + + L(skip); + mov(BRANCH_HOLDER, handle_post_exit); + jmp(delay_skip[pc], T_NEAR); + } + } else { + 
switch (opcode.branch_condition) { + case Macro::BranchCondition::Zero: + je(labels[jump_address], T_NEAR); + break; + case Macro::BranchCondition::NotZero: + jne(labels[jump_address], T_NEAR); + break; + } + } + + L(end); +} + +void MacroJITx64Impl::Optimizer_ScanFlags() { + optimizer.can_skip_carry = true; + optimizer.has_delayed_pc = false; + for (auto raw_op : code) { + Macro::Opcode op{}; + op.raw = raw_op; + + if (op.operation == Macro::Operation::ALU) { + // Scan for any ALU operations which actually use the carry flag, if they don't exist in + // our current code we can skip emitting the carry flag handling operations + if (op.alu_operation == Macro::ALUOperation::AddWithCarry || + op.alu_operation == Macro::ALUOperation::SubtractWithBorrow) { + optimizer.can_skip_carry = false; + } + } + + if (op.operation == Macro::Operation::Branch) { + if (!op.branch_annul) { + optimizer.has_delayed_pc = true; + } + } + } +} + +void MacroJITx64Impl::Compile() { + labels.fill(Xbyak::Label()); + + Common::X64::ABI_PushRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); + // JIT state + mov(STATE, Common::X64::ABI_PARAM1); + mov(PARAMETERS, Common::X64::ABI_PARAM2); + mov(MAX_PARAMETER, Common::X64::ABI_PARAM3); + xor_(RESULT, RESULT); + xor_(METHOD_ADDRESS, METHOD_ADDRESS); + xor_(BRANCH_HOLDER, BRANCH_HOLDER); + + mov(dword[STATE + offsetof(JITState, registers) + 4], Compile_FetchParameter()); + + // Track get register for zero registers and mark it as no-op + optimizer.zero_reg_skip = true; + + // AddImmediate tends to be used as a NOP instruction, if we detect this we can + // completely skip the entire code path and no emit anything + optimizer.skip_dummy_addimmediate = true; + + // SMO tends to emit a lot of unnecessary method moves, we can mitigate this by only emitting + // one if our register isn't "dirty" + optimizer.optimize_for_method_move = true; + + // Enable run-time assertions in JITted code + optimizer.enable_asserts = false; + + // Check 
to see if we can skip emitting certain instructions + Optimizer_ScanFlags(); + + const u32 op_count = static_cast(code.size()); + for (u32 i = 0; i < op_count; i++) { + if (i < op_count - 1) { + pc = i + 1; + next_opcode = GetOpCode(); + } else { + next_opcode = {}; + } + pc = i; + Compile_NextInstruction(); + } + + L(end_of_code); + + Common::X64::ABI_PopRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); + ret(); + ready(); + program = getCode(); +} + +bool MacroJITx64Impl::Compile_NextInstruction() { + const auto opcode = GetOpCode(); + if (labels[pc].getAddress()) { + return false; + } + + L(labels[pc]); + + switch (opcode.operation) { + case Macro::Operation::ALU: + Compile_ALU(opcode); + break; + case Macro::Operation::AddImmediate: + Compile_AddImmediate(opcode); + break; + case Macro::Operation::ExtractInsert: + Compile_ExtractInsert(opcode); + break; + case Macro::Operation::ExtractShiftLeftImmediate: + Compile_ExtractShiftLeftImmediate(opcode); + break; + case Macro::Operation::ExtractShiftLeftRegister: + Compile_ExtractShiftLeftRegister(opcode); + break; + case Macro::Operation::Read: + Compile_Read(opcode); + break; + case Macro::Operation::Branch: + Compile_Branch(opcode); + break; + default: + UNIMPLEMENTED_MSG("Unimplemented opcode {}", opcode.operation.Value()); + break; + } + + if (optimizer.has_delayed_pc) { + if (opcode.is_exit) { + mov(rax, end_of_code); + test(BRANCH_HOLDER, BRANCH_HOLDER); + cmove(BRANCH_HOLDER, rax); + // Jump to next instruction to skip delay slot check + je(labels[pc + 1], T_NEAR); + } else { + // TODO(ogniK): Optimize delay slot branching + Xbyak::Label no_delay_slot{}; + test(BRANCH_HOLDER, BRANCH_HOLDER); + je(no_delay_slot, T_NEAR); + mov(rax, BRANCH_HOLDER); + xor_(BRANCH_HOLDER, BRANCH_HOLDER); + jmp(rax); + L(no_delay_slot); + } + L(delay_skip[pc]); + if (opcode.is_exit) { + return false; + } + } else { + test(BRANCH_HOLDER, BRANCH_HOLDER); + jne(end_of_code, T_NEAR); + if (opcode.is_exit) { + 
inc(BRANCH_HOLDER); + return false; + } + } + return true; +} + +static void WarnInvalidParameter(uintptr_t parameter, uintptr_t max_parameter) { + LOG_CRITICAL(HW_GPU, + "Macro JIT: invalid parameter access 0x{:x} (0x{:x} is the last parameter)", + parameter, max_parameter - sizeof(u32)); +} + +Xbyak::Reg32 MacroJITx64Impl::Compile_FetchParameter() { + Xbyak::Label parameter_ok{}; + cmp(PARAMETERS, MAX_PARAMETER); + jb(parameter_ok, T_NEAR); + Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); + mov(Common::X64::ABI_PARAM1, PARAMETERS); + mov(Common::X64::ABI_PARAM2, MAX_PARAMETER); + Common::X64::CallFarFunction(*this, &WarnInvalidParameter); + Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); + L(parameter_ok); + mov(eax, dword[PARAMETERS]); + add(PARAMETERS, sizeof(u32)); + return eax; +} + +Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) { + if (index == 0) { + // Register 0 is always zero + xor_(dst, dst); + } else { + mov(dst, dword[STATE + offsetof(JITState, registers) + index * sizeof(u32)]); + } + + return dst; +} + +void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) { + const auto SetRegister = [this](u32 reg_index, const Xbyak::Reg32& result) { + // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero + // register. 
+ if (reg_index == 0) { + return; + } + mov(dword[STATE + offsetof(JITState, registers) + reg_index * sizeof(u32)], result); + }; + const auto SetMethodAddress = [this](const Xbyak::Reg32& reg32) { mov(METHOD_ADDRESS, reg32); }; + + switch (operation) { + case Macro::ResultOperation::IgnoreAndFetch: + SetRegister(reg, Compile_FetchParameter()); + break; + case Macro::ResultOperation::Move: + SetRegister(reg, RESULT); + break; + case Macro::ResultOperation::MoveAndSetMethod: + SetRegister(reg, RESULT); + SetMethodAddress(RESULT); + break; + case Macro::ResultOperation::FetchAndSend: + // Fetch parameter and send result. + SetRegister(reg, Compile_FetchParameter()); + Compile_Send(RESULT); + break; + case Macro::ResultOperation::MoveAndSend: + // Move and send result. + SetRegister(reg, RESULT); + Compile_Send(RESULT); + break; + case Macro::ResultOperation::FetchAndSetMethod: + // Fetch parameter and use result as Method Address. + SetRegister(reg, Compile_FetchParameter()); + SetMethodAddress(RESULT); + break; + case Macro::ResultOperation::MoveAndSetMethodFetchAndSend: + // Move result and use as Method Address, then fetch and send parameter. + SetRegister(reg, RESULT); + SetMethodAddress(RESULT); + Compile_Send(Compile_FetchParameter()); + break; + case Macro::ResultOperation::MoveAndSetMethodSend: + // Move result and use as Method Address, then send bits 12:17 of result. 
+ SetRegister(reg, RESULT); + SetMethodAddress(RESULT); + shr(RESULT, 12); + and_(RESULT, 0b111111); + Compile_Send(RESULT); + break; + default: + UNIMPLEMENTED_MSG("Unimplemented macro operation {}", operation); + break; + } +} + +Macro::Opcode MacroJITx64Impl::GetOpCode() const { + ASSERT(pc < code.size()); + return {code[pc]}; +} +} // Anonymous namespace +#endif + +static void Dump(u64 hash, std::span code, bool decompiled = false) { + const auto base_dir{Common::FS::GetEdenPath(Common::FS::EdenPath::DumpDir)}; + const auto macro_dir{base_dir / "macros"}; + if (!Common::FS::CreateDir(base_dir) || !Common::FS::CreateDir(macro_dir)) { + LOG_ERROR(Common_Filesystem, "Failed to create macro dump directories"); + return; + } + auto name{macro_dir / fmt::format("{:016x}.macro", hash)}; + + if (decompiled) { + auto new_name{macro_dir / fmt::format("decompiled_{:016x}.macro", hash)}; + if (Common::FS::Exists(name)) { + (void)Common::FS::RenameFile(name, new_name); + return; + } + name = new_name; + } + + std::fstream macro_file(name, std::ios::out | std::ios::binary); + if (!macro_file) { + LOG_ERROR(Common_Filesystem, "Unable to open or create file at {}", Common::FS::PathToUTF8String(name)); + return; + } + macro_file.write(reinterpret_cast(code.data()), code.size_bytes()); +} + +MacroEngine::MacroEngine(Engines::Maxwell3D& maxwell3d_, bool is_interpreted_) + : hle_macros{std::make_optional(maxwell3d_)} + , maxwell3d{maxwell3d_} + , is_interpreted{is_interpreted_} +{} + +MacroEngine::~MacroEngine() = default; + +void MacroEngine::AddCode(u32 method, u32 data) { + uploaded_macro_code[method].push_back(data); +} + +void MacroEngine::ClearCode(u32 method) { + macro_cache.erase(method); + uploaded_macro_code.erase(method); +} + +void MacroEngine::Execute(u32 method, const std::vector& parameters) { + auto compiled_macro = macro_cache.find(method); + if (compiled_macro != macro_cache.end()) { + const auto& cache_info = compiled_macro->second; + if 
(cache_info.has_hle_program) { + cache_info.hle_program->Execute(parameters, method); + } else { + maxwell3d.RefreshParameters(); + cache_info.lle_program->Execute(parameters, method); + } + } else { + // Macro not compiled, check if it's uploaded and if so, compile it + std::optional mid_method; + const auto macro_code = uploaded_macro_code.find(method); + if (macro_code == uploaded_macro_code.end()) { + for (const auto& [method_base, code] : uploaded_macro_code) { + if (method >= method_base && (method - method_base) < code.size()) { + mid_method = method_base; + break; + } + } + if (!mid_method.has_value()) { + ASSERT_MSG(false, "Macro 0x{0:x} was not uploaded", method); + return; + } + } + auto& cache_info = macro_cache[method]; + + if (!mid_method.has_value()) { + cache_info.lle_program = Compile(macro_code->second); + cache_info.hash = Common::HashValue(macro_code->second); + } else { + const auto& macro_cached = uploaded_macro_code[mid_method.value()]; + const auto rebased_method = method - mid_method.value(); + auto& code = uploaded_macro_code[method]; + code.resize(macro_cached.size() - rebased_method); + std::memcpy(code.data(), macro_cached.data() + rebased_method, code.size() * sizeof(u32)); + cache_info.hash = Common::HashValue(code); + cache_info.lle_program = Compile(code); + } + + auto hle_program = hle_macros->GetHLEProgram(cache_info.hash); + if (!hle_program || Settings::values.disable_macro_hle) { + maxwell3d.RefreshParameters(); + cache_info.lle_program->Execute(parameters, method); + } else { + cache_info.has_hle_program = true; + cache_info.hle_program = std::move(hle_program); + cache_info.hle_program->Execute(parameters, method); + } + + if (Settings::values.dump_macros) { + Dump(cache_info.hash, macro_code->second, cache_info.has_hle_program); + } + } +} + +std::unique_ptr MacroEngine::Compile(const std::vector& code) { +#ifdef ARCHITECTURE_x86_64 + if (!is_interpreted) + return std::make_unique(maxwell3d, code); +#endif + return 
std::make_unique(maxwell3d, code); +} + +std::optional GetMacroEngine(Engines::Maxwell3D& maxwell3d) { +#ifdef ARCHITECTURE_x86_64 + return std::make_optional(maxwell3d, bool(Settings::values.disable_macro_jit)); +#else + return std::make_optional(maxwell3d, true); +#endif +} + +} // namespace Tegra diff --git a/src/video_core/macro/macro.h b/src/video_core/macro.h similarity index 74% rename from src/video_core/macro/macro.h rename to src/video_core/macro.h index 737ced9a45..685097a693 100644 --- a/src/video_core/macro/macro.h +++ b/src/video_core/macro.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -95,24 +98,34 @@ union MethodAddress { } // namespace Macro -class HLEMacro; - class CachedMacro { public: + CachedMacro(Engines::Maxwell3D& maxwell3d_) + : maxwell3d{maxwell3d_} + {} virtual ~CachedMacro() = default; - /** - * Executes the macro code with the specified input parameters. - * - * @param parameters The parameters of the macro - * @param method The method to execute - */ + /// Executes the macro code with the specified input parameters. + /// @param parameters The parameters of the macro + /// @param method The method to execute virtual void Execute(const std::vector& parameters, u32 method) = 0; + Engines::Maxwell3D& maxwell3d; +}; + +class HLEMacro { +public: + explicit HLEMacro(Engines::Maxwell3D& maxwell3d_); + ~HLEMacro(); + // Allocates and returns a cached macro if the hash matches a known function. + // Returns nullptr otherwise. 
+ [[nodiscard]] std::unique_ptr GetHLEProgram(u64 hash) const; +private: + Engines::Maxwell3D& maxwell3d; }; class MacroEngine { public: - explicit MacroEngine(Engines::Maxwell3D& maxwell3d); - virtual ~MacroEngine(); + explicit MacroEngine(Engines::Maxwell3D& maxwell3d, bool is_interpreted); + ~MacroEngine(); // Store the uploaded macro code to compile them when they're called. void AddCode(u32 method, u32 data); @@ -124,7 +137,7 @@ public: void Execute(u32 method, const std::vector& parameters); protected: - virtual std::unique_ptr Compile(const std::vector& code) = 0; + std::unique_ptr Compile(const std::vector& code); private: struct CacheInfo { @@ -136,10 +149,11 @@ private: std::unordered_map macro_cache; std::unordered_map> uploaded_macro_code; - std::unique_ptr hle_macros; + std::optional hle_macros; Engines::Maxwell3D& maxwell3d; + bool is_interpreted; }; -std::unique_ptr GetMacroEngine(Engines::Maxwell3D& maxwell3d); +std::optional GetMacroEngine(Engines::Maxwell3D& maxwell3d); } // namespace Tegra diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp deleted file mode 100644 index 2ff5e21c5e..0000000000 --- a/src/video_core/macro/macro.cpp +++ /dev/null @@ -1,140 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#include -#include -#include -#include - -#include "common/container_hash.h" - -#include -#include "common/assert.h" -#include "common/fs/fs.h" -#include "common/fs/path_util.h" -#include "common/settings.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/macro/macro.h" -#include "video_core/macro/macro_hle.h" -#include "video_core/macro/macro_interpreter.h" - -#ifdef ARCHITECTURE_x86_64 -#include "video_core/macro/macro_jit_x64.h" -#endif - -namespace Tegra { - -static void Dump(u64 hash, std::span code, bool decompiled = 
false) { - const auto base_dir{Common::FS::GetEdenPath(Common::FS::EdenPath::DumpDir)}; - const auto macro_dir{base_dir / "macros"}; - if (!Common::FS::CreateDir(base_dir) || !Common::FS::CreateDir(macro_dir)) { - LOG_ERROR(Common_Filesystem, "Failed to create macro dump directories"); - return; - } - auto name{macro_dir / fmt::format("{:016x}.macro", hash)}; - - if (decompiled) { - auto new_name{macro_dir / fmt::format("decompiled_{:016x}.macro", hash)}; - if (Common::FS::Exists(name)) { - (void)Common::FS::RenameFile(name, new_name); - return; - } - name = new_name; - } - - std::fstream macro_file(name, std::ios::out | std::ios::binary); - if (!macro_file) { - LOG_ERROR(Common_Filesystem, "Unable to open or create file at {}", - Common::FS::PathToUTF8String(name)); - return; - } - macro_file.write(reinterpret_cast(code.data()), code.size_bytes()); -} - -MacroEngine::MacroEngine(Engines::Maxwell3D& maxwell3d_) - : hle_macros{std::make_unique(maxwell3d_)}, maxwell3d{maxwell3d_} {} - -MacroEngine::~MacroEngine() = default; - -void MacroEngine::AddCode(u32 method, u32 data) { - uploaded_macro_code[method].push_back(data); -} - -void MacroEngine::ClearCode(u32 method) { - macro_cache.erase(method); - uploaded_macro_code.erase(method); -} - -void MacroEngine::Execute(u32 method, const std::vector& parameters) { - auto compiled_macro = macro_cache.find(method); - if (compiled_macro != macro_cache.end()) { - const auto& cache_info = compiled_macro->second; - if (cache_info.has_hle_program) { - cache_info.hle_program->Execute(parameters, method); - } else { - maxwell3d.RefreshParameters(); - cache_info.lle_program->Execute(parameters, method); - } - } else { - // Macro not compiled, check if it's uploaded and if so, compile it - std::optional mid_method; - const auto macro_code = uploaded_macro_code.find(method); - if (macro_code == uploaded_macro_code.end()) { - for (const auto& [method_base, code] : uploaded_macro_code) { - if (method >= method_base && (method - 
method_base) < code.size()) { - mid_method = method_base; - break; - } - } - if (!mid_method.has_value()) { - ASSERT_MSG(false, "Macro 0x{0:x} was not uploaded", method); - return; - } - } - auto& cache_info = macro_cache[method]; - - if (!mid_method.has_value()) { - cache_info.lle_program = Compile(macro_code->second); - cache_info.hash = Common::HashValue(macro_code->second); - } else { - const auto& macro_cached = uploaded_macro_code[mid_method.value()]; - const auto rebased_method = method - mid_method.value(); - auto& code = uploaded_macro_code[method]; - code.resize(macro_cached.size() - rebased_method); - std::memcpy(code.data(), macro_cached.data() + rebased_method, - code.size() * sizeof(u32)); - cache_info.hash = Common::HashValue(code); - cache_info.lle_program = Compile(code); - } - - auto hle_program = hle_macros->GetHLEProgram(cache_info.hash); - if (!hle_program || Settings::values.disable_macro_hle) { - maxwell3d.RefreshParameters(); - cache_info.lle_program->Execute(parameters, method); - } else { - cache_info.has_hle_program = true; - cache_info.hle_program = std::move(hle_program); - cache_info.hle_program->Execute(parameters, method); - } - - if (Settings::values.dump_macros) { - Dump(cache_info.hash, macro_code->second, cache_info.has_hle_program); - } - } -} - -std::unique_ptr GetMacroEngine(Engines::Maxwell3D& maxwell3d) { - if (Settings::values.disable_macro_jit) { - return std::make_unique(maxwell3d); - } -#ifdef ARCHITECTURE_x86_64 - return std::make_unique(maxwell3d); -#else - return std::make_unique(maxwell3d); -#endif -} - -} // namespace Tegra diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp deleted file mode 100644 index 2f41e806c2..0000000000 --- a/src/video_core/macro/macro_hle.cpp +++ /dev/null @@ -1,606 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project -// 
SPDX-License-Identifier: GPL-3.0-or-later - -#include -#include -#include "common/assert.h" -#include "common/scope_exit.h" -#include "video_core/dirty_flags.h" -#include "video_core/engines/draw_manager.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/macro/macro.h" -#include "video_core/macro/macro_hle.h" -#include "video_core/memory_manager.h" -#include "video_core/rasterizer_interface.h" - -namespace Tegra { - -using Maxwell3D = Engines::Maxwell3D; - -namespace { - -bool IsTopologySafe(Maxwell3D::Regs::PrimitiveTopology topology) { - switch (topology) { - case Maxwell3D::Regs::PrimitiveTopology::Points: - case Maxwell3D::Regs::PrimitiveTopology::Lines: - case Maxwell3D::Regs::PrimitiveTopology::LineLoop: - case Maxwell3D::Regs::PrimitiveTopology::LineStrip: - case Maxwell3D::Regs::PrimitiveTopology::Triangles: - case Maxwell3D::Regs::PrimitiveTopology::TriangleStrip: - case Maxwell3D::Regs::PrimitiveTopology::TriangleFan: - case Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency: - case Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency: - case Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency: - case Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency: - case Maxwell3D::Regs::PrimitiveTopology::Patches: - return true; - case Maxwell3D::Regs::PrimitiveTopology::Quads: - case Maxwell3D::Regs::PrimitiveTopology::QuadStrip: - case Maxwell3D::Regs::PrimitiveTopology::Polygon: - default: - return false; - } -} - -class HLEMacroImpl : public CachedMacro { -public: - explicit HLEMacroImpl(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} {} - -protected: - Maxwell3D& maxwell3d; -}; - -/* - * @note: these macros have two versions, a normal and extended version, with the extended version - * also assigning the base vertex/instance. 
- */ -template -class HLE_DrawArraysIndirect final : public HLEMacroImpl { -public: - explicit HLE_DrawArraysIndirect(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} - - void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { - auto topology = static_cast(parameters[0]); - if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) { - Fallback(parameters); - return; - } - - auto& params = maxwell3d.draw_manager->GetIndirectParams(); - params.is_byte_count = false; - params.is_indexed = false; - params.include_count = false; - params.count_start_address = 0; - params.indirect_start_address = maxwell3d.GetMacroAddress(1); - params.buffer_size = 4 * sizeof(u32); - params.max_draw_counts = 1; - params.stride = 0; - - if constexpr (extended) { - maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; - maxwell3d.SetHLEReplacementAttributeType( - 0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseInstance); - } - - maxwell3d.draw_manager->DrawArrayIndirect(topology); - - if constexpr (extended) { - maxwell3d.engine_state = Maxwell3D::EngineHint::None; - maxwell3d.replace_table.clear(); - } - } - -private: - void Fallback(const std::vector& parameters) { - SCOPE_EXIT { - if (extended) { - maxwell3d.engine_state = Maxwell3D::EngineHint::None; - maxwell3d.replace_table.clear(); - } - }; - maxwell3d.RefreshParameters(); - const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); - - auto topology = static_cast(parameters[0]); - const u32 vertex_first = parameters[3]; - const u32 vertex_count = parameters[1]; - - if (!IsTopologySafe(topology) && - static_cast(maxwell3d.GetMaxCurrentVertices()) < - static_cast(vertex_first) + static_cast(vertex_count)) { - ASSERT_MSG(false, "Faulty draw!"); - return; - } - - const u32 base_instance = parameters[4]; - if constexpr (extended) { - maxwell3d.regs.global_base_instance_index = base_instance; - maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; - 
maxwell3d.SetHLEReplacementAttributeType( - 0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseInstance); - } - - maxwell3d.draw_manager->DrawArray(topology, vertex_first, vertex_count, base_instance, - instance_count); - - if constexpr (extended) { - maxwell3d.regs.global_base_instance_index = 0; - maxwell3d.engine_state = Maxwell3D::EngineHint::None; - maxwell3d.replace_table.clear(); - } - } -}; - -/* - * @note: these macros have two versions, a normal and extended version, with the extended version - * also assigning the base vertex/instance. - */ -template -class HLE_DrawIndexedIndirect final : public HLEMacroImpl { -public: - explicit HLE_DrawIndexedIndirect(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} - - void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { - auto topology = static_cast(parameters[0]); - if (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology)) { - Fallback(parameters); - return; - } - - const u32 estimate = static_cast(maxwell3d.EstimateIndexBufferSize()); - const u32 element_base = parameters[4]; - const u32 base_instance = parameters[5]; - maxwell3d.regs.vertex_id_base = element_base; - maxwell3d.regs.global_base_vertex_index = element_base; - maxwell3d.regs.global_base_instance_index = base_instance; - maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - if constexpr (extended) { - maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; - maxwell3d.SetHLEReplacementAttributeType( - 0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex); - maxwell3d.SetHLEReplacementAttributeType( - 0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance); - } - auto& params = maxwell3d.draw_manager->GetIndirectParams(); - params.is_byte_count = false; - params.is_indexed = true; - params.include_count = false; - params.count_start_address = 0; - params.indirect_start_address = maxwell3d.GetMacroAddress(1); - params.buffer_size = 5 * sizeof(u32); - params.max_draw_counts = 1; 
- params.stride = 0; - maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, estimate); - maxwell3d.regs.vertex_id_base = 0x0; - maxwell3d.regs.global_base_vertex_index = 0x0; - maxwell3d.regs.global_base_instance_index = 0x0; - if constexpr (extended) { - maxwell3d.engine_state = Maxwell3D::EngineHint::None; - maxwell3d.replace_table.clear(); - } - } - -private: - void Fallback(const std::vector& parameters) { - maxwell3d.RefreshParameters(); - const u32 instance_count = (maxwell3d.GetRegisterValue(0xD1B) & parameters[2]); - const u32 element_base = parameters[4]; - const u32 base_instance = parameters[5]; - maxwell3d.regs.vertex_id_base = element_base; - maxwell3d.regs.global_base_vertex_index = element_base; - maxwell3d.regs.global_base_instance_index = base_instance; - maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - if constexpr (extended) { - maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; - maxwell3d.SetHLEReplacementAttributeType( - 0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex); - maxwell3d.SetHLEReplacementAttributeType( - 0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance); - } - - maxwell3d.draw_manager->DrawIndex( - static_cast(parameters[0]), parameters[3], - parameters[1], element_base, base_instance, instance_count); - - maxwell3d.regs.vertex_id_base = 0x0; - maxwell3d.regs.global_base_vertex_index = 0x0; - maxwell3d.regs.global_base_instance_index = 0x0; - if constexpr (extended) { - maxwell3d.engine_state = Maxwell3D::EngineHint::None; - maxwell3d.replace_table.clear(); - } - } -}; - -class HLE_MultiLayerClear final : public HLEMacroImpl { -public: - explicit HLE_MultiLayerClear(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} - - void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { - maxwell3d.RefreshParameters(); - ASSERT(parameters.size() == 1); - - const Maxwell3D::Regs::ClearSurface 
clear_params{parameters[0]}; - const u32 rt_index = clear_params.RT; - const u32 num_layers = maxwell3d.regs.rt[rt_index].depth; - ASSERT(clear_params.layer == 0); - - maxwell3d.regs.clear_surface.raw = clear_params.raw; - maxwell3d.draw_manager->Clear(num_layers); - } -}; - -class HLE_MultiDrawIndexedIndirectCount final : public HLEMacroImpl { -public: - explicit HLE_MultiDrawIndexedIndirectCount(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} - - void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { - const auto topology = static_cast(parameters[2]); - if (!IsTopologySafe(topology)) { - Fallback(parameters); - return; - } - - const u32 start_indirect = parameters[0]; - const u32 end_indirect = parameters[1]; - if (start_indirect >= end_indirect) { - // Nothing to do. - return; - } - - const u32 padding = parameters[3]; // padding is in words - - // size of each indirect segment - const u32 indirect_words = 5 + padding; - const u32 stride = indirect_words * sizeof(u32); - const std::size_t draw_count = end_indirect - start_indirect; - const u32 estimate = static_cast(maxwell3d.EstimateIndexBufferSize()); - maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - auto& params = maxwell3d.draw_manager->GetIndirectParams(); - params.is_byte_count = false; - params.is_indexed = true; - params.include_count = true; - params.count_start_address = maxwell3d.GetMacroAddress(4); - params.indirect_start_address = maxwell3d.GetMacroAddress(5); - params.buffer_size = stride * draw_count; - params.max_draw_counts = draw_count; - params.stride = stride; - maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; - maxwell3d.SetHLEReplacementAttributeType( - 0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex); - maxwell3d.SetHLEReplacementAttributeType( - 0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance); - 
maxwell3d.SetHLEReplacementAttributeType(0, 0x648, - Maxwell3D::HLEReplacementAttributeType::DrawID); - maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, estimate); - maxwell3d.engine_state = Maxwell3D::EngineHint::None; - maxwell3d.replace_table.clear(); - } - -private: - void Fallback(const std::vector& parameters) { - SCOPE_EXIT { - // Clean everything. - maxwell3d.regs.vertex_id_base = 0x0; - maxwell3d.engine_state = Maxwell3D::EngineHint::None; - maxwell3d.replace_table.clear(); - }; - maxwell3d.RefreshParameters(); - const u32 start_indirect = parameters[0]; - const u32 end_indirect = parameters[1]; - if (start_indirect >= end_indirect) { - // Nothing to do. - return; - } - const auto topology = static_cast(parameters[2]); - const u32 padding = parameters[3]; - const std::size_t max_draws = parameters[4]; - - const u32 indirect_words = 5 + padding; - const std::size_t first_draw = start_indirect; - const std::size_t effective_draws = end_indirect - start_indirect; - const std::size_t last_draw = start_indirect + (std::min)(effective_draws, max_draws); - - for (std::size_t index = first_draw; index < last_draw; index++) { - const std::size_t base = index * indirect_words + 5; - const u32 base_vertex = parameters[base + 3]; - const u32 base_instance = parameters[base + 4]; - maxwell3d.regs.vertex_id_base = base_vertex; - maxwell3d.engine_state = Maxwell3D::EngineHint::OnHLEMacro; - maxwell3d.SetHLEReplacementAttributeType( - 0, 0x640, Maxwell3D::HLEReplacementAttributeType::BaseVertex); - maxwell3d.SetHLEReplacementAttributeType( - 0, 0x644, Maxwell3D::HLEReplacementAttributeType::BaseInstance); - maxwell3d.CallMethod(0x8e3, 0x648, true); - maxwell3d.CallMethod(0x8e4, static_cast(index), true); - maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; - maxwell3d.draw_manager->DrawIndex(topology, parameters[base + 2], parameters[base], - base_vertex, base_instance, parameters[base + 1]); - } - } -}; - -class HLE_DrawIndirectByteCount final : 
public HLEMacroImpl { -public: - explicit HLE_DrawIndirectByteCount(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} - - void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { - const bool force = maxwell3d.Rasterizer().HasDrawTransformFeedback(); - - auto topology = static_cast(parameters[0] & 0xFFFFU); - if (!force && (!maxwell3d.AnyParametersDirty() || !IsTopologySafe(topology))) { - Fallback(parameters); - return; - } - auto& params = maxwell3d.draw_manager->GetIndirectParams(); - params.is_byte_count = true; - params.is_indexed = false; - params.include_count = false; - params.count_start_address = 0; - params.indirect_start_address = maxwell3d.GetMacroAddress(2); - params.buffer_size = 4; - params.max_draw_counts = 1; - params.stride = parameters[1]; - maxwell3d.regs.draw.begin = parameters[0]; - maxwell3d.regs.draw_auto_stride = parameters[1]; - maxwell3d.regs.draw_auto_byte_count = parameters[2]; - - maxwell3d.draw_manager->DrawArrayIndirect(topology); - } - -private: - void Fallback(const std::vector& parameters) { - maxwell3d.RefreshParameters(); - - maxwell3d.regs.draw.begin = parameters[0]; - maxwell3d.regs.draw_auto_stride = parameters[1]; - maxwell3d.regs.draw_auto_byte_count = parameters[2]; - - maxwell3d.draw_manager->DrawArray( - maxwell3d.regs.draw.topology, 0, - maxwell3d.regs.draw_auto_byte_count / maxwell3d.regs.draw_auto_stride, 0, 1); - } -}; - -class HLE_C713C83D8F63CCF3 final : public HLEMacroImpl { -public: - explicit HLE_C713C83D8F63CCF3(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} - - void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { - maxwell3d.RefreshParameters(); - const u32 offset = (parameters[0] & 0x3FFFFFFF) << 2; - const u32 address = maxwell3d.regs.shadow_scratch[24]; - auto& const_buffer = maxwell3d.regs.const_buffer; - const_buffer.size = 0x7000; - const_buffer.address_high = (address >> 24) & 0xFF; - const_buffer.address_low = address << 8; - 
const_buffer.offset = offset; - } -}; - -class HLE_D7333D26E0A93EDE final : public HLEMacroImpl { -public: - explicit HLE_D7333D26E0A93EDE(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} - - void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { - maxwell3d.RefreshParameters(); - const size_t index = parameters[0]; - const u32 address = maxwell3d.regs.shadow_scratch[42 + index]; - const u32 size = maxwell3d.regs.shadow_scratch[47 + index]; - auto& const_buffer = maxwell3d.regs.const_buffer; - const_buffer.size = size; - const_buffer.address_high = (address >> 24) & 0xFF; - const_buffer.address_low = address << 8; - } -}; - -class HLE_BindShader final : public HLEMacroImpl { -public: - explicit HLE_BindShader(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} - - void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { - maxwell3d.RefreshParameters(); - auto& regs = maxwell3d.regs; - const u32 index = parameters[0]; - if ((parameters[1] - regs.shadow_scratch[28 + index]) == 0) { - return; - } - - regs.pipelines[index & 0xF].offset = parameters[2]; - maxwell3d.dirty.flags[VideoCommon::Dirty::Shaders] = true; - regs.shadow_scratch[28 + index] = parameters[1]; - regs.shadow_scratch[34 + index] = parameters[2]; - - const u32 address = parameters[4]; - auto& const_buffer = regs.const_buffer; - const_buffer.size = 0x10000; - const_buffer.address_high = (address >> 24) & 0xFF; - const_buffer.address_low = address << 8; - - const size_t bind_group_id = parameters[3] & 0x7F; - auto& bind_group = regs.bind_groups[bind_group_id]; - bind_group.raw_config = 0x11; - maxwell3d.ProcessCBBind(bind_group_id); - } -}; - -class HLE_SetRasterBoundingBox final : public HLEMacroImpl { -public: - explicit HLE_SetRasterBoundingBox(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} - - void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { - maxwell3d.RefreshParameters(); - const u32 raster_mode = 
parameters[0]; - auto& regs = maxwell3d.regs; - const u32 raster_enabled = maxwell3d.regs.conservative_raster_enable; - const u32 scratch_data = maxwell3d.regs.shadow_scratch[52]; - regs.raster_bounding_box.raw = raster_mode & 0xFFFFF00F; - regs.raster_bounding_box.pad.Assign(scratch_data & raster_enabled); - } -}; - -template -class HLE_ClearConstBuffer final : public HLEMacroImpl { -public: - explicit HLE_ClearConstBuffer(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} - - void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { - maxwell3d.RefreshParameters(); - static constexpr std::array zeroes{}; - auto& regs = maxwell3d.regs; - regs.const_buffer.size = static_cast(base_size); - regs.const_buffer.address_high = parameters[0]; - regs.const_buffer.address_low = parameters[1]; - regs.const_buffer.offset = 0; - maxwell3d.ProcessCBMultiData(zeroes.data(), parameters[2] * 4); - } -}; - -class HLE_ClearMemory final : public HLEMacroImpl { -public: - explicit HLE_ClearMemory(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} - - void Execute(const std::vector& parameters, [[maybe_unused]] u32 method) override { - maxwell3d.RefreshParameters(); - - const u32 needed_memory = parameters[2] / sizeof(u32); - if (needed_memory > zero_memory.size()) { - zero_memory.resize(needed_memory, 0); - } - auto& regs = maxwell3d.regs; - regs.upload.line_length_in = parameters[2]; - regs.upload.line_count = 1; - regs.upload.dest.address_high = parameters[0]; - regs.upload.dest.address_low = parameters[1]; - maxwell3d.CallMethod(static_cast(MAXWELL3D_REG_INDEX(launch_dma)), 0x1011, true); - maxwell3d.CallMultiMethod(static_cast(MAXWELL3D_REG_INDEX(inline_data)), - zero_memory.data(), needed_memory, needed_memory); - } - -private: - std::vector zero_memory; -}; - -class HLE_TransformFeedbackSetup final : public HLEMacroImpl { -public: - explicit HLE_TransformFeedbackSetup(Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} - - void Execute(const 
std::vector& parameters, [[maybe_unused]] u32 method) override { - maxwell3d.RefreshParameters(); - - auto& regs = maxwell3d.regs; - regs.transform_feedback_enabled = 1; - regs.transform_feedback.buffers[0].start_offset = 0; - regs.transform_feedback.buffers[1].start_offset = 0; - regs.transform_feedback.buffers[2].start_offset = 0; - regs.transform_feedback.buffers[3].start_offset = 0; - - regs.upload.line_length_in = 4; - regs.upload.line_count = 1; - regs.upload.dest.address_high = parameters[0]; - regs.upload.dest.address_low = parameters[1]; - maxwell3d.CallMethod(static_cast(MAXWELL3D_REG_INDEX(launch_dma)), 0x1011, true); - maxwell3d.CallMethod(static_cast(MAXWELL3D_REG_INDEX(inline_data)), - regs.transform_feedback.controls[0].stride, true); - - maxwell3d.Rasterizer().RegisterTransformFeedback(regs.upload.dest.Address()); - } -}; - -} // Anonymous namespace - -HLEMacro::HLEMacro(Maxwell3D& maxwell3d_) : maxwell3d{maxwell3d_} { - builders.emplace(0x0D61FC9FAAC9FCADULL, - std::function(Maxwell3D&)>( - [](Maxwell3D& maxwell3d__) -> std::unique_ptr { - return std::make_unique>(maxwell3d__); - })); - builders.emplace(0x8A4D173EB99A8603ULL, - std::function(Maxwell3D&)>( - [](Maxwell3D& maxwell3d__) -> std::unique_ptr { - return std::make_unique>(maxwell3d__); - })); - builders.emplace(0x771BB18C62444DA0ULL, - std::function(Maxwell3D&)>( - [](Maxwell3D& maxwell3d__) -> std::unique_ptr { - return std::make_unique>(maxwell3d__); - })); - builders.emplace(0x0217920100488FF7ULL, - std::function(Maxwell3D&)>( - [](Maxwell3D& maxwell3d__) -> std::unique_ptr { - return std::make_unique>(maxwell3d__); - })); - builders.emplace(0x3F5E74B9C9A50164ULL, - std::function(Maxwell3D&)>( - [](Maxwell3D& maxwell3d__) -> std::unique_ptr { - return std::make_unique( - maxwell3d__); - })); - builders.emplace(0xEAD26C3E2109B06BULL, - std::function(Maxwell3D&)>( - [](Maxwell3D& maxwell3d__) -> std::unique_ptr { - return std::make_unique(maxwell3d__); - })); - 
builders.emplace(0xC713C83D8F63CCF3ULL, - std::function(Maxwell3D&)>( - [](Maxwell3D& maxwell3d__) -> std::unique_ptr { - return std::make_unique(maxwell3d__); - })); - builders.emplace(0xD7333D26E0A93EDEULL, - std::function(Maxwell3D&)>( - [](Maxwell3D& maxwell3d__) -> std::unique_ptr { - return std::make_unique(maxwell3d__); - })); - builders.emplace(0xEB29B2A09AA06D38ULL, - std::function(Maxwell3D&)>( - [](Maxwell3D& maxwell3d__) -> std::unique_ptr { - return std::make_unique(maxwell3d__); - })); - builders.emplace(0xDB1341DBEB4C8AF7ULL, - std::function(Maxwell3D&)>( - [](Maxwell3D& maxwell3d__) -> std::unique_ptr { - return std::make_unique(maxwell3d__); - })); - builders.emplace(0x6C97861D891EDf7EULL, - std::function(Maxwell3D&)>( - [](Maxwell3D& maxwell3d__) -> std::unique_ptr { - return std::make_unique>(maxwell3d__); - })); - builders.emplace(0xD246FDDF3A6173D7ULL, - std::function(Maxwell3D&)>( - [](Maxwell3D& maxwell3d__) -> std::unique_ptr { - return std::make_unique>(maxwell3d__); - })); - builders.emplace(0xEE4D0004BEC8ECF4ULL, - std::function(Maxwell3D&)>( - [](Maxwell3D& maxwell3d__) -> std::unique_ptr { - return std::make_unique(maxwell3d__); - })); - builders.emplace(0xFC0CF27F5FFAA661ULL, - std::function(Maxwell3D&)>( - [](Maxwell3D& maxwell3d__) -> std::unique_ptr { - return std::make_unique(maxwell3d__); - })); - builders.emplace(0xB5F74EDB717278ECULL, - std::function(Maxwell3D&)>( - [](Maxwell3D& maxwell3d__) -> std::unique_ptr { - return std::make_unique(maxwell3d__); - })); -} - -HLEMacro::~HLEMacro() = default; - -std::unique_ptr HLEMacro::GetHLEProgram(u64 hash) const { - const auto it = builders.find(hash); - if (it == builders.end()) { - return nullptr; - } - return it->second(maxwell3d); -} - -} // namespace Tegra diff --git a/src/video_core/macro/macro_hle.h b/src/video_core/macro/macro_hle.h deleted file mode 100644 index 33f92fab16..0000000000 --- a/src/video_core/macro/macro_hle.h +++ /dev/null @@ -1,33 +0,0 @@ -// 
SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include -#include -#include - -#include "common/common_types.h" - -namespace Tegra { - -namespace Engines { -class Maxwell3D; -} - -class HLEMacro { -public: - explicit HLEMacro(Engines::Maxwell3D& maxwell3d_); - ~HLEMacro(); - - // Allocates and returns a cached macro if the hash matches a known function. - // Returns nullptr otherwise. - [[nodiscard]] std::unique_ptr GetHLEProgram(u64 hash) const; - -private: - Engines::Maxwell3D& maxwell3d; - std::unordered_map(Engines::Maxwell3D&)>> - builders; -}; - -} // namespace Tegra diff --git a/src/video_core/macro/macro_interpreter.cpp b/src/video_core/macro/macro_interpreter.cpp deleted file mode 100644 index f9befce676..0000000000 --- a/src/video_core/macro/macro_interpreter.cpp +++ /dev/null @@ -1,362 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#include -#include - -#include "common/assert.h" -#include "common/logging/log.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/macro/macro_interpreter.h" - -namespace Tegra { -namespace { -class MacroInterpreterImpl final : public CachedMacro { -public: - explicit MacroInterpreterImpl(Engines::Maxwell3D& maxwell3d_, const std::vector& code_) - : maxwell3d{maxwell3d_}, code{code_} {} - - void Execute(const std::vector& params, u32 method) override; - -private: - /// Resets the execution engine state, zeroing registers, etc. - void Reset(); - - /** - * Executes a single macro instruction located at the current program counter. Returns whether - * the interpreter should keep running. - * - * @param is_delay_slot Whether the current step is being executed due to a delay slot in a - * previous instruction. 
- */ - bool Step(bool is_delay_slot); - - /// Calculates the result of an ALU operation. src_a OP src_b; - u32 GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b); - - /// Performs the result operation on the input result and stores it in the specified register - /// (if necessary). - void ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result); - - /// Evaluates the branch condition and returns whether the branch should be taken or not. - bool EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const; - - /// Reads an opcode at the current program counter location. - Macro::Opcode GetOpcode() const; - - /// Returns the specified register's value. Register 0 is hardcoded to always return 0. - u32 GetRegister(u32 register_id) const; - - /// Sets the register to the input value. - void SetRegister(u32 register_id, u32 value); - - /// Sets the method address to use for the next Send instruction. - void SetMethodAddress(u32 address); - - /// Calls a GPU Engine method with the input parameter. - void Send(u32 value); - - /// Reads a GPU register located at the method address. - u32 Read(u32 method) const; - - /// Returns the next parameter in the parameter queue. - u32 FetchParameter(); - - Engines::Maxwell3D& maxwell3d; - - /// Current program counter - u32 pc{}; - /// Program counter to execute at after the delay slot is executed. - std::optional delayed_pc; - - /// General purpose macro registers. - std::array registers = {}; - - /// Method address to use for the next Send instruction. - Macro::MethodAddress method_address = {}; - - /// Input parameters of the current macro. - std::unique_ptr parameters; - std::size_t num_parameters = 0; - std::size_t parameters_capacity = 0; - /// Index of the next parameter that will be fetched by the 'parm' instruction. 
- u32 next_parameter_index = 0; - - bool carry_flag = false; - const std::vector& code; -}; - -void MacroInterpreterImpl::Execute(const std::vector& params, u32 method) { - Reset(); - - registers[1] = params[0]; - num_parameters = params.size(); - - if (num_parameters > parameters_capacity) { - parameters_capacity = num_parameters; - parameters = std::make_unique(num_parameters); - } - std::memcpy(parameters.get(), params.data(), num_parameters * sizeof(u32)); - - // Execute the code until we hit an exit condition. - bool keep_executing = true; - while (keep_executing) { - keep_executing = Step(false); - } - - // Assert the the macro used all the input parameters - ASSERT(next_parameter_index == num_parameters); -} - -void MacroInterpreterImpl::Reset() { - registers = {}; - pc = 0; - delayed_pc = {}; - method_address.raw = 0; - num_parameters = 0; - // The next parameter index starts at 1, because $r1 already has the value of the first - // parameter. - next_parameter_index = 1; - carry_flag = false; -} - -bool MacroInterpreterImpl::Step(bool is_delay_slot) { - u32 base_address = pc; - - Macro::Opcode opcode = GetOpcode(); - pc += 4; - - // Update the program counter if we were delayed - if (delayed_pc) { - ASSERT(is_delay_slot); - pc = *delayed_pc; - delayed_pc = {}; - } - - switch (opcode.operation) { - case Macro::Operation::ALU: { - u32 result = GetALUResult(opcode.alu_operation, GetRegister(opcode.src_a), - GetRegister(opcode.src_b)); - ProcessResult(opcode.result_operation, opcode.dst, result); - break; - } - case Macro::Operation::AddImmediate: { - ProcessResult(opcode.result_operation, opcode.dst, - GetRegister(opcode.src_a) + opcode.immediate); - break; - } - case Macro::Operation::ExtractInsert: { - u32 dst = GetRegister(opcode.src_a); - u32 src = GetRegister(opcode.src_b); - - src = (src >> opcode.bf_src_bit) & opcode.GetBitfieldMask(); - dst &= ~(opcode.GetBitfieldMask() << opcode.bf_dst_bit); - dst |= src << opcode.bf_dst_bit; - 
ProcessResult(opcode.result_operation, opcode.dst, dst); - break; - } - case Macro::Operation::ExtractShiftLeftImmediate: { - u32 dst = GetRegister(opcode.src_a); - u32 src = GetRegister(opcode.src_b); - - u32 result = ((src >> dst) & opcode.GetBitfieldMask()) << opcode.bf_dst_bit; - - ProcessResult(opcode.result_operation, opcode.dst, result); - break; - } - case Macro::Operation::ExtractShiftLeftRegister: { - u32 dst = GetRegister(opcode.src_a); - u32 src = GetRegister(opcode.src_b); - - u32 result = ((src >> opcode.bf_src_bit) & opcode.GetBitfieldMask()) << dst; - - ProcessResult(opcode.result_operation, opcode.dst, result); - break; - } - case Macro::Operation::Read: { - u32 result = Read(GetRegister(opcode.src_a) + opcode.immediate); - ProcessResult(opcode.result_operation, opcode.dst, result); - break; - } - case Macro::Operation::Branch: { - ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid"); - u32 value = GetRegister(opcode.src_a); - bool taken = EvaluateBranchCondition(opcode.branch_condition, value); - if (taken) { - // Ignore the delay slot if the branch has the annul bit. - if (opcode.branch_annul) { - pc = base_address + opcode.GetBranchTarget(); - return true; - } - - delayed_pc = base_address + opcode.GetBranchTarget(); - // Execute one more instruction due to the delay slot. - return Step(true); - } - break; - } - default: - UNIMPLEMENTED_MSG("Unimplemented macro operation {}", opcode.operation.Value()); - break; - } - - // An instruction with the Exit flag will not actually - // cause an exit if it's executed inside a delay slot. 
- if (opcode.is_exit && !is_delay_slot) { - // Exit has a delay slot, execute the next instruction - Step(true); - return false; - } - - return true; -} - -u32 MacroInterpreterImpl::GetALUResult(Macro::ALUOperation operation, u32 src_a, u32 src_b) { - switch (operation) { - case Macro::ALUOperation::Add: { - const u64 result{static_cast(src_a) + src_b}; - carry_flag = result > 0xffffffff; - return static_cast(result); - } - case Macro::ALUOperation::AddWithCarry: { - const u64 result{static_cast(src_a) + src_b + (carry_flag ? 1ULL : 0ULL)}; - carry_flag = result > 0xffffffff; - return static_cast(result); - } - case Macro::ALUOperation::Subtract: { - const u64 result{static_cast(src_a) - src_b}; - carry_flag = result < 0x100000000; - return static_cast(result); - } - case Macro::ALUOperation::SubtractWithBorrow: { - const u64 result{static_cast(src_a) - src_b - (carry_flag ? 0ULL : 1ULL)}; - carry_flag = result < 0x100000000; - return static_cast(result); - } - case Macro::ALUOperation::Xor: - return src_a ^ src_b; - case Macro::ALUOperation::Or: - return src_a | src_b; - case Macro::ALUOperation::And: - return src_a & src_b; - case Macro::ALUOperation::AndNot: - return src_a & ~src_b; - case Macro::ALUOperation::Nand: - return ~(src_a & src_b); - - default: - UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", operation); - return 0; - } -} - -void MacroInterpreterImpl::ProcessResult(Macro::ResultOperation operation, u32 reg, u32 result) { - switch (operation) { - case Macro::ResultOperation::IgnoreAndFetch: - // Fetch parameter and ignore result. - SetRegister(reg, FetchParameter()); - break; - case Macro::ResultOperation::Move: - // Move result. - SetRegister(reg, result); - break; - case Macro::ResultOperation::MoveAndSetMethod: - // Move result and use as Method Address. - SetRegister(reg, result); - SetMethodAddress(result); - break; - case Macro::ResultOperation::FetchAndSend: - // Fetch parameter and send result. 
- SetRegister(reg, FetchParameter()); - Send(result); - break; - case Macro::ResultOperation::MoveAndSend: - // Move and send result. - SetRegister(reg, result); - Send(result); - break; - case Macro::ResultOperation::FetchAndSetMethod: - // Fetch parameter and use result as Method Address. - SetRegister(reg, FetchParameter()); - SetMethodAddress(result); - break; - case Macro::ResultOperation::MoveAndSetMethodFetchAndSend: - // Move result and use as Method Address, then fetch and send parameter. - SetRegister(reg, result); - SetMethodAddress(result); - Send(FetchParameter()); - break; - case Macro::ResultOperation::MoveAndSetMethodSend: - // Move result and use as Method Address, then send bits 12:17 of result. - SetRegister(reg, result); - SetMethodAddress(result); - Send((result >> 12) & 0b111111); - break; - default: - UNIMPLEMENTED_MSG("Unimplemented result operation {}", operation); - break; - } -} - -bool MacroInterpreterImpl::EvaluateBranchCondition(Macro::BranchCondition cond, u32 value) const { - switch (cond) { - case Macro::BranchCondition::Zero: - return value == 0; - case Macro::BranchCondition::NotZero: - return value != 0; - } - UNREACHABLE(); -} - -Macro::Opcode MacroInterpreterImpl::GetOpcode() const { - ASSERT((pc % sizeof(u32)) == 0); - ASSERT(pc < code.size() * sizeof(u32)); - return {code[pc / sizeof(u32)]}; -} - -u32 MacroInterpreterImpl::GetRegister(u32 register_id) const { - return registers.at(register_id); -} - -void MacroInterpreterImpl::SetRegister(u32 register_id, u32 value) { - // Register 0 is hardwired as the zero register. - // Ensure no writes to it actually occur. - if (register_id == 0) { - return; - } - - registers.at(register_id) = value; -} - -void MacroInterpreterImpl::SetMethodAddress(u32 address) { - method_address.raw = address; -} - -void MacroInterpreterImpl::Send(u32 value) { - maxwell3d.CallMethod(method_address.address, value, true); - // Increment the method address by the method increment. 
- method_address.address.Assign(method_address.address.Value() + - method_address.increment.Value()); -} - -u32 MacroInterpreterImpl::Read(u32 method) const { - return maxwell3d.GetRegisterValue(method); -} - -u32 MacroInterpreterImpl::FetchParameter() { - ASSERT(next_parameter_index < num_parameters); - return parameters[next_parameter_index++]; -} -} // Anonymous namespace - -MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d_) - : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {} - -std::unique_ptr MacroInterpreter::Compile(const std::vector& code) { - return std::make_unique(maxwell3d, code); -} - -} // namespace Tegra diff --git a/src/video_core/macro/macro_interpreter.h b/src/video_core/macro/macro_interpreter.h deleted file mode 100644 index f5eeb0b76f..0000000000 --- a/src/video_core/macro/macro_interpreter.h +++ /dev/null @@ -1,27 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include - -#include "common/common_types.h" -#include "video_core/macro/macro.h" - -namespace Tegra { -namespace Engines { -class Maxwell3D; -} - -class MacroInterpreter final : public MacroEngine { -public: - explicit MacroInterpreter(Engines::Maxwell3D& maxwell3d_); - -protected: - std::unique_ptr Compile(const std::vector& code) override; - -private: - Engines::Maxwell3D& maxwell3d; -}; - -} // namespace Tegra diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp deleted file mode 100644 index 65935f6c62..0000000000 --- a/src/video_core/macro/macro_jit_x64.cpp +++ /dev/null @@ -1,678 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project -// SPDX-License-Identifier: GPL-3.0-or-later - -// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#include -#include -#include - -#include - -#include "common/assert.h" -#include "common/bit_field.h" -#include 
"common/logging/log.h" -#include "common/x64/xbyak_abi.h" -#include "common/x64/xbyak_util.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/macro/macro_interpreter.h" -#include "video_core/macro/macro_jit_x64.h" - -namespace Tegra { -namespace { -constexpr Xbyak::Reg64 STATE = Xbyak::util::rbx; -constexpr Xbyak::Reg32 RESULT = Xbyak::util::r10d; -constexpr Xbyak::Reg64 MAX_PARAMETER = Xbyak::util::r11; -constexpr Xbyak::Reg64 PARAMETERS = Xbyak::util::r12; -constexpr Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; -constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; - -constexpr std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ - STATE, - RESULT, - MAX_PARAMETER, - PARAMETERS, - METHOD_ADDRESS, - BRANCH_HOLDER, -}); - -// Arbitrarily chosen based on current booting games. -constexpr size_t MAX_CODE_SIZE = 0x10000; - -std::bitset<32> PersistentCallerSavedRegs() { - return PERSISTENT_REGISTERS & Common::X64::ABI_ALL_CALLER_SAVED; -} - -/// @brief Must enforce W^X constraints, as we yet don't havea global "NO_EXECUTE" support flag -/// the speed loss is minimal, and in fact may be negligible, however for your peace of mind -/// I simply included known OSes whom had W^X issues -#if defined(__OpenBSD__) || defined(__NetBSD__) || defined(__DragonFly__) -static const auto default_cg_mode = Xbyak::DontSetProtectRWE; -#else -static const auto default_cg_mode = nullptr; //Allow RWE -#endif - -class MacroJITx64Impl final : public Xbyak::CodeGenerator, public CachedMacro { -public: - explicit MacroJITx64Impl(Engines::Maxwell3D& maxwell3d_, const std::vector& code_) - : Xbyak::CodeGenerator(MAX_CODE_SIZE, default_cg_mode) - , code{code_}, maxwell3d{maxwell3d_} { - Compile(); - } - - void Execute(const std::vector& parameters, u32 method) override; - - void Compile_ALU(Macro::Opcode opcode); - void Compile_AddImmediate(Macro::Opcode opcode); - void Compile_ExtractInsert(Macro::Opcode opcode); - void 
Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode); - void Compile_ExtractShiftLeftRegister(Macro::Opcode opcode); - void Compile_Read(Macro::Opcode opcode); - void Compile_Branch(Macro::Opcode opcode); - -private: - void Optimizer_ScanFlags(); - - void Compile(); - bool Compile_NextInstruction(); - - Xbyak::Reg32 Compile_FetchParameter(); - Xbyak::Reg32 Compile_GetRegister(u32 index, Xbyak::Reg32 dst); - - void Compile_ProcessResult(Macro::ResultOperation operation, u32 reg); - void Compile_Send(Xbyak::Reg32 value); - - Macro::Opcode GetOpCode() const; - - struct JITState { - Engines::Maxwell3D* maxwell3d{}; - std::array registers{}; - u32 carry_flag{}; - }; - static_assert(offsetof(JITState, maxwell3d) == 0, "Maxwell3D is not at 0x0"); - using ProgramType = void (*)(JITState*, const u32*, const u32*); - - struct OptimizerState { - bool can_skip_carry{}; - bool has_delayed_pc{}; - bool zero_reg_skip{}; - bool skip_dummy_addimmediate{}; - bool optimize_for_method_move{}; - bool enable_asserts{}; - }; - OptimizerState optimizer{}; - - std::optional next_opcode{}; - ProgramType program{nullptr}; - - std::array labels; - std::array delay_skip; - Xbyak::Label end_of_code{}; - - bool is_delay_slot{}; - u32 pc{}; - - const std::vector& code; - Engines::Maxwell3D& maxwell3d; -}; - -void MacroJITx64Impl::Execute(const std::vector& parameters, u32 method) { - ASSERT_OR_EXECUTE(program != nullptr, { return; }); - JITState state{}; - state.maxwell3d = &maxwell3d; - state.registers = {}; - program(&state, parameters.data(), parameters.data() + parameters.size()); -} - -void MacroJITx64Impl::Compile_ALU(Macro::Opcode opcode) { - const bool is_a_zero = opcode.src_a == 0; - const bool is_b_zero = opcode.src_b == 0; - const bool valid_operation = !is_a_zero && !is_b_zero; - [[maybe_unused]] const bool is_move_operation = !is_a_zero && is_b_zero; - const bool has_zero_register = is_a_zero || is_b_zero; - const bool no_zero_reg_skip = opcode.alu_operation == 
Macro::ALUOperation::AddWithCarry || - opcode.alu_operation == Macro::ALUOperation::SubtractWithBorrow; - - Xbyak::Reg32 src_a; - Xbyak::Reg32 src_b; - - if (!optimizer.zero_reg_skip || no_zero_reg_skip) { - src_a = Compile_GetRegister(opcode.src_a, RESULT); - src_b = Compile_GetRegister(opcode.src_b, eax); - } else { - if (!is_a_zero) { - src_a = Compile_GetRegister(opcode.src_a, RESULT); - } - if (!is_b_zero) { - src_b = Compile_GetRegister(opcode.src_b, eax); - } - } - - bool has_emitted = false; - - switch (opcode.alu_operation) { - case Macro::ALUOperation::Add: - if (optimizer.zero_reg_skip) { - if (valid_operation) { - add(src_a, src_b); - } - } else { - add(src_a, src_b); - } - - if (!optimizer.can_skip_carry) { - setc(byte[STATE + offsetof(JITState, carry_flag)]); - } - break; - case Macro::ALUOperation::AddWithCarry: - bt(dword[STATE + offsetof(JITState, carry_flag)], 0); - adc(src_a, src_b); - setc(byte[STATE + offsetof(JITState, carry_flag)]); - break; - case Macro::ALUOperation::Subtract: - if (optimizer.zero_reg_skip) { - if (valid_operation) { - sub(src_a, src_b); - has_emitted = true; - } - } else { - sub(src_a, src_b); - has_emitted = true; - } - if (!optimizer.can_skip_carry && has_emitted) { - setc(byte[STATE + offsetof(JITState, carry_flag)]); - } - break; - case Macro::ALUOperation::SubtractWithBorrow: - bt(dword[STATE + offsetof(JITState, carry_flag)], 0); - sbb(src_a, src_b); - setc(byte[STATE + offsetof(JITState, carry_flag)]); - break; - case Macro::ALUOperation::Xor: - if (optimizer.zero_reg_skip) { - if (valid_operation) { - xor_(src_a, src_b); - } - } else { - xor_(src_a, src_b); - } - break; - case Macro::ALUOperation::Or: - if (optimizer.zero_reg_skip) { - if (valid_operation) { - or_(src_a, src_b); - } - } else { - or_(src_a, src_b); - } - break; - case Macro::ALUOperation::And: - if (optimizer.zero_reg_skip) { - if (!has_zero_register) { - and_(src_a, src_b); - } - } else { - and_(src_a, src_b); - } - break; - case 
Macro::ALUOperation::AndNot: - if (optimizer.zero_reg_skip) { - if (!is_a_zero) { - not_(src_b); - and_(src_a, src_b); - } - } else { - not_(src_b); - and_(src_a, src_b); - } - break; - case Macro::ALUOperation::Nand: - if (optimizer.zero_reg_skip) { - if (!is_a_zero) { - and_(src_a, src_b); - not_(src_a); - } - } else { - and_(src_a, src_b); - not_(src_a); - } - break; - default: - UNIMPLEMENTED_MSG("Unimplemented ALU operation {}", opcode.alu_operation.Value()); - break; - } - Compile_ProcessResult(opcode.result_operation, opcode.dst); -} - -void MacroJITx64Impl::Compile_AddImmediate(Macro::Opcode opcode) { - if (optimizer.skip_dummy_addimmediate) { - // Games tend to use this as an exit instruction placeholder. It's to encode an instruction - // without doing anything. In our case we can just not emit anything. - if (opcode.result_operation == Macro::ResultOperation::Move && opcode.dst == 0) { - return; - } - } - // Check for redundant moves - if (optimizer.optimize_for_method_move && - opcode.result_operation == Macro::ResultOperation::MoveAndSetMethod) { - if (next_opcode.has_value()) { - const auto next = *next_opcode; - if (next.result_operation == Macro::ResultOperation::MoveAndSetMethod && - opcode.dst == next.dst) { - return; - } - } - } - if (optimizer.zero_reg_skip && opcode.src_a == 0) { - if (opcode.immediate == 0) { - xor_(RESULT, RESULT); - } else { - mov(RESULT, opcode.immediate); - } - } else { - auto result = Compile_GetRegister(opcode.src_a, RESULT); - if (opcode.immediate > 2) { - add(result, opcode.immediate); - } else if (opcode.immediate == 1) { - inc(result); - } else if (opcode.immediate < 0) { - sub(result, opcode.immediate * -1); - } - } - Compile_ProcessResult(opcode.result_operation, opcode.dst); -} - -void MacroJITx64Impl::Compile_ExtractInsert(Macro::Opcode opcode) { - auto dst = Compile_GetRegister(opcode.src_a, RESULT); - auto src = Compile_GetRegister(opcode.src_b, eax); - - const u32 mask = ~(opcode.GetBitfieldMask() << 
opcode.bf_dst_bit); - and_(dst, mask); - shr(src, opcode.bf_src_bit); - and_(src, opcode.GetBitfieldMask()); - shl(src, opcode.bf_dst_bit); - or_(dst, src); - - Compile_ProcessResult(opcode.result_operation, opcode.dst); -} - -void MacroJITx64Impl::Compile_ExtractShiftLeftImmediate(Macro::Opcode opcode) { - const auto dst = Compile_GetRegister(opcode.src_a, ecx); - const auto src = Compile_GetRegister(opcode.src_b, RESULT); - - shr(src, dst.cvt8()); - and_(src, opcode.GetBitfieldMask()); - shl(src, opcode.bf_dst_bit); - - Compile_ProcessResult(opcode.result_operation, opcode.dst); -} - -void MacroJITx64Impl::Compile_ExtractShiftLeftRegister(Macro::Opcode opcode) { - const auto dst = Compile_GetRegister(opcode.src_a, ecx); - const auto src = Compile_GetRegister(opcode.src_b, RESULT); - - shr(src, opcode.bf_src_bit); - and_(src, opcode.GetBitfieldMask()); - shl(src, dst.cvt8()); - - Compile_ProcessResult(opcode.result_operation, opcode.dst); -} - -void MacroJITx64Impl::Compile_Read(Macro::Opcode opcode) { - if (optimizer.zero_reg_skip && opcode.src_a == 0) { - if (opcode.immediate == 0) { - xor_(RESULT, RESULT); - } else { - mov(RESULT, opcode.immediate); - } - } else { - auto result = Compile_GetRegister(opcode.src_a, RESULT); - if (opcode.immediate > 2) { - add(result, opcode.immediate); - } else if (opcode.immediate == 1) { - inc(result); - } else if (opcode.immediate < 0) { - sub(result, opcode.immediate * -1); - } - } - - // Equivalent to Engines::Maxwell3D::GetRegisterValue: - if (optimizer.enable_asserts) { - Xbyak::Label pass_range_check; - cmp(RESULT, static_cast(Engines::Maxwell3D::Regs::NUM_REGS)); - jb(pass_range_check); - int3(); - L(pass_range_check); - } - mov(rax, qword[STATE]); - mov(RESULT, - dword[rax + offsetof(Engines::Maxwell3D, regs) + - offsetof(Engines::Maxwell3D::Regs, reg_array) + RESULT.cvt64() * sizeof(u32)]); - - Compile_ProcessResult(opcode.result_operation, opcode.dst); -} - -void Send(Engines::Maxwell3D* maxwell3d, 
Macro::MethodAddress method_address, u32 value) { - maxwell3d->CallMethod(method_address.address, value, true); -} - -void MacroJITx64Impl::Compile_Send(Xbyak::Reg32 value) { - Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); - mov(Common::X64::ABI_PARAM1, qword[STATE]); - mov(Common::X64::ABI_PARAM2.cvt32(), METHOD_ADDRESS); - mov(Common::X64::ABI_PARAM3.cvt32(), value); - Common::X64::CallFarFunction(*this, &Send); - Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); - - Xbyak::Label dont_process{}; - // Get increment - test(METHOD_ADDRESS, 0x3f000); - // If zero, method address doesn't update - je(dont_process); - - mov(ecx, METHOD_ADDRESS); - and_(METHOD_ADDRESS, 0xfff); - shr(ecx, 12); - and_(ecx, 0x3f); - lea(eax, ptr[rcx + METHOD_ADDRESS.cvt64()]); - sal(ecx, 12); - or_(eax, ecx); - - mov(METHOD_ADDRESS, eax); - - L(dont_process); -} - -void MacroJITx64Impl::Compile_Branch(Macro::Opcode opcode) { - ASSERT_MSG(!is_delay_slot, "Executing a branch in a delay slot is not valid"); - const s32 jump_address = - static_cast(pc) + static_cast(opcode.GetBranchTarget() / sizeof(s32)); - - Xbyak::Label end; - auto value = Compile_GetRegister(opcode.src_a, eax); - cmp(value, 0); // test(value, value); - if (optimizer.has_delayed_pc) { - switch (opcode.branch_condition) { - case Macro::BranchCondition::Zero: - jne(end, T_NEAR); - break; - case Macro::BranchCondition::NotZero: - je(end, T_NEAR); - break; - } - - if (opcode.branch_annul) { - xor_(BRANCH_HOLDER, BRANCH_HOLDER); - jmp(labels[jump_address], T_NEAR); - } else { - Xbyak::Label handle_post_exit{}; - Xbyak::Label skip{}; - jmp(skip, T_NEAR); - - L(handle_post_exit); - xor_(BRANCH_HOLDER, BRANCH_HOLDER); - jmp(labels[jump_address], T_NEAR); - - L(skip); - mov(BRANCH_HOLDER, handle_post_exit); - jmp(delay_skip[pc], T_NEAR); - } - } else { - switch (opcode.branch_condition) { - case Macro::BranchCondition::Zero: - je(labels[jump_address], T_NEAR); - 
break; - case Macro::BranchCondition::NotZero: - jne(labels[jump_address], T_NEAR); - break; - } - } - - L(end); -} - -void MacroJITx64Impl::Optimizer_ScanFlags() { - optimizer.can_skip_carry = true; - optimizer.has_delayed_pc = false; - for (auto raw_op : code) { - Macro::Opcode op{}; - op.raw = raw_op; - - if (op.operation == Macro::Operation::ALU) { - // Scan for any ALU operations which actually use the carry flag, if they don't exist in - // our current code we can skip emitting the carry flag handling operations - if (op.alu_operation == Macro::ALUOperation::AddWithCarry || - op.alu_operation == Macro::ALUOperation::SubtractWithBorrow) { - optimizer.can_skip_carry = false; - } - } - - if (op.operation == Macro::Operation::Branch) { - if (!op.branch_annul) { - optimizer.has_delayed_pc = true; - } - } - } -} - -void MacroJITx64Impl::Compile() { - labels.fill(Xbyak::Label()); - - Common::X64::ABI_PushRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); - // JIT state - mov(STATE, Common::X64::ABI_PARAM1); - mov(PARAMETERS, Common::X64::ABI_PARAM2); - mov(MAX_PARAMETER, Common::X64::ABI_PARAM3); - xor_(RESULT, RESULT); - xor_(METHOD_ADDRESS, METHOD_ADDRESS); - xor_(BRANCH_HOLDER, BRANCH_HOLDER); - - mov(dword[STATE + offsetof(JITState, registers) + 4], Compile_FetchParameter()); - - // Track get register for zero registers and mark it as no-op - optimizer.zero_reg_skip = true; - - // AddImmediate tends to be used as a NOP instruction, if we detect this we can - // completely skip the entire code path and no emit anything - optimizer.skip_dummy_addimmediate = true; - - // SMO tends to emit a lot of unnecessary method moves, we can mitigate this by only emitting - // one if our register isn't "dirty" - optimizer.optimize_for_method_move = true; - - // Enable run-time assertions in JITted code - optimizer.enable_asserts = false; - - // Check to see if we can skip emitting certain instructions - Optimizer_ScanFlags(); - - const u32 op_count = 
static_cast(code.size()); - for (u32 i = 0; i < op_count; i++) { - if (i < op_count - 1) { - pc = i + 1; - next_opcode = GetOpCode(); - } else { - next_opcode = {}; - } - pc = i; - Compile_NextInstruction(); - } - - L(end_of_code); - - Common::X64::ABI_PopRegistersAndAdjustStack(*this, Common::X64::ABI_ALL_CALLEE_SAVED, 8); - ret(); - ready(); - program = getCode(); -} - -bool MacroJITx64Impl::Compile_NextInstruction() { - const auto opcode = GetOpCode(); - if (labels[pc].getAddress()) { - return false; - } - - L(labels[pc]); - - switch (opcode.operation) { - case Macro::Operation::ALU: - Compile_ALU(opcode); - break; - case Macro::Operation::AddImmediate: - Compile_AddImmediate(opcode); - break; - case Macro::Operation::ExtractInsert: - Compile_ExtractInsert(opcode); - break; - case Macro::Operation::ExtractShiftLeftImmediate: - Compile_ExtractShiftLeftImmediate(opcode); - break; - case Macro::Operation::ExtractShiftLeftRegister: - Compile_ExtractShiftLeftRegister(opcode); - break; - case Macro::Operation::Read: - Compile_Read(opcode); - break; - case Macro::Operation::Branch: - Compile_Branch(opcode); - break; - default: - UNIMPLEMENTED_MSG("Unimplemented opcode {}", opcode.operation.Value()); - break; - } - - if (optimizer.has_delayed_pc) { - if (opcode.is_exit) { - mov(rax, end_of_code); - test(BRANCH_HOLDER, BRANCH_HOLDER); - cmove(BRANCH_HOLDER, rax); - // Jump to next instruction to skip delay slot check - je(labels[pc + 1], T_NEAR); - } else { - // TODO(ogniK): Optimize delay slot branching - Xbyak::Label no_delay_slot{}; - test(BRANCH_HOLDER, BRANCH_HOLDER); - je(no_delay_slot, T_NEAR); - mov(rax, BRANCH_HOLDER); - xor_(BRANCH_HOLDER, BRANCH_HOLDER); - jmp(rax); - L(no_delay_slot); - } - L(delay_skip[pc]); - if (opcode.is_exit) { - return false; - } - } else { - test(BRANCH_HOLDER, BRANCH_HOLDER); - jne(end_of_code, T_NEAR); - if (opcode.is_exit) { - inc(BRANCH_HOLDER); - return false; - } - } - return true; -} - -static void WarnInvalidParameter(uintptr_t 
parameter, uintptr_t max_parameter) { - LOG_CRITICAL(HW_GPU, - "Macro JIT: invalid parameter access 0x{:x} (0x{:x} is the last parameter)", - parameter, max_parameter - sizeof(u32)); -} - -Xbyak::Reg32 MacroJITx64Impl::Compile_FetchParameter() { - Xbyak::Label parameter_ok{}; - cmp(PARAMETERS, MAX_PARAMETER); - jb(parameter_ok, T_NEAR); - Common::X64::ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); - mov(Common::X64::ABI_PARAM1, PARAMETERS); - mov(Common::X64::ABI_PARAM2, MAX_PARAMETER); - Common::X64::CallFarFunction(*this, &WarnInvalidParameter); - Common::X64::ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); - L(parameter_ok); - mov(eax, dword[PARAMETERS]); - add(PARAMETERS, sizeof(u32)); - return eax; -} - -Xbyak::Reg32 MacroJITx64Impl::Compile_GetRegister(u32 index, Xbyak::Reg32 dst) { - if (index == 0) { - // Register 0 is always zero - xor_(dst, dst); - } else { - mov(dst, dword[STATE + offsetof(JITState, registers) + index * sizeof(u32)]); - } - - return dst; -} - -void MacroJITx64Impl::Compile_ProcessResult(Macro::ResultOperation operation, u32 reg) { - const auto SetRegister = [this](u32 reg_index, const Xbyak::Reg32& result) { - // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero - // register. - if (reg_index == 0) { - return; - } - mov(dword[STATE + offsetof(JITState, registers) + reg_index * sizeof(u32)], result); - }; - const auto SetMethodAddress = [this](const Xbyak::Reg32& reg32) { mov(METHOD_ADDRESS, reg32); }; - - switch (operation) { - case Macro::ResultOperation::IgnoreAndFetch: - SetRegister(reg, Compile_FetchParameter()); - break; - case Macro::ResultOperation::Move: - SetRegister(reg, RESULT); - break; - case Macro::ResultOperation::MoveAndSetMethod: - SetRegister(reg, RESULT); - SetMethodAddress(RESULT); - break; - case Macro::ResultOperation::FetchAndSend: - // Fetch parameter and send result. 
- SetRegister(reg, Compile_FetchParameter()); - Compile_Send(RESULT); - break; - case Macro::ResultOperation::MoveAndSend: - // Move and send result. - SetRegister(reg, RESULT); - Compile_Send(RESULT); - break; - case Macro::ResultOperation::FetchAndSetMethod: - // Fetch parameter and use result as Method Address. - SetRegister(reg, Compile_FetchParameter()); - SetMethodAddress(RESULT); - break; - case Macro::ResultOperation::MoveAndSetMethodFetchAndSend: - // Move result and use as Method Address, then fetch and send parameter. - SetRegister(reg, RESULT); - SetMethodAddress(RESULT); - Compile_Send(Compile_FetchParameter()); - break; - case Macro::ResultOperation::MoveAndSetMethodSend: - // Move result and use as Method Address, then send bits 12:17 of result. - SetRegister(reg, RESULT); - SetMethodAddress(RESULT); - shr(RESULT, 12); - and_(RESULT, 0b111111); - Compile_Send(RESULT); - break; - default: - UNIMPLEMENTED_MSG("Unimplemented macro operation {}", operation); - break; - } -} - -Macro::Opcode MacroJITx64Impl::GetOpCode() const { - ASSERT(pc < code.size()); - return {code[pc]}; -} -} // Anonymous namespace - -MacroJITx64::MacroJITx64(Engines::Maxwell3D& maxwell3d_) - : MacroEngine{maxwell3d_}, maxwell3d{maxwell3d_} {} - -std::unique_ptr MacroJITx64::Compile(const std::vector& code) { - return std::make_unique(maxwell3d, code); -} -} // namespace Tegra diff --git a/src/video_core/macro/macro_jit_x64.h b/src/video_core/macro/macro_jit_x64.h deleted file mode 100644 index 99ee1b9e68..0000000000 --- a/src/video_core/macro/macro_jit_x64.h +++ /dev/null @@ -1,26 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include "common/common_types.h" -#include "video_core/macro/macro.h" - -namespace Tegra { - -namespace Engines { -class Maxwell3D; -} - -class MacroJITx64 final : public MacroEngine { -public: - explicit MacroJITx64(Engines::Maxwell3D& maxwell3d_); - -protected: - 
std::unique_ptr Compile(const std::vector& code) override; - -private: - Engines::Maxwell3D& maxwell3d; -}; - -} // namespace Tegra diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 75254049a6..14ab5dd967 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -1214,19 +1214,16 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageV ImageView::~ImageView() = default; GLuint ImageView::StorageView(Shader::TextureType texture_type, Shader::ImageFormat image_format) { - if (image_format == Shader::ImageFormat::Typeless) { + if (image_format == Shader::ImageFormat::Typeless) return Handle(texture_type); - } - const bool is_signed{image_format == Shader::ImageFormat::R8_SINT || - image_format == Shader::ImageFormat::R16_SINT}; - if (!storage_views) { - storage_views = std::make_unique(); - } + const bool is_signed = image_format == Shader::ImageFormat::R8_SINT + || image_format == Shader::ImageFormat::R16_SINT; + if (!storage_views) + storage_views.emplace(); auto& type_views{is_signed ? 
storage_views->signeds : storage_views->unsigneds}; - GLuint& view{type_views[static_cast(texture_type)]}; - if (view == 0) { + GLuint& view{type_views[size_t(texture_type)]}; + if (view == 0) view = MakeView(texture_type, ShaderFormat(image_format)); - } return view; } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 3de24508fe..e2a2022cb2 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -302,7 +302,7 @@ private: std::array views{}; std::vector stored_views; - std::unique_ptr storage_views; + std::optional storage_views; GLenum internal_format = GL_NONE; GLuint default_handle = 0; u32 buffer_size = 0; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 101a884fd7..c3a5ed391b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -376,7 +376,6 @@ void RasterizerVulkan::DrawTexture() { } void RasterizerVulkan::Clear(u32 layer_count) { - FlushWork(); gpu_memory->FlushCaching(); @@ -396,9 +395,7 @@ void RasterizerVulkan::Clear(u32 layer_count) { scheduler.RequestRenderpass(framebuffer); query_cache.NotifySegment(true); - query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, - maxwell3d->regs.zpass_pixel_count_enable); - + query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, maxwell3d->regs.zpass_pixel_count_enable); u32 up_scale = 1; u32 down_shift = 0; if (texture_cache.IsRescaling()) { @@ -443,14 +440,14 @@ void RasterizerVulkan::Clear(u32 layer_count) { offset = 0; return; } - if (offset >= static_cast(limit)) { - offset = static_cast(limit); + if (offset >= s32(limit)) { + offset = s32(limit); extent = 0; return; } - const u64 end_coord = static_cast(offset) + extent; + const u64 end_coord = u64(offset) + extent; if (end_coord > limit) { - extent = limit - 
static_cast(offset); + extent = limit - u32(offset); } }; @@ -464,30 +461,22 @@ void RasterizerVulkan::Clear(u32 layer_count) { const u32 color_attachment = regs.clear_surface.RT; if (use_color && framebuffer->HasAspectColorBit(color_attachment)) { - const auto format = - VideoCore::Surface::PixelFormatFromRenderTargetFormat(regs.rt[color_attachment].format); + const auto format = VideoCore::Surface::PixelFormatFromRenderTargetFormat(regs.rt[color_attachment].format); bool is_integer = IsPixelFormatInteger(format); bool is_signed = IsPixelFormatSignedInteger(format); size_t int_size = PixelComponentSizeBitsInteger(format); VkClearValue clear_value{}; if (!is_integer) { - std::memcpy(clear_value.color.float32, regs.clear_color.data(), - regs.clear_color.size() * sizeof(f32)); + std::memcpy(clear_value.color.float32, regs.clear_color.data(), regs.clear_color.size() * sizeof(f32)); } else if (!is_signed) { - for (size_t i = 0; i < 4; i++) { - clear_value.color.uint32[i] = static_cast( - static_cast(static_cast(int_size) << 1U) * regs.clear_color[i]); - } + for (size_t i = 0; i < 4; i++) + clear_value.color.uint32[i] = u32(f32(u64(int_size) << 1U) * regs.clear_color[i]); } else { - for (size_t i = 0; i < 4; i++) { - clear_value.color.int32[i] = - static_cast(static_cast(static_cast(int_size - 1) << 1) * - (regs.clear_color[i] - 0.5f)); - } + for (size_t i = 0; i < 4; i++) + clear_value.color.int32[i] = s32(f32(s64(int_size - 1) << 1) * (regs.clear_color[i] - 0.5f)); } - if (regs.clear_surface.R && regs.clear_surface.G && regs.clear_surface.B && - regs.clear_surface.A) { + if (regs.clear_surface.R && regs.clear_surface.G && regs.clear_surface.B && regs.clear_surface.A) { scheduler.Record([color_attachment, clear_value, clear_rect](vk::CommandBuffer cmdbuf) { const VkClearAttachment attachment{ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, @@ -497,14 +486,11 @@ void RasterizerVulkan::Clear(u32 layer_count) { cmdbuf.ClearAttachments(attachment, clear_rect); }); } else { - u8 
color_mask = static_cast(regs.clear_surface.R | regs.clear_surface.G << 1 | - regs.clear_surface.B << 2 | regs.clear_surface.A << 3); + u8 color_mask = u8(regs.clear_surface.R | regs.clear_surface.G << 1 | regs.clear_surface.B << 2 | regs.clear_surface.A << 3); Region2D dst_region = { Offset2D{.x = clear_rect.rect.offset.x, .y = clear_rect.rect.offset.y}, - Offset2D{.x = clear_rect.rect.offset.x + - static_cast(clear_rect.rect.extent.width), - .y = clear_rect.rect.offset.y + - static_cast(clear_rect.rect.extent.height)}}; + Offset2D{.x = clear_rect.rect.offset.x + s32(clear_rect.rect.extent.width), + .y = clear_rect.rect.offset.y + s32(clear_rect.rect.extent.height)}}; blit_image.ClearColor(framebuffer, color_mask, regs.clear_color, dst_region); } } @@ -527,11 +513,10 @@ void RasterizerVulkan::Clear(u32 layer_count) { regs.stencil_front_mask != 0) { Region2D dst_region = { Offset2D{.x = clear_rect.rect.offset.x, .y = clear_rect.rect.offset.y}, - Offset2D{.x = clear_rect.rect.offset.x + static_cast(clear_rect.rect.extent.width), - .y = clear_rect.rect.offset.y + - static_cast(clear_rect.rect.extent.height)}}; + Offset2D{.x = clear_rect.rect.offset.x + s32(clear_rect.rect.extent.width), + .y = clear_rect.rect.offset.y + s32(clear_rect.rect.extent.height)}}; blit_image.ClearDepthStencil(framebuffer, use_depth, regs.clear_depth, - static_cast(regs.stencil_front_mask), regs.clear_stencil, + u8(regs.stencil_front_mask), regs.clear_stencil, regs.stencil_front_func_mask, dst_region); } else { scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil, diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index cff7a73903..a950ffed7a 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -860,8 +860,7 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, Scheduler& sched compute_pass_descriptor_queue, 
memory_allocator); } if (device.IsStorageImageMultisampleSupported()) { - msaa_copy_pass = std::make_unique( - device, scheduler, descriptor_pool, staging_buffer_pool, compute_pass_descriptor_queue); + msaa_copy_pass.emplace(device, scheduler, descriptor_pool, staging_buffer_pool, compute_pass_descriptor_queue); } if (!device.IsKhrImageFormatListSupported()) { return; @@ -1675,10 +1674,10 @@ void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset, // CHANGE: Gate the MSAA path more strictly and only use it for color, when the pass and device // support are available. Avoid running the MSAA path when prerequisites aren't met, // preventing validation and runtime issues. - const bool wants_msaa_upload = info.num_samples > 1 && - (aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) != 0 && - runtime->CanUploadMSAA() && runtime->msaa_copy_pass != nullptr && - runtime->device.IsStorageImageMultisampleSupported(); + const bool wants_msaa_upload = info.num_samples > 1 + && (aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) != 0 + && runtime->CanUploadMSAA() && runtime->msaa_copy_pass.has_value() + && runtime->device.IsStorageImageMultisampleSupported(); if (wants_msaa_upload) { // Create a temporary non-MSAA image to upload the data first @@ -2047,8 +2046,7 @@ bool Image::BlitScaleHelper(bool scale_up) { const u32 scaled_width = resolution.ScaleUp(info.size.width); const u32 scaled_height = is_2d ? resolution.ScaleUp(info.size.height) : info.size.height; std::unique_ptr& blit_view = scale_up ? scale_view : normal_view; - std::unique_ptr& blit_framebuffer = - scale_up ? scale_framebuffer : normal_framebuffer; + std::optional& blit_framebuffer = scale_up ? scale_framebuffer : normal_framebuffer; if (!blit_view) { const auto view_info = ImageViewInfo(ImageViewType::e2D, info.format); blit_view = std::make_unique(*runtime, view_info, NULL_IMAGE_ID, *this); @@ -2060,11 +2058,11 @@ bool Image::BlitScaleHelper(bool scale_up) { const u32 dst_height = scale_up ? 
scaled_height : info.size.height; const Region2D src_region{ .start = {0, 0}, - .end = {static_cast(src_width), static_cast(src_height)}, + .end = {s32(src_width), s32(src_height)}, }; const Region2D dst_region{ .start = {0, 0}, - .end = {static_cast(dst_width), static_cast(dst_height)}, + .end = {s32(dst_width), s32(dst_height)}, }; const VkExtent2D extent{ .width = (std::max)(scaled_width, info.size.width), @@ -2073,21 +2071,15 @@ bool Image::BlitScaleHelper(bool scale_up) { auto* view_ptr = blit_view.get(); if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) { - if (!blit_framebuffer) { - blit_framebuffer = - std::make_unique(*runtime, view_ptr, nullptr, extent, scale_up); - } - - runtime->blit_image_helper.BlitColor(blit_framebuffer.get(), *blit_view, dst_region, - src_region, operation, BLIT_OPERATION); + if (!blit_framebuffer) + blit_framebuffer.emplace(*runtime, view_ptr, nullptr, extent, scale_up); + runtime->blit_image_helper.BlitColor(&*blit_framebuffer, *blit_view, + dst_region, src_region, operation, BLIT_OPERATION); } else if (aspect_mask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { - if (!blit_framebuffer) { - blit_framebuffer = - std::make_unique(*runtime, nullptr, view_ptr, extent, scale_up); - } - runtime->blit_image_helper.BlitDepthStencil(blit_framebuffer.get(), *blit_view, - dst_region, src_region, operation, - BLIT_OPERATION); + if (!blit_framebuffer) + blit_framebuffer.emplace(*runtime, nullptr, view_ptr, extent, scale_up); + runtime->blit_image_helper.BlitDepthStencil(&*blit_framebuffer, *blit_view, + dst_region, src_region, operation, BLIT_OPERATION); } else { // TODO: Use helper blits where applicable flags &= ~ImageFlagBits::Rescaled; @@ -2200,9 +2192,9 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI } } -ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, - ImageId image_id_, Image& image, const SlotVector& slot_imgs) - : ImageView{runtime, info, 
image_id_, image} { +ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewInfo& info, ImageId image_id_, Image& image, const SlotVector& slot_imgs) + : ImageView{runtime, info, image_id_, image} +{ slot_images = &slot_imgs; } @@ -2267,33 +2259,25 @@ VkImageView ImageView::ColorView() { VkImageView ImageView::StorageView(Shader::TextureType texture_type, Shader::ImageFormat image_format) { - if (!image_handle) { - return VK_NULL_HANDLE; - } - if (image_format == Shader::ImageFormat::Typeless) { - return Handle(texture_type); - } - const bool is_signed{image_format == Shader::ImageFormat::R8_SINT || - image_format == Shader::ImageFormat::R16_SINT}; - if (!storage_views) { - storage_views = std::make_unique(); - } - auto& views{is_signed ? storage_views->signeds : storage_views->unsigneds}; - auto& view{views[static_cast(texture_type)]}; - if (view) { + if (image_handle) { + if (image_format == Shader::ImageFormat::Typeless) { + return Handle(texture_type); + } + const bool is_signed = image_format == Shader::ImageFormat::R8_SINT + || image_format == Shader::ImageFormat::R16_SINT; + if (!storage_views) + storage_views.emplace(); + auto& views{is_signed ? 
storage_views->signeds : storage_views->unsigneds}; + auto& view{views[size_t(texture_type)]}; + if (!view) + view = MakeView(Format(image_format), VK_IMAGE_ASPECT_COLOR_BIT); return *view; } - view = MakeView(Format(image_format), VK_IMAGE_ASPECT_COLOR_BIT); - return *view; + return VK_NULL_HANDLE; } bool ImageView::IsRescaled() const noexcept { - if (!slot_images) { - return false; - } - const auto& slots = *slot_images; - const auto& src_image = slots[image_id]; - return src_image.IsRescaled(); + return (*slot_images)[image_id].IsRescaled(); } vk::ImageView ImageView::MakeView(VkFormat vk_format, VkImageAspectFlags aspect_mask) { diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index dcc835f05e..4bb9687ab0 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -133,7 +133,7 @@ public: vk::Buffer swizzle_table_buffer; VkDeviceSize swizzle_table_size = 0; - std::unique_ptr msaa_copy_pass; + std::optional msaa_copy_pass; const Settings::ResolutionScalingInfo& resolution; std::array, VideoCore::Surface::MaxPixelFormat> view_formats; @@ -141,6 +141,89 @@ public: std::array buffers{}; }; +class Framebuffer { +public: + explicit Framebuffer(TextureCacheRuntime& runtime, std::span color_buffers, + ImageView* depth_buffer, const VideoCommon::RenderTargets& key); + + explicit Framebuffer(TextureCacheRuntime& runtime, ImageView* color_buffer, + ImageView* depth_buffer, VkExtent2D extent, bool is_rescaled); + + ~Framebuffer(); + + Framebuffer(const Framebuffer&) = delete; + Framebuffer& operator=(const Framebuffer&) = delete; + + Framebuffer(Framebuffer&&) = default; + Framebuffer& operator=(Framebuffer&&) = default; + + void CreateFramebuffer(TextureCacheRuntime& runtime, + std::span color_buffers, ImageView* depth_buffer, + bool is_rescaled = false); + + [[nodiscard]] VkFramebuffer Handle() const noexcept { + return *framebuffer; + } + + 
[[nodiscard]] VkRenderPass RenderPass() const noexcept { + return renderpass; + } + + [[nodiscard]] VkExtent2D RenderArea() const noexcept { + return render_area; + } + + [[nodiscard]] VkSampleCountFlagBits Samples() const noexcept { + return samples; + } + + [[nodiscard]] u32 NumColorBuffers() const noexcept { + return num_color_buffers; + } + + [[nodiscard]] u32 NumImages() const noexcept { + return num_images; + } + + [[nodiscard]] const std::array& Images() const noexcept { + return images; + } + + [[nodiscard]] const std::array& ImageRanges() const noexcept { + return image_ranges; + } + + [[nodiscard]] bool HasAspectColorBit(size_t index) const noexcept { + return (image_ranges.at(rt_map[index]).aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) != 0; + } + + [[nodiscard]] bool HasAspectDepthBit() const noexcept { + return has_depth; + } + + [[nodiscard]] bool HasAspectStencilBit() const noexcept { + return has_stencil; + } + + [[nodiscard]] bool IsRescaled() const noexcept { + return is_rescaled; + } + +private: + vk::Framebuffer framebuffer; + VkRenderPass renderpass{}; + VkExtent2D render_area{}; + VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; + u32 num_color_buffers = 0; + u32 num_images = 0; + std::array images{}; + std::array image_ranges{}; + std::array rt_map{}; + bool has_depth{}; + bool has_stencil{}; + bool is_rescaled{}; +}; + class Image : public VideoCommon::ImageBase { public: explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, @@ -226,10 +309,9 @@ private: VkImageAspectFlags aspect_mask = 0; bool initialized = false; - std::unique_ptr scale_framebuffer; + std::optional scale_framebuffer; + std::optional normal_framebuffer; std::unique_ptr scale_view; - - std::unique_ptr normal_framebuffer; std::unique_ptr normal_view; }; @@ -297,7 +379,7 @@ private: const SlotVector* slot_images = nullptr; std::array image_views; - std::unique_ptr storage_views; + std::optional storage_views; vk::ImageView depth_view; 
vk::ImageView stencil_view; vk::ImageView color_view; @@ -331,89 +413,6 @@ private: vk::Sampler sampler_default_anisotropy; }; -class Framebuffer { -public: - explicit Framebuffer(TextureCacheRuntime& runtime, std::span color_buffers, - ImageView* depth_buffer, const VideoCommon::RenderTargets& key); - - explicit Framebuffer(TextureCacheRuntime& runtime, ImageView* color_buffer, - ImageView* depth_buffer, VkExtent2D extent, bool is_rescaled); - - ~Framebuffer(); - - Framebuffer(const Framebuffer&) = delete; - Framebuffer& operator=(const Framebuffer&) = delete; - - Framebuffer(Framebuffer&&) = default; - Framebuffer& operator=(Framebuffer&&) = default; - - void CreateFramebuffer(TextureCacheRuntime& runtime, - std::span color_buffers, ImageView* depth_buffer, - bool is_rescaled = false); - - [[nodiscard]] VkFramebuffer Handle() const noexcept { - return *framebuffer; - } - - [[nodiscard]] VkRenderPass RenderPass() const noexcept { - return renderpass; - } - - [[nodiscard]] VkExtent2D RenderArea() const noexcept { - return render_area; - } - - [[nodiscard]] VkSampleCountFlagBits Samples() const noexcept { - return samples; - } - - [[nodiscard]] u32 NumColorBuffers() const noexcept { - return num_color_buffers; - } - - [[nodiscard]] u32 NumImages() const noexcept { - return num_images; - } - - [[nodiscard]] const std::array& Images() const noexcept { - return images; - } - - [[nodiscard]] const std::array& ImageRanges() const noexcept { - return image_ranges; - } - - [[nodiscard]] bool HasAspectColorBit(size_t index) const noexcept { - return (image_ranges.at(rt_map[index]).aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) != 0; - } - - [[nodiscard]] bool HasAspectDepthBit() const noexcept { - return has_depth; - } - - [[nodiscard]] bool HasAspectStencilBit() const noexcept { - return has_stencil; - } - - [[nodiscard]] bool IsRescaled() const noexcept { - return is_rescaled; - } - -private: - vk::Framebuffer framebuffer; - VkRenderPass renderpass{}; - VkExtent2D render_area{}; 
- VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT; - u32 num_color_buffers = 0; - u32 num_images = 0; - std::array images{}; - std::array image_ranges{}; - std::array rt_map{}; - bool has_depth{}; - bool has_stencil{}; - bool is_rescaled{}; -}; - struct TextureCacheParams { static constexpr bool ENABLE_VALIDATION = true; static constexpr bool FRAMEBUFFER_BLITS = false; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 425c8e23de..53fb57317f 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -596,10 +596,10 @@ FramebufferId TextureCache

::GetFramebufferId(const RenderTargets& key) { return framebuffer_id; } std::array<ImageView*, NUM_RT> color_buffers; - std::ranges::transform(key.color_buffer_ids, color_buffers.begin(), - [this](ImageViewId id) { return id ? &slot_image_views[id] : nullptr; }); - ImageView* const depth_buffer = - key.depth_buffer_id ? &slot_image_views[key.depth_buffer_id] : nullptr; + std::ranges::transform(key.color_buffer_ids, color_buffers.begin(), [this](ImageViewId id) { + return id ? &slot_image_views[id] : nullptr; + }); + ImageView* const depth_buffer = key.depth_buffer_id ? &slot_image_views[key.depth_buffer_id] : nullptr; framebuffer_id = slot_framebuffers.insert(runtime, color_buffers, depth_buffer, key); return framebuffer_id; }