// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later #include #include #include "common/arm64/native_clock.h" #include "common/literals.h" #include "core/arm/nce/arm_nce.h" #include "core/arm/nce/guest_context.h" #include "core/arm/nce/instructions.h" #include "core/arm/nce/patcher.h" #include "core/core.h" #include "core/core_timing.h" #include "core/hle/kernel/svc.h" #include "core/memory.h" #include "core/hle/kernel/k_thread.h" namespace Core::NCE { Patcher::Patcher(Patcher&& other) noexcept : patch_cache(std::move(other.patch_cache)), m_patch_instructions(std::move(other.m_patch_instructions)), m_patch_instructions_pre(std::move(other.m_patch_instructions_pre)), c(m_patch_instructions), c_pre(m_patch_instructions_pre), m_save_context(other.m_save_context), m_load_context(other.m_load_context), m_save_context_pre(other.m_save_context_pre), m_load_context_pre(other.m_load_context_pre), mode(other.mode), total_program_size(other.total_program_size), m_relocate_module_index(other.m_relocate_module_index), modules(std::move(other.modules)), curr_patch(nullptr) { if (!modules.empty()) { curr_patch = &modules.back(); } } using namespace Common::Literals; using namespace oaknut::util; using NativeExecutionParameters = Kernel::KThread::NativeExecutionParameters; constexpr size_t MaxRelativeBranch = 128_MiB; constexpr u32 ModuleCodeIndex = 0x24 / sizeof(u32); Patcher::Patcher() : c(m_patch_instructions), c_pre(m_patch_instructions_pre) { LOG_WARNING(Core_ARM, "Patcher initialized with LRU cache {}", patch_cache.isEnabled() ? "enabled" : "disabled"); // The first word of the patch section is always a branch to the first instruction of the // module. c.dw(0); c_pre.dw(0); // Write save context helper function. c.l(m_save_context); WriteSaveContext(); c_pre.l(m_save_context_pre); WriteSaveContext(c_pre); // Write load context helper function. c.l(m_load_context); WriteLoadContext(); c_pre.l(m_load_context_pre); WriteLoadContext(c_pre); } Patcher::~Patcher() = default; bool Patcher::PatchText(const Kernel::PhysicalMemory& program_image, const Kernel::CodeSet::Segment& code) { // If we have patched modules but cannot reach the new module, then it needs its own patcher. const size_t image_size = program_image.size(); // Check if we need split mode for large modules. A64 max takes 128MB // tests showed that, with update, some are larger. (In this case 208MB) bool use_split = false; if (image_size > MaxRelativeBranch) { if (total_program_size > 0) { return false; } use_split = true; } else if (total_program_size + image_size > MaxRelativeBranch && total_program_size > 0) { return false; } // Add a new module patch to our list modules.emplace_back(); curr_patch = &modules.back(); // The first word of the patch section is always a branch to the first instruction of the // module. if (use_split) { // curr_patch->m_branch_to_module_relocations.push_back({0, 0}); curr_patch->m_branch_to_module_relocations_pre.push_back({0, 0}); } else { curr_patch->m_branch_to_module_relocations.push_back({0, 0}); } // Retrieve text segment data. const auto text = std::span{program_image}.subspan(code.offset, code.size); const auto text_words = std::span{reinterpret_cast(text.data()), text.size() / sizeof(u32)}; // Loop through instructions, patching as needed. for (u32 i = ModuleCodeIndex; i < static_cast(text_words.size()); i++) { const u32 inst = text_words[i]; const auto AddRelocations = [&](bool& pre_buffer) { const uintptr_t this_offset = i * sizeof(u32); const uintptr_t next_offset = this_offset + sizeof(u32); pre_buffer = use_split && (this_offset < MaxRelativeBranch); // Relocate to pre- or post-patch if (pre_buffer) { this->BranchToPatchPre(this_offset); } else { this->BranchToPatch(this_offset); } // Relocate from patch to next instruction. return next_offset; }; // SVC if (auto svc = SVC{inst}; svc.Verify()) { bool pre_buffer = false; auto ret = AddRelocations(pre_buffer); if (pre_buffer) { WriteSvcTrampoline(ret, svc.GetValue(), c_pre, m_save_context_pre, m_load_context_pre); } else { WriteSvcTrampoline(ret, svc.GetValue(), c, m_save_context, m_load_context); } continue; } // MRS Xn, TPIDR_EL0 // MRS Xn, TPIDRRO_EL0 if (auto mrs = MRS{inst}; mrs.Verify() && (mrs.GetSystemReg() == TpidrroEl0 || mrs.GetSystemReg() == TpidrEl0)) { const auto src_reg = mrs.GetSystemReg() == TpidrroEl0 ? oaknut::SystemReg::TPIDRRO_EL0 : oaknut::SystemReg::TPIDR_EL0; const auto dest_reg = oaknut::XReg{static_cast(mrs.GetRt())}; bool pre_buffer = false; auto ret = AddRelocations(pre_buffer); if (pre_buffer) { WriteMrsHandler(ret, dest_reg, src_reg, c_pre); } else { WriteMrsHandler(ret, dest_reg, src_reg, c); } continue; } // MRS Xn, CNTPCT_EL0 if (auto mrs = MRS{inst}; mrs.Verify() && mrs.GetSystemReg() == CntpctEl0) { bool pre_buffer = false; auto ret = AddRelocations(pre_buffer); if (pre_buffer) { WriteCntpctHandler(ret, oaknut::XReg{static_cast(mrs.GetRt())}, c_pre); } else { WriteCntpctHandler(ret, oaknut::XReg{static_cast(mrs.GetRt())}, c); } continue; } // MRS Xn, CNTFRQ_EL0 if (auto mrs = MRS{inst}; mrs.Verify() && mrs.GetSystemReg() == CntfrqEl0) { UNREACHABLE(); } // MSR TPIDR_EL0, Xn if (auto msr = MSR{inst}; msr.Verify() && msr.GetSystemReg() == TpidrEl0) { bool pre_buffer = false; auto ret = AddRelocations(pre_buffer); if (pre_buffer) { WriteMsrHandler(ret, oaknut::XReg{static_cast(msr.GetRt())}, c_pre); } else { WriteMsrHandler(ret, oaknut::XReg{static_cast(msr.GetRt())}, c); } continue; } if (auto exclusive = Exclusive{inst}; exclusive.Verify()) { curr_patch->m_exclusives.push_back(i); } } // Determine patching mode for the final relocation step total_program_size += image_size; if (use_split) { this->mode = PatchMode::Split; } else { this->mode = image_size > MaxRelativeBranch ? PatchMode::PreText : PatchMode::PostData; } return true; } bool Patcher::RelocateAndCopy(Common::ProcessAddress load_base, const Kernel::CodeSet::Segment& code, Kernel::PhysicalMemory& program_image, EntryTrampolines* out_trampolines) { const size_t patch_size = GetSectionSize(); const size_t pre_patch_size = GetPreSectionSize(); const size_t image_size = (mode == PatchMode::Split) ? program_image.size() - pre_patch_size : program_image.size(); // Retrieve text segment data. const auto text = std::span{program_image}.subspan(code.offset, code.size); const auto text_words = std::span{reinterpret_cast(text.data()), text.size() / sizeof(u32)}; const auto ApplyBranchToPatchRelocation = [&](u32* target, const Relocation& rel) { oaknut::CodeGenerator rc{target}; if (mode == PatchMode::PreText) { rc.B(rel.patch_offset - patch_size - rel.module_offset); } else { rc.B(total_program_size - rel.module_offset + rel.patch_offset); } }; const auto ApplyBranchToPatchRelocationPre = [&](u32* target, const Relocation& rel) { oaknut::CodeGenerator rc{target}; rc.B(static_cast(rel.patch_offset) - static_cast(pre_patch_size) - static_cast(rel.module_offset)); }; const auto ApplyBranchToPatchRelocationPostSplit = [&](u32* target, const Relocation& rel) { oaknut::CodeGenerator rc{target}; rc.B(static_cast(image_size) + static_cast(rel.patch_offset) - static_cast(rel.module_offset)); }; const auto ApplyBranchToModuleRelocation = [&](u32* target, const Relocation& rel) { oaknut::CodeGenerator rc{target}; if (mode == PatchMode::PreText) { rc.B(patch_size - rel.patch_offset + rel.module_offset); } else { rc.B(rel.module_offset - total_program_size - rel.patch_offset); } }; const auto ApplyBranchToModuleRelocationPre = [&](u32* target, const Relocation& rel) { oaknut::CodeGenerator rc{target}; rc.B(static_cast(pre_patch_size) + static_cast(rel.module_offset) - static_cast(rel.patch_offset)); }; const auto ApplyBranchToModuleRelocationPostSplit = [&](u32* target, const Relocation& rel) { oaknut::CodeGenerator rc{target}; rc.B(static_cast(rel.module_offset) - static_cast(image_size) - static_cast(rel.patch_offset)); }; const auto RebasePatch = [&](ptrdiff_t patch_offset) { if (mode == PatchMode::PreText) { return GetInteger(load_base) + patch_offset; } else { return GetInteger(load_base) + total_program_size + patch_offset; } }; const auto RebasePc = [&](uintptr_t module_offset) { if (mode == PatchMode::PreText) { return GetInteger(load_base) + patch_size + module_offset; } if (mode == PatchMode::Split) { return GetInteger(load_base) + pre_patch_size + module_offset; } return GetInteger(load_base) + module_offset; }; // We are now ready to relocate! auto& patch = modules[m_relocate_module_index++]; if (mode == PatchMode::Split) { for (const Relocation& rel : patch.m_branch_to_pre_patch_relocations) { ApplyBranchToPatchRelocationPre(text_words.data() + rel.module_offset / sizeof(u32), rel); } LOG_DEBUG(Core_ARM, "applied Pre: {}", patch.m_branch_to_pre_patch_relocations.size()); for (const Relocation& rel : patch.m_branch_to_patch_relocations) { ApplyBranchToPatchRelocationPostSplit(text_words.data() + rel.module_offset / sizeof(u32), rel); } LOG_DEBUG(Core_ARM, "applied Post: {}", patch.m_branch_to_patch_relocations.size()); for (const Relocation& rel : patch.m_branch_to_module_relocations_pre) { ApplyBranchToModuleRelocationPre(m_patch_instructions_pre.data() + rel.patch_offset / sizeof(u32), rel); } LOG_DEBUG(Core_ARM, "aplied Pre-module {}", patch.m_branch_to_module_relocations_pre.size()); for (const Relocation& rel : patch.m_branch_to_module_relocations) { ApplyBranchToModuleRelocationPostSplit(m_patch_instructions.data() + rel.patch_offset / sizeof(u32), rel); } LOG_DEBUG(Core_ARM, "applied Post-module {}", patch.m_branch_to_module_relocations.size()); // Pre for (const Relocation& rel : patch.m_write_module_pc_relocations_pre) { oaknut::CodeGenerator rc{m_patch_instructions_pre.data() + rel.patch_offset / sizeof(u32)}; rc.dx(RebasePc(rel.module_offset)); } // Post for (const Relocation& rel : patch.m_write_module_pc_relocations) { oaknut::CodeGenerator rc{m_patch_instructions.data() + rel.patch_offset / sizeof(u32)}; rc.dx(RebasePc(rel.module_offset)); } // Trampolines (split pre + post) for (const Trampoline& rel : patch.m_trampolines_pre) { out_trampolines->insert({RebasePc(rel.module_offset), GetInteger(load_base) + rel.patch_offset}); } for (const Trampoline& rel : patch.m_trampolines) { out_trampolines->insert({RebasePc(rel.module_offset), GetInteger(load_base) + pre_patch_size + image_size + rel.patch_offset}); } if (!m_patch_instructions_pre.empty()) { u32 insn = m_patch_instructions_pre[0]; if ((insn & 0xFC000000) == 0x14000000) { s32 imm26 = insn & 0x3FFFFFF; // Sign extend if (imm26 & 0x2000000) imm26 |= 0xFC000000; } } } else { for (const Relocation& rel : patch.m_branch_to_patch_relocations) { ApplyBranchToPatchRelocation(text_words.data() + rel.module_offset / sizeof(u32), rel); } for (const Relocation& rel : patch.m_branch_to_module_relocations) { ApplyBranchToModuleRelocation(m_patch_instructions.data() + rel.patch_offset / sizeof(u32), rel); } // Rewrite PC constants for (const Relocation& rel : patch.m_write_module_pc_relocations) { oaknut::CodeGenerator rc{m_patch_instructions.data() + rel.patch_offset / sizeof(u32)}; rc.dx(RebasePc(rel.module_offset)); } } if (mode != PatchMode::Split) { for (const Trampoline& rel : patch.m_trampolines) { out_trampolines->insert({RebasePc(rel.module_offset), RebasePatch(rel.patch_offset)}); } } // Cortex-A57 seems to treat all exclusives as ordered, but newer processors do not. // Convert to ordered to preserve this assumption. for (const ModuleTextAddress i : patch.m_exclusives) { auto exclusive = Exclusive{text_words[i]}; text_words[i] = exclusive.AsOrdered(); } // Remove the patched module size from the total. This is done so total_program_size // always represents the distance from the currently patched module to the patch section. total_program_size -= image_size; // Only copy to the program image of the last module if (m_relocate_module_index == modules.size()) { if (this->mode == PatchMode::PreText) { ASSERT(image_size == total_program_size); std::memcpy(program_image.data(), m_patch_instructions.data(), m_patch_instructions.size() * sizeof(u32)); } else if (this->mode == PatchMode::Split) { const size_t current_size = program_image.size(); program_image.resize(current_size + patch_size); // Copy pre-patch buffer to the beginning std::memcpy(program_image.data(), m_patch_instructions_pre.data(), m_patch_instructions_pre.size() * sizeof(u32)); // Same for post-patch buffer to the end std::memcpy(program_image.data() + current_size, m_patch_instructions.data(), m_patch_instructions.size() * sizeof(u32)); } else { program_image.resize(image_size + patch_size); std::memcpy(program_image.data() + image_size, m_patch_instructions.data(), m_patch_instructions.size() * sizeof(u32)); } return true; } return false; } size_t Patcher::GetSectionSize() const noexcept { return Common::AlignUp(m_patch_instructions.size() * sizeof(u32), Core::Memory::YUZU_PAGESIZE); } size_t Patcher::GetPreSectionSize() const noexcept { return Common::AlignUp(m_patch_instructions_pre.size() * sizeof(u32), Core::Memory::YUZU_PAGESIZE); } void Patcher::WriteLoadContext(oaknut::VectorCodeGenerator& cg) { // This function was called, which modifies X30, so use that as a scratch register. // SP contains the guest X30, so save our return X30 to SP + 8, since we have allocated 16 bytes // of stack. cg.STR(X30, SP, 8); cg.MRS(X30, oaknut::SystemReg::TPIDR_EL0); cg.LDR(X30, X30, offsetof(NativeExecutionParameters, native_context)); // Load system registers. cg.LDR(W0, X30, offsetof(GuestContext, fpsr)); cg.MSR(oaknut::SystemReg::FPSR, X0); cg.LDR(W0, X30, offsetof(GuestContext, fpcr)); cg.MSR(oaknut::SystemReg::FPCR, X0); cg.LDR(W0, X30, offsetof(GuestContext, nzcv)); cg.MSR(oaknut::SystemReg::NZCV, X0); // Load all vector registers. static constexpr size_t VEC_OFF = offsetof(GuestContext, vector_registers); for (int i = 0; i <= 30; i += 2) { cg.LDP(oaknut::QReg{i}, oaknut::QReg{i + 1}, X30, VEC_OFF + 16 * i); } // Load all general-purpose registers except X30. for (int i = 0; i <= 28; i += 2) { cg.LDP(oaknut::XReg{i}, oaknut::XReg{i + 1}, X30, 8 * i); } // Reload our return X30 from the stack and return. // The patch code will reload the guest X30 for us. cg.LDR(X30, SP, 8); cg.RET(); } void Patcher::WriteSaveContext(oaknut::VectorCodeGenerator& cg) { // This function was called, which modifies X30, so use that as a scratch register. // SP contains the guest X30, so save our X30 to SP + 8, since we have allocated 16 bytes of // stack. cg.STR(X30, SP, 8); cg.MRS(X30, oaknut::SystemReg::TPIDR_EL0); cg.LDR(X30, X30, offsetof(NativeExecutionParameters, native_context)); // Store all general-purpose registers except X30. for (int i = 0; i <= 28; i += 2) { cg.STP(oaknut::XReg{i}, oaknut::XReg{i + 1}, X30, 8 * i); } // Store all vector registers. static constexpr size_t VEC_OFF = offsetof(GuestContext, vector_registers); for (int i = 0; i <= 30; i += 2) { cg.STP(oaknut::QReg{i}, oaknut::QReg{i + 1}, X30, VEC_OFF + 16 * i); } // Store guest system registers, X30 and SP, using X0 as a scratch register. cg.STR(X0, SP, PRE_INDEXED, -16); cg.LDR(X0, SP, 16); cg.STR(X0, X30, 8 * 30); cg.ADD(X0, SP, 32); cg.STR(X0, X30, offsetof(GuestContext, sp)); cg.MRS(X0, oaknut::SystemReg::FPSR); cg.STR(W0, X30, offsetof(GuestContext, fpsr)); cg.MRS(X0, oaknut::SystemReg::FPCR); cg.STR(W0, X30, offsetof(GuestContext, fpcr)); cg.MRS(X0, oaknut::SystemReg::NZCV); cg.STR(W0, X30, offsetof(GuestContext, nzcv)); cg.LDR(X0, SP, POST_INDEXED, 16); // Reload our return X30 from the stack, and return. cg.LDR(X30, SP, 8); cg.RET(); } void Patcher::WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id, oaknut::VectorCodeGenerator& cg, oaknut::Label& save_ctx, oaknut::Label& load_ctx) { // Determine if we're writing to the pre-patch buffer const bool is_pre = (&cg == &c_pre); // We are about to start saving state, so we need to lock the context. this->LockContext(cg); // Store guest X30 to the stack. Then, save the context and restore the stack. // This will save all registers except PC, but we know PC at patch time. cg.STR(X30, SP, PRE_INDEXED, -16); cg.BL(save_ctx); cg.LDR(X30, SP, POST_INDEXED, 16); // Now that we've saved all registers, we can use any registers as scratch. // Store PC + 4 to arm interface, since we know the instruction offset from the entry point. oaknut::Label pc_after_svc; cg.MRS(X1, oaknut::SystemReg::TPIDR_EL0); cg.LDR(X1, X1, offsetof(NativeExecutionParameters, native_context)); cg.LDR(X2, pc_after_svc); cg.STR(X2, X1, offsetof(GuestContext, pc)); // Store SVC number to execute when we return cg.MOV(X2, svc_id); cg.STR(W2, X1, offsetof(GuestContext, svc)); // We are calling a SVC. Clear esr_el1 and return it. static_assert(std::is_same_v, u64>); oaknut::Label retry; cg.ADD(X2, X1, offsetof(GuestContext, esr_el1)); cg.l(retry); cg.LDAXR(X0, X2); cg.STLXR(W3, XZR, X2); cg.CBNZ(W3, retry); // Add "calling SVC" flag. Since this is X0, this is now our return value. cg.ORR(X0, X0, static_cast(HaltReason::SupervisorCall)); // Offset the GuestContext pointer to the HostContext member. // STP has limited range of [-512, 504] which we can't reach otherwise // NB: Due to this all offsets below are from the start of HostContext. cg.ADD(X1, X1, offsetof(GuestContext, host_ctx)); // Reload host TPIDR_EL0 and SP. static_assert(offsetof(HostContext, host_sp) + 8 == offsetof(HostContext, host_tpidr_el0)); cg.LDP(X2, X3, X1, offsetof(HostContext, host_sp)); cg.MOV(SP, X2); cg.MSR(oaknut::SystemReg::TPIDR_EL0, X3); // Load callee-saved host registers and return to host. static constexpr size_t HOST_REGS_OFF = offsetof(HostContext, host_saved_regs); static constexpr size_t HOST_VREGS_OFF = offsetof(HostContext, host_saved_vregs); cg.LDP(X19, X20, X1, HOST_REGS_OFF); cg.LDP(X21, X22, X1, HOST_REGS_OFF + 2 * sizeof(u64)); cg.LDP(X23, X24, X1, HOST_REGS_OFF + 4 * sizeof(u64)); cg.LDP(X25, X26, X1, HOST_REGS_OFF + 6 * sizeof(u64)); cg.LDP(X27, X28, X1, HOST_REGS_OFF + 8 * sizeof(u64)); cg.LDP(X29, X30, X1, HOST_REGS_OFF + 10 * sizeof(u64)); cg.LDP(Q8, Q9, X1, HOST_VREGS_OFF); cg.LDP(Q10, Q11, X1, HOST_VREGS_OFF + 2 * sizeof(u128)); cg.LDP(Q12, Q13, X1, HOST_VREGS_OFF + 4 * sizeof(u128)); cg.LDP(Q14, Q15, X1, HOST_VREGS_OFF + 6 * sizeof(u128)); cg.RET(); // Write the post-SVC trampoline address, which will jump back to the guest after restoring its // state. if (is_pre) { curr_patch->m_trampolines_pre.push_back({cg.offset(), module_dest}); } else { curr_patch->m_trampolines.push_back({cg.offset(), module_dest}); } // Host called this location. Save the return address so we can // unwind the stack properly when jumping back. cg.MRS(X2, oaknut::SystemReg::TPIDR_EL0); cg.LDR(X2, X2, offsetof(NativeExecutionParameters, native_context)); cg.ADD(X0, X2, offsetof(GuestContext, host_ctx)); cg.STR(X30, X0, offsetof(HostContext, host_saved_regs) + 11 * sizeof(u64)); // Reload all guest registers except X30 and PC. // The function also expects 16 bytes of stack already allocated. cg.STR(X30, SP, PRE_INDEXED, -16); cg.BL(load_ctx); cg.LDR(X30, SP, POST_INDEXED, 16); // Use X1 as a scratch register to restore X30. cg.STR(X1, SP, PRE_INDEXED, -16); cg.MRS(X1, oaknut::SystemReg::TPIDR_EL0); cg.LDR(X1, X1, offsetof(NativeExecutionParameters, native_context)); cg.LDR(X30, X1, offsetof(GuestContext, cpu_registers) + sizeof(u64) * 30); cg.LDR(X1, SP, POST_INDEXED, 16); // Unlock the context. this->UnlockContext(cg); // Jump back to the instruction after the emulated SVC. if (&cg == &c_pre) this->BranchToModulePre(module_dest); else this->BranchToModule(module_dest); // Store PC after call. cg.l(pc_after_svc); if (&cg == &c_pre) this->WriteModulePcPre(module_dest); else this->WriteModulePc(module_dest); } void Patcher::WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg, oaknut::SystemReg src_reg, oaknut::VectorCodeGenerator& cg) { // Retrieve emulated TLS register from GuestContext. cg.MRS(dest_reg, oaknut::SystemReg::TPIDR_EL0); if (src_reg == oaknut::SystemReg::TPIDRRO_EL0) { cg.LDR(dest_reg, dest_reg, offsetof(NativeExecutionParameters, tpidrro_el0)); } else { cg.LDR(dest_reg, dest_reg, offsetof(NativeExecutionParameters, tpidr_el0)); } // Jump back to the instruction after the emulated MRS. if (&cg == &c_pre) this->BranchToModulePre(module_dest); else this->BranchToModule(module_dest); } void Patcher::WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg, oaknut::VectorCodeGenerator& cg) { const auto scratch_reg = src_reg.index() == 0 ? X1 : X0; cg.STR(scratch_reg, SP, PRE_INDEXED, -16); // Save guest value to NativeExecutionParameters::tpidr_el0. cg.MRS(scratch_reg, oaknut::SystemReg::TPIDR_EL0); cg.STR(src_reg, scratch_reg, offsetof(NativeExecutionParameters, tpidr_el0)); // Restore scratch register. cg.LDR(scratch_reg, SP, POST_INDEXED, 16); // Jump back to the instruction after the emulated MSR. if (&cg == &c_pre) this->BranchToModulePre(module_dest); else this->BranchToModule(module_dest); } void Patcher::WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg, oaknut::VectorCodeGenerator& cg) { static Common::Arm64::NativeClock clock{}; const auto factor = clock.GetGuestCNTFRQFactor(); const auto raw_factor = std::bit_cast>(factor); const auto use_x2_x3 = dest_reg.index() == 0 || dest_reg.index() == 1; oaknut::XReg scratch0 = use_x2_x3 ? X2 : X0; oaknut::XReg scratch1 = use_x2_x3 ? X3 : X1; oaknut::Label factorlo; oaknut::Label factorhi; // Save scratches. cg.STP(scratch0, scratch1, SP, PRE_INDEXED, -16); // Load counter value. cg.MRS(dest_reg, oaknut::SystemReg::CNTVCT_EL0); // Load scaling factor. cg.LDR(scratch0, factorlo); cg.LDR(scratch1, factorhi); // Multiply low bits and get result. cg.UMULH(scratch0, dest_reg, scratch0); // Multiply high bits and add low bit result. cg.MADD(dest_reg, dest_reg, scratch1, scratch0); // Reload scratches. cg.LDP(scratch0, scratch1, SP, POST_INDEXED, 16); // Jump back to the instruction after the emulated MRS. if (&cg == &c_pre) this->BranchToModulePre(module_dest); else this->BranchToModule(module_dest); // Scaling factor constant values. cg.l(factorlo); cg.dx(raw_factor[0]); cg.l(factorhi); cg.dx(raw_factor[1]); } void Patcher::LockContext(oaknut::VectorCodeGenerator& cg) { oaknut::Label retry; // Save scratches. cg.STP(X0, X1, SP, PRE_INDEXED, -16); // Reload lock pointer. cg.l(retry); cg.CLREX(); cg.MRS(X0, oaknut::SystemReg::TPIDR_EL0); cg.ADD(X0, X0, offsetof(NativeExecutionParameters, lock)); static_assert(SpinLockLocked == 0); // Load-linked with acquire ordering. cg.LDAXR(W1, X0); // If the value was SpinLockLocked, clear monitor and retry. cg.CBZ(W1, retry); // Store-conditional SpinLockLocked with relaxed ordering. cg.STXR(W1, WZR, X0); // If we failed to store, retry. cg.CBNZ(W1, retry); // We succeeded! Reload scratches. cg.LDP(X0, X1, SP, POST_INDEXED, 16); } void Patcher::UnlockContext(oaknut::VectorCodeGenerator& cg) { // Save scratches. cg.STP(X0, X1, SP, PRE_INDEXED, -16); // Load lock pointer. cg.MRS(X0, oaknut::SystemReg::TPIDR_EL0); cg.ADD(X0, X0, offsetof(NativeExecutionParameters, lock)); // Load SpinLockUnlocked. cg.MOV(W1, SpinLockUnlocked); // Store value with release ordering. cg.STLR(W1, X0); // Load scratches. cg.LDP(X0, X1, SP, POST_INDEXED, 16); } } // namespace Core::NCE