add split patchmode for larger than 128mb due to a64 limit

This commit is contained in:
Maufeat 2026-02-07 06:31:52 +01:00 committed by crueter
parent 71e035f83b
commit 89aa820635
5 changed files with 321 additions and 61 deletions

View File

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project // SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later // SPDX-License-Identifier: GPL-3.0-or-later
#include <numeric> #include <numeric>
@ -20,9 +20,13 @@ namespace Core::NCE {
Patcher::Patcher(Patcher&& other) noexcept Patcher::Patcher(Patcher&& other) noexcept
: patch_cache(std::move(other.patch_cache)), : patch_cache(std::move(other.patch_cache)),
m_patch_instructions(std::move(other.m_patch_instructions)), m_patch_instructions(std::move(other.m_patch_instructions)),
m_patch_instructions_pre(std::move(other.m_patch_instructions_pre)),
c(m_patch_instructions), c(m_patch_instructions),
c_pre(m_patch_instructions_pre),
m_save_context(other.m_save_context), m_save_context(other.m_save_context),
m_load_context(other.m_load_context), m_load_context(other.m_load_context),
m_save_context_pre(other.m_save_context_pre),
m_load_context_pre(other.m_load_context_pre),
mode(other.mode), mode(other.mode),
total_program_size(other.total_program_size), total_program_size(other.total_program_size),
m_relocate_module_index(other.m_relocate_module_index), m_relocate_module_index(other.m_relocate_module_index),
@ -42,20 +46,25 @@ using NativeExecutionParameters = Kernel::KThread::NativeExecutionParameters;
constexpr size_t MaxRelativeBranch = 128_MiB; constexpr size_t MaxRelativeBranch = 128_MiB;
constexpr u32 ModuleCodeIndex = 0x24 / sizeof(u32); constexpr u32 ModuleCodeIndex = 0x24 / sizeof(u32);
Patcher::Patcher() : c(m_patch_instructions) { Patcher::Patcher() : c(m_patch_instructions), c_pre(m_patch_instructions_pre) {
LOG_WARNING(Core_ARM, "Patcher initialized with LRU cache {}", LOG_WARNING(Core_ARM, "Patcher initialized with LRU cache {}",
patch_cache.isEnabled() ? "enabled" : "disabled"); patch_cache.isEnabled() ? "enabled" : "disabled");
// The first word of the patch section is always a branch to the first instruction of the // The first word of the patch section is always a branch to the first instruction of the
// module. // module.
c.dw(0); c.dw(0);
c_pre.dw(0);
// Write save context helper function. // Write save context helper function.
c.l(m_save_context); c.l(m_save_context);
WriteSaveContext(); WriteSaveContext();
c_pre.l(m_save_context_pre);
WriteSaveContext(c_pre);
// Write load context helper function. // Write load context helper function.
c.l(m_load_context); c.l(m_load_context);
WriteLoadContext(); WriteLoadContext();
c_pre.l(m_load_context_pre);
WriteLoadContext(c_pre);
} }
Patcher::~Patcher() = default; Patcher::~Patcher() = default;
@ -64,7 +73,16 @@ bool Patcher::PatchText(const Kernel::PhysicalMemory& program_image,
const Kernel::CodeSet::Segment& code) { const Kernel::CodeSet::Segment& code) {
// If we have patched modules but cannot reach the new module, then it needs its own patcher. // If we have patched modules but cannot reach the new module, then it needs its own patcher.
const size_t image_size = program_image.size(); const size_t image_size = program_image.size();
if (total_program_size + image_size > MaxRelativeBranch && total_program_size > 0) {
// Check whether split mode is needed for large modules: an A64 relative branch reaches at
// most 128 MiB, and testing showed that, with an update applied, some modules are larger
// (208 MiB in the observed case).
bool use_split = false;
if (image_size > MaxRelativeBranch) {
if (total_program_size > 0) {
return false;
}
use_split = true;
} else if (total_program_size + image_size > MaxRelativeBranch && total_program_size > 0) {
return false; return false;
} }
@ -74,7 +92,12 @@ bool Patcher::PatchText(const Kernel::PhysicalMemory& program_image,
// The first word of the patch section is always a branch to the first instruction of the // The first word of the patch section is always a branch to the first instruction of the
// module. // module.
curr_patch->m_branch_to_module_relocations.push_back({0, 0}); if (use_split) {
// curr_patch->m_branch_to_module_relocations.push_back({0, 0});
curr_patch->m_branch_to_module_relocations_pre.push_back({0, 0});
} else {
curr_patch->m_branch_to_module_relocations.push_back({0, 0});
}
// Retrieve text segment data. // Retrieve text segment data.
const auto text = std::span{program_image}.subspan(code.offset, code.size); const auto text = std::span{program_image}.subspan(code.offset, code.size);
@ -85,12 +108,18 @@ bool Patcher::PatchText(const Kernel::PhysicalMemory& program_image,
for (u32 i = ModuleCodeIndex; i < static_cast<u32>(text_words.size()); i++) { for (u32 i = ModuleCodeIndex; i < static_cast<u32>(text_words.size()); i++) {
const u32 inst = text_words[i]; const u32 inst = text_words[i];
const auto AddRelocations = [&] { const auto AddRelocations = [&](bool& pre_buffer) {
const uintptr_t this_offset = i * sizeof(u32); const uintptr_t this_offset = i * sizeof(u32);
const uintptr_t next_offset = this_offset + sizeof(u32); const uintptr_t next_offset = this_offset + sizeof(u32);
// Relocate from here to patch. pre_buffer = use_split && (this_offset < MaxRelativeBranch);
this->BranchToPatch(this_offset);
// Relocate to pre- or post-patch
if (pre_buffer) {
this->BranchToPatchPre(this_offset);
} else {
this->BranchToPatch(this_offset);
}
// Relocate from patch to next instruction. // Relocate from patch to next instruction.
return next_offset; return next_offset;
@ -98,7 +127,13 @@ bool Patcher::PatchText(const Kernel::PhysicalMemory& program_image,
// SVC // SVC
if (auto svc = SVC{inst}; svc.Verify()) { if (auto svc = SVC{inst}; svc.Verify()) {
WriteSvcTrampoline(AddRelocations(), svc.GetValue()); bool pre_buffer = false;
auto ret = AddRelocations(pre_buffer);
if (pre_buffer) {
WriteSvcTrampoline(ret, svc.GetValue(), c_pre, m_save_context_pre, m_load_context_pre);
} else {
WriteSvcTrampoline(ret, svc.GetValue(), c, m_save_context, m_load_context);
}
continue; continue;
} }
@ -109,13 +144,25 @@ bool Patcher::PatchText(const Kernel::PhysicalMemory& program_image,
const auto src_reg = mrs.GetSystemReg() == TpidrroEl0 ? oaknut::SystemReg::TPIDRRO_EL0 const auto src_reg = mrs.GetSystemReg() == TpidrroEl0 ? oaknut::SystemReg::TPIDRRO_EL0
: oaknut::SystemReg::TPIDR_EL0; : oaknut::SystemReg::TPIDR_EL0;
const auto dest_reg = oaknut::XReg{static_cast<int>(mrs.GetRt())}; const auto dest_reg = oaknut::XReg{static_cast<int>(mrs.GetRt())};
WriteMrsHandler(AddRelocations(), dest_reg, src_reg); bool pre_buffer = false;
auto ret = AddRelocations(pre_buffer);
if (pre_buffer) {
WriteMrsHandler(ret, dest_reg, src_reg, c_pre);
} else {
WriteMrsHandler(ret, dest_reg, src_reg, c);
}
continue; continue;
} }
// MRS Xn, CNTPCT_EL0 // MRS Xn, CNTPCT_EL0
if (auto mrs = MRS{inst}; mrs.Verify() && mrs.GetSystemReg() == CntpctEl0) { if (auto mrs = MRS{inst}; mrs.Verify() && mrs.GetSystemReg() == CntpctEl0) {
WriteCntpctHandler(AddRelocations(), oaknut::XReg{static_cast<int>(mrs.GetRt())}); bool pre_buffer = false;
auto ret = AddRelocations(pre_buffer);
if (pre_buffer) {
WriteCntpctHandler(ret, oaknut::XReg{static_cast<int>(mrs.GetRt())}, c_pre);
} else {
WriteCntpctHandler(ret, oaknut::XReg{static_cast<int>(mrs.GetRt())}, c);
}
continue; continue;
} }
@ -126,7 +173,13 @@ bool Patcher::PatchText(const Kernel::PhysicalMemory& program_image,
// MSR TPIDR_EL0, Xn // MSR TPIDR_EL0, Xn
if (auto msr = MSR{inst}; msr.Verify() && msr.GetSystemReg() == TpidrEl0) { if (auto msr = MSR{inst}; msr.Verify() && msr.GetSystemReg() == TpidrEl0) {
WriteMsrHandler(AddRelocations(), oaknut::XReg{static_cast<int>(msr.GetRt())}); bool pre_buffer = false;
auto ret = AddRelocations(pre_buffer);
if (pre_buffer) {
WriteMsrHandler(ret, oaknut::XReg{static_cast<int>(msr.GetRt())}, c_pre);
} else {
WriteMsrHandler(ret, oaknut::XReg{static_cast<int>(msr.GetRt())}, c);
}
continue; continue;
} }
@ -137,7 +190,11 @@ bool Patcher::PatchText(const Kernel::PhysicalMemory& program_image,
// Determine patching mode for the final relocation step // Determine patching mode for the final relocation step
total_program_size += image_size; total_program_size += image_size;
this->mode = image_size > MaxRelativeBranch ? PatchMode::PreText : PatchMode::PostData; if (use_split) {
this->mode = PatchMode::Split;
} else {
this->mode = image_size > MaxRelativeBranch ? PatchMode::PreText : PatchMode::PostData;
}
return true; return true;
} }
@ -146,7 +203,9 @@ bool Patcher::RelocateAndCopy(Common::ProcessAddress load_base,
Kernel::PhysicalMemory& program_image, Kernel::PhysicalMemory& program_image,
EntryTrampolines* out_trampolines) { EntryTrampolines* out_trampolines) {
const size_t patch_size = GetSectionSize(); const size_t patch_size = GetSectionSize();
const size_t image_size = program_image.size(); const size_t pre_patch_size = GetPreSectionSize();
const size_t image_size = (mode == PatchMode::Split) ? program_image.size() - pre_patch_size : program_image.size();
// Retrieve text segment data. // Retrieve text segment data.
const auto text = std::span{program_image}.subspan(code.offset, code.size); const auto text = std::span{program_image}.subspan(code.offset, code.size);
@ -162,6 +221,16 @@ bool Patcher::RelocateAndCopy(Common::ProcessAddress load_base,
} }
}; };
const auto ApplyBranchToPatchRelocationPre = [&](u32* target, const Relocation& rel) {
oaknut::CodeGenerator rc{target};
rc.B(static_cast<ptrdiff_t>(rel.patch_offset) - static_cast<ptrdiff_t>(pre_patch_size) - static_cast<ptrdiff_t>(rel.module_offset));
};
const auto ApplyBranchToPatchRelocationPostSplit = [&](u32* target, const Relocation& rel) {
oaknut::CodeGenerator rc{target};
rc.B(static_cast<ptrdiff_t>(image_size) + static_cast<ptrdiff_t>(rel.patch_offset) - static_cast<ptrdiff_t>(rel.module_offset));
};
const auto ApplyBranchToModuleRelocation = [&](u32* target, const Relocation& rel) { const auto ApplyBranchToModuleRelocation = [&](u32* target, const Relocation& rel) {
oaknut::CodeGenerator rc{target}; oaknut::CodeGenerator rc{target};
if (mode == PatchMode::PreText) { if (mode == PatchMode::PreText) {
@ -171,6 +240,16 @@ bool Patcher::RelocateAndCopy(Common::ProcessAddress load_base,
} }
}; };
const auto ApplyBranchToModuleRelocationPre = [&](u32* target, const Relocation& rel) {
oaknut::CodeGenerator rc{target};
rc.B(static_cast<ptrdiff_t>(pre_patch_size) + static_cast<ptrdiff_t>(rel.module_offset) - static_cast<ptrdiff_t>(rel.patch_offset));
};
const auto ApplyBranchToModuleRelocationPostSplit = [&](u32* target, const Relocation& rel) {
oaknut::CodeGenerator rc{target};
rc.B(static_cast<ptrdiff_t>(rel.module_offset) - static_cast<ptrdiff_t>(image_size) - static_cast<ptrdiff_t>(rel.patch_offset));
};
const auto RebasePatch = [&](ptrdiff_t patch_offset) { const auto RebasePatch = [&](ptrdiff_t patch_offset) {
if (mode == PatchMode::PreText) { if (mode == PatchMode::PreText) {
return GetInteger(load_base) + patch_offset; return GetInteger(load_base) + patch_offset;
@ -182,28 +261,91 @@ bool Patcher::RelocateAndCopy(Common::ProcessAddress load_base,
const auto RebasePc = [&](uintptr_t module_offset) { const auto RebasePc = [&](uintptr_t module_offset) {
if (mode == PatchMode::PreText) { if (mode == PatchMode::PreText) {
return GetInteger(load_base) + patch_size + module_offset; return GetInteger(load_base) + patch_size + module_offset;
} else {
return GetInteger(load_base) + module_offset;
} }
if (mode == PatchMode::Split) {
return GetInteger(load_base) + pre_patch_size + module_offset;
}
return GetInteger(load_base) + module_offset;
}; };
// We are now ready to relocate! // We are now ready to relocate!
auto& patch = modules[m_relocate_module_index++]; auto& patch = modules[m_relocate_module_index++];
for (const Relocation& rel : patch.m_branch_to_patch_relocations) {
ApplyBranchToPatchRelocation(text_words.data() + rel.module_offset / sizeof(u32), rel); if (mode == PatchMode::Split) {
} const u32* raw_at_0 = reinterpret_cast<const u32*>(program_image.data());
for (const Relocation& rel : patch.m_branch_to_module_relocations) { const u32* raw_at_offset = reinterpret_cast<const u32*>(program_image.data() + code.offset);
ApplyBranchToModuleRelocation(m_patch_instructions.data() + rel.patch_offset / sizeof(u32),
rel); for (const Relocation& rel : patch.m_branch_to_pre_patch_relocations) {
ApplyBranchToPatchRelocationPre(text_words.data() + rel.module_offset / sizeof(u32), rel);
}
LOG_DEBUG(Core_ARM, "applied Pre: {}", patch.m_branch_to_pre_patch_relocations.size());
for (const Relocation& rel : patch.m_branch_to_patch_relocations) {
ApplyBranchToPatchRelocationPostSplit(text_words.data() + rel.module_offset / sizeof(u32), rel);
}
LOG_DEBUG(Core_ARM, "applied Post: {}", patch.m_branch_to_patch_relocations.size());
for (const Relocation& rel : patch.m_branch_to_module_relocations_pre) {
ApplyBranchToModuleRelocationPre(m_patch_instructions_pre.data() + rel.patch_offset / sizeof(u32), rel);
}
LOG_DEBUG(Core_ARM, "aplied Pre-module {}", patch.m_branch_to_module_relocations_pre.size());
for (const Relocation& rel : patch.m_branch_to_module_relocations) {
ApplyBranchToModuleRelocationPostSplit(m_patch_instructions.data() + rel.patch_offset / sizeof(u32), rel);
}
LOG_DEBUG(Core_ARM, "applied Post-module {}", patch.m_branch_to_module_relocations.size());
// Pre
for (const Relocation& rel : patch.m_write_module_pc_relocations_pre) {
oaknut::CodeGenerator rc{m_patch_instructions_pre.data() + rel.patch_offset / sizeof(u32)};
rc.dx(RebasePc(rel.module_offset));
}
// Post
for (const Relocation& rel : patch.m_write_module_pc_relocations) {
oaknut::CodeGenerator rc{m_patch_instructions.data() + rel.patch_offset / sizeof(u32)};
rc.dx(RebasePc(rel.module_offset));
}
// Trampolines (split pre + post)
for (const Trampoline& rel : patch.m_trampolines_pre) {
out_trampolines->insert({RebasePc(rel.module_offset),
GetInteger(load_base) + rel.patch_offset});
}
for (const Trampoline& rel : patch.m_trampolines) {
out_trampolines->insert({RebasePc(rel.module_offset),
GetInteger(load_base) + pre_patch_size + image_size + rel.patch_offset});
}
if (!m_patch_instructions_pre.empty()) {
u32 insn = m_patch_instructions_pre[0];
if ((insn & 0xFC000000) == 0x14000000) {
s32 imm26 = insn & 0x3FFFFFF;
// Sign extend
if (imm26 & 0x2000000) imm26 |= 0xFC000000;
s64 offset = static_cast<s64>(imm26) * 4;
}
}
} else {
for (const Relocation& rel : patch.m_branch_to_patch_relocations) {
ApplyBranchToPatchRelocation(text_words.data() + rel.module_offset / sizeof(u32), rel);
}
for (const Relocation& rel : patch.m_branch_to_module_relocations) {
ApplyBranchToModuleRelocation(m_patch_instructions.data() + rel.patch_offset / sizeof(u32),
rel);
}
// Rewrite PC constants
for (const Relocation& rel : patch.m_write_module_pc_relocations) {
oaknut::CodeGenerator rc{m_patch_instructions.data() + rel.patch_offset / sizeof(u32)};
rc.dx(RebasePc(rel.module_offset));
}
} }
// Rewrite PC constants and record post trampolines if (mode != PatchMode::Split) {
for (const Relocation& rel : patch.m_write_module_pc_relocations) { for (const Trampoline& rel : patch.m_trampolines) {
oaknut::CodeGenerator rc{m_patch_instructions.data() + rel.patch_offset / sizeof(u32)}; out_trampolines->insert({RebasePc(rel.module_offset), RebasePatch(rel.patch_offset)});
rc.dx(RebasePc(rel.module_offset)); }
}
for (const Trampoline& rel : patch.m_trampolines) {
out_trampolines->insert({RebasePc(rel.module_offset), RebasePatch(rel.patch_offset)});
} }
// Cortex-A57 seems to treat all exclusives as ordered, but newer processors do not. // Cortex-A57 seems to treat all exclusives as ordered, but newer processors do not.
@ -223,6 +365,15 @@ bool Patcher::RelocateAndCopy(Common::ProcessAddress load_base,
ASSERT(image_size == total_program_size); ASSERT(image_size == total_program_size);
std::memcpy(program_image.data(), m_patch_instructions.data(), std::memcpy(program_image.data(), m_patch_instructions.data(),
m_patch_instructions.size() * sizeof(u32)); m_patch_instructions.size() * sizeof(u32));
} else if (this->mode == PatchMode::Split) {
const size_t current_size = program_image.size();
program_image.resize(current_size + patch_size);
// Copy pre-patch buffer to the beginning
std::memcpy(program_image.data(), m_patch_instructions_pre.data(),
m_patch_instructions_pre.size() * sizeof(u32));
// Likewise, copy the post-patch buffer to the end
std::memcpy(program_image.data() + current_size, m_patch_instructions.data(),
m_patch_instructions.size() * sizeof(u32));
} else { } else {
program_image.resize(image_size + patch_size); program_image.resize(image_size + patch_size);
std::memcpy(program_image.data() + image_size, m_patch_instructions.data(), std::memcpy(program_image.data() + image_size, m_patch_instructions.data(),
@ -238,7 +389,11 @@ size_t Patcher::GetSectionSize() const noexcept {
return Common::AlignUp(m_patch_instructions.size() * sizeof(u32), Core::Memory::YUZU_PAGESIZE); return Common::AlignUp(m_patch_instructions.size() * sizeof(u32), Core::Memory::YUZU_PAGESIZE);
} }
void Patcher::WriteLoadContext() { size_t Patcher::GetPreSectionSize() const noexcept {
return Common::AlignUp(m_patch_instructions_pre.size() * sizeof(u32), Core::Memory::YUZU_PAGESIZE);
}
void Patcher::WriteLoadContext(oaknut::VectorCodeGenerator& c) {
// This function was called, which modifies X30, so use that as a scratch register. // This function was called, which modifies X30, so use that as a scratch register.
// SP contains the guest X30, so save our return X30 to SP + 8, since we have allocated 16 bytes // SP contains the guest X30, so save our return X30 to SP + 8, since we have allocated 16 bytes
// of stack. // of stack.
@ -271,7 +426,7 @@ void Patcher::WriteLoadContext() {
c.RET(); c.RET();
} }
void Patcher::WriteSaveContext() { void Patcher::WriteSaveContext(oaknut::VectorCodeGenerator& c) {
// This function was called, which modifies X30, so use that as a scratch register. // This function was called, which modifies X30, so use that as a scratch register.
// SP contains the guest X30, so save our X30 to SP + 8, since we have allocated 16 bytes of // SP contains the guest X30, so save our X30 to SP + 8, since we have allocated 16 bytes of
// stack. // stack.
@ -309,14 +464,17 @@ void Patcher::WriteSaveContext() {
c.RET(); c.RET();
} }
void Patcher::WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id) { void Patcher::WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id, oaknut::VectorCodeGenerator& c, oaknut::Label& save_ctx, oaknut::Label& load_ctx) {
// Determine if we're writing to the pre-patch buffer
const bool is_pre = (&c == &c_pre);
// We are about to start saving state, so we need to lock the context. // We are about to start saving state, so we need to lock the context.
this->LockContext(); this->LockContext(c);
// Store guest X30 to the stack. Then, save the context and restore the stack. // Store guest X30 to the stack. Then, save the context and restore the stack.
// This will save all registers except PC, but we know PC at patch time. // This will save all registers except PC, but we know PC at patch time.
c.STR(X30, SP, PRE_INDEXED, -16); c.STR(X30, SP, PRE_INDEXED, -16);
c.BL(m_save_context); c.BL(save_ctx);
c.LDR(X30, SP, POST_INDEXED, 16); c.LDR(X30, SP, POST_INDEXED, 16);
// Now that we've saved all registers, we can use any registers as scratch. // Now that we've saved all registers, we can use any registers as scratch.
@ -371,7 +529,11 @@ void Patcher::WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id) {
// Write the post-SVC trampoline address, which will jump back to the guest after restoring its // Write the post-SVC trampoline address, which will jump back to the guest after restoring its
// state. // state.
curr_patch->m_trampolines.push_back({c.offset(), module_dest}); if (is_pre) {
curr_patch->m_trampolines_pre.push_back({c.offset(), module_dest});
} else {
curr_patch->m_trampolines.push_back({c.offset(), module_dest});
}
// Host called this location. Save the return address so we can // Host called this location. Save the return address so we can
// unwind the stack properly when jumping back. // unwind the stack properly when jumping back.
@ -383,7 +545,7 @@ void Patcher::WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id) {
// Reload all guest registers except X30 and PC. // Reload all guest registers except X30 and PC.
// The function also expects 16 bytes of stack already allocated. // The function also expects 16 bytes of stack already allocated.
c.STR(X30, SP, PRE_INDEXED, -16); c.STR(X30, SP, PRE_INDEXED, -16);
c.BL(m_load_context); c.BL(load_ctx);
c.LDR(X30, SP, POST_INDEXED, 16); c.LDR(X30, SP, POST_INDEXED, 16);
// Use X1 as a scratch register to restore X30. // Use X1 as a scratch register to restore X30.
@ -394,18 +556,24 @@ void Patcher::WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id) {
c.LDR(X1, SP, POST_INDEXED, 16); c.LDR(X1, SP, POST_INDEXED, 16);
// Unlock the context. // Unlock the context.
this->UnlockContext(); this->UnlockContext(c);
// Jump back to the instruction after the emulated SVC. // Jump back to the instruction after the emulated SVC.
this->BranchToModule(module_dest); if (&c == &c_pre)
this->BranchToModulePre(module_dest);
else
this->BranchToModule(module_dest);
// Store PC after call. // Store PC after call.
c.l(pc_after_svc); c.l(pc_after_svc);
this->WriteModulePc(module_dest); if (&c == &c_pre)
this->WriteModulePcPre(module_dest);
else
this->WriteModulePc(module_dest);
} }
void Patcher::WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg, void Patcher::WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg,
oaknut::SystemReg src_reg) { oaknut::SystemReg src_reg, oaknut::VectorCodeGenerator& c) {
// Retrieve emulated TLS register from GuestContext. // Retrieve emulated TLS register from GuestContext.
c.MRS(dest_reg, oaknut::SystemReg::TPIDR_EL0); c.MRS(dest_reg, oaknut::SystemReg::TPIDR_EL0);
if (src_reg == oaknut::SystemReg::TPIDRRO_EL0) { if (src_reg == oaknut::SystemReg::TPIDRRO_EL0) {
@ -415,10 +583,13 @@ void Patcher::WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg
} }
// Jump back to the instruction after the emulated MRS. // Jump back to the instruction after the emulated MRS.
this->BranchToModule(module_dest); if (&c == &c_pre)
this->BranchToModulePre(module_dest);
else
this->BranchToModule(module_dest);
} }
void Patcher::WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg) { void Patcher::WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg, oaknut::VectorCodeGenerator& c) {
const auto scratch_reg = src_reg.index() == 0 ? X1 : X0; const auto scratch_reg = src_reg.index() == 0 ? X1 : X0;
c.STR(scratch_reg, SP, PRE_INDEXED, -16); c.STR(scratch_reg, SP, PRE_INDEXED, -16);
@ -430,10 +601,13 @@ void Patcher::WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg)
c.LDR(scratch_reg, SP, POST_INDEXED, 16); c.LDR(scratch_reg, SP, POST_INDEXED, 16);
// Jump back to the instruction after the emulated MSR. // Jump back to the instruction after the emulated MSR.
this->BranchToModule(module_dest); if (&c == &c_pre)
this->BranchToModulePre(module_dest);
else
this->BranchToModule(module_dest);
} }
void Patcher::WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg) { void Patcher::WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg, oaknut::VectorCodeGenerator& c) {
static Common::Arm64::NativeClock clock{}; static Common::Arm64::NativeClock clock{};
const auto factor = clock.GetGuestCNTFRQFactor(); const auto factor = clock.GetGuestCNTFRQFactor();
const auto raw_factor = std::bit_cast<std::array<u64, 2>>(factor); const auto raw_factor = std::bit_cast<std::array<u64, 2>>(factor);
@ -465,7 +639,10 @@ void Patcher::WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_
c.LDP(scratch0, scratch1, SP, POST_INDEXED, 16); c.LDP(scratch0, scratch1, SP, POST_INDEXED, 16);
// Jump back to the instruction after the emulated MRS. // Jump back to the instruction after the emulated MRS.
this->BranchToModule(module_dest); if (&c == &c_pre)
this->BranchToModulePre(module_dest);
else
this->BranchToModule(module_dest);
// Scaling factor constant values. // Scaling factor constant values.
c.l(factorlo); c.l(factorlo);
@ -474,7 +651,7 @@ void Patcher::WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_
c.dx(raw_factor[1]); c.dx(raw_factor[1]);
} }
void Patcher::LockContext() { void Patcher::LockContext(oaknut::VectorCodeGenerator& c) {
oaknut::Label retry; oaknut::Label retry;
// Save scratches. // Save scratches.
@ -504,7 +681,7 @@ void Patcher::LockContext() {
c.LDP(X0, X1, SP, POST_INDEXED, 16); c.LDP(X0, X1, SP, POST_INDEXED, 16);
} }
void Patcher::UnlockContext() { void Patcher::UnlockContext(oaknut::VectorCodeGenerator& c) {
// Save scratches. // Save scratches.
c.STP(X0, X1, SP, PRE_INDEXED, -16); c.STP(X0, X1, SP, PRE_INDEXED, -16);

View File

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project // SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later // SPDX-License-Identifier: GPL-3.0-or-later
#pragma once #pragma once
@ -41,6 +41,7 @@ enum class PatchMode : u32 {
None, None,
PreText, ///< Patch section is inserted before .text PreText, ///< Patch section is inserted before .text
PostData, ///< Patch section is inserted after .data PostData, ///< Patch section is inserted after .data
Split, ///< Patch sections are inserted before .text and after .data
}; };
using ModuleTextAddress = u64; using ModuleTextAddress = u64;
@ -63,6 +64,7 @@ public:
bool RelocateAndCopy(Common::ProcessAddress load_base, const Kernel::CodeSet::Segment& code, bool RelocateAndCopy(Common::ProcessAddress load_base, const Kernel::CodeSet::Segment& code,
Kernel::PhysicalMemory& program_image, EntryTrampolines* out_trampolines); Kernel::PhysicalMemory& program_image, EntryTrampolines* out_trampolines);
size_t GetSectionSize() const noexcept; size_t GetSectionSize() const noexcept;
size_t GetPreSectionSize() const noexcept;
[[nodiscard]] PatchMode GetPatchMode() const noexcept { [[nodiscard]] PatchMode GetPatchMode() const noexcept {
return mode; return mode;
@ -76,15 +78,25 @@ private:
uintptr_t module_offset; uintptr_t module_offset;
}; };
void WriteLoadContext(); // Core implementations with explicit code generator
void WriteSaveContext(); void WriteLoadContext(oaknut::VectorCodeGenerator& code);
void LockContext(); void WriteSaveContext(oaknut::VectorCodeGenerator& code);
void UnlockContext(); void LockContext(oaknut::VectorCodeGenerator& code);
void WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id); void UnlockContext(oaknut::VectorCodeGenerator& code);
void WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg, void WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id, oaknut::VectorCodeGenerator& code, oaknut::Label& save_ctx, oaknut::Label& load_ctx);
oaknut::SystemReg src_reg); void WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg, oaknut::SystemReg src_reg, oaknut::VectorCodeGenerator& code);
void WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg); void WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg, oaknut::VectorCodeGenerator& code);
void WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg); void WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg, oaknut::VectorCodeGenerator& code);
// Convenience wrappers using default code generator
void WriteLoadContext() { WriteLoadContext(c); }
void WriteSaveContext() { WriteSaveContext(c); }
void LockContext() { LockContext(c); }
void UnlockContext() { UnlockContext(c); }
void WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id) { WriteSvcTrampoline(module_dest, svc_id, c, m_save_context, m_load_context); }
void WriteMrsHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg, oaknut::SystemReg src_reg) { WriteMrsHandler(module_dest, dest_reg, src_reg, c); }
void WriteMsrHandler(ModuleDestLabel module_dest, oaknut::XReg src_reg) { WriteMsrHandler(module_dest, src_reg, c); }
void WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg) { WriteCntpctHandler(module_dest, dest_reg, c); }
private: private:
static constexpr size_t CACHE_SIZE = 16384; // Cache size for patch entries static constexpr size_t CACHE_SIZE = 16384; // Cache size for patch entries
@ -111,19 +123,34 @@ private:
} }
} }
void BranchToPatchPre(uintptr_t module_dest) {
curr_patch->m_branch_to_pre_patch_relocations.push_back({c_pre.offset(), module_dest});
}
void BranchToModule(uintptr_t module_dest) { void BranchToModule(uintptr_t module_dest) {
curr_patch->m_branch_to_module_relocations.push_back({c.offset(), module_dest}); curr_patch->m_branch_to_module_relocations.push_back({c.offset(), module_dest});
c.dw(0); c.dw(0);
} }
void BranchToModulePre(uintptr_t module_dest) {
curr_patch->m_branch_to_module_relocations_pre.push_back({c_pre.offset(), module_dest});
c_pre.dw(0);
}
void WriteModulePc(uintptr_t module_dest) { void WriteModulePc(uintptr_t module_dest) {
curr_patch->m_write_module_pc_relocations.push_back({c.offset(), module_dest}); curr_patch->m_write_module_pc_relocations.push_back({c.offset(), module_dest});
c.dx(0); c.dx(0);
} }
void WriteModulePcPre(uintptr_t module_dest) {
curr_patch->m_write_module_pc_relocations_pre.push_back({c_pre.offset(), module_dest});
c_pre.dx(0);
}
private: private:
// List of patch instructions we have generated. // List of patch instructions we have generated.
std::vector<u32> m_patch_instructions{}; std::vector<u32> m_patch_instructions{};
std::vector<u32> m_patch_instructions_pre{};
// Relocation type for relative branch from module to patch. // Relocation type for relative branch from module to patch.
struct Relocation { struct Relocation {
@ -133,15 +160,22 @@ private:
struct ModulePatch { struct ModulePatch {
std::vector<Trampoline> m_trampolines; std::vector<Trampoline> m_trampolines;
std::vector<Trampoline> m_trampolines_pre;
std::vector<Relocation> m_branch_to_patch_relocations{}; std::vector<Relocation> m_branch_to_patch_relocations{};
std::vector<Relocation> m_branch_to_pre_patch_relocations{};
std::vector<Relocation> m_branch_to_module_relocations{}; std::vector<Relocation> m_branch_to_module_relocations{};
std::vector<Relocation> m_branch_to_module_relocations_pre{};
std::vector<Relocation> m_write_module_pc_relocations{}; std::vector<Relocation> m_write_module_pc_relocations{};
std::vector<Relocation> m_write_module_pc_relocations_pre{};
std::vector<ModuleTextAddress> m_exclusives{}; std::vector<ModuleTextAddress> m_exclusives{};
}; };
oaknut::VectorCodeGenerator c; oaknut::VectorCodeGenerator c;
oaknut::VectorCodeGenerator c_pre;
oaknut::Label m_save_context{}; oaknut::Label m_save_context{};
oaknut::Label m_load_context{}; oaknut::Label m_load_context{};
oaknut::Label m_save_context_pre{};
oaknut::Label m_load_context_pre{};
PatchMode mode{PatchMode::None}; PatchMode mode{PatchMode::None};
size_t total_program_size{}; size_t total_program_size{};
size_t m_relocate_module_index{}; size_t m_relocate_module_index{};

View File

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
@ -83,6 +86,14 @@ struct CodeSet final {
const Segment& PatchSegment() const { const Segment& PatchSegment() const {
return patch_segment; return patch_segment;
} }
Segment& PostPatchSegment() {
return post_patch_segment;
}
const Segment& PostPatchSegment() const {
return post_patch_segment;
}
#endif #endif
/// The overall data that backs this code set. /// The overall data that backs this code set.
@ -93,6 +104,7 @@ struct CodeSet final {
#ifdef HAS_NCE #ifdef HAS_NCE
Segment patch_segment; Segment patch_segment;
Segment post_patch_segment;
#endif #endif
/// The entry point address for this code set. /// The entry point address for this code set.

View File

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project // SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later // SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
@ -1258,6 +1258,7 @@ void KProcess::LoadModule(CodeSet code_set, KProcessAddress base_addr) {
#ifdef HAS_NCE #ifdef HAS_NCE
const auto& patch = code_set.PatchSegment(); const auto& patch = code_set.PatchSegment();
const auto& post_patch = code_set.PostPatchSegment();
if (this->IsApplication() && Settings::IsNceEnabled() && patch.size != 0) { if (this->IsApplication() && Settings::IsNceEnabled() && patch.size != 0) {
auto& buffer = m_kernel.System().DeviceMemory().buffer; auto& buffer = m_kernel.System().DeviceMemory().buffer;
const auto& code = code_set.CodeSegment(); const auto& code = code_set.CodeSegment();
@ -1265,7 +1266,15 @@ void KProcess::LoadModule(CodeSet code_set, KProcessAddress base_addr) {
Common::MemoryPermission::Read | Common::MemoryPermission::Execute); Common::MemoryPermission::Read | Common::MemoryPermission::Execute);
buffer.Protect(GetInteger(base_addr + patch.addr), patch.size, buffer.Protect(GetInteger(base_addr + patch.addr), patch.size,
Common::MemoryPermission::Read | Common::MemoryPermission::Execute); Common::MemoryPermission::Read | Common::MemoryPermission::Execute);
// Protect the post-patch segment, if it exists, in the same way as above
if (post_patch.size != 0) {
buffer.Protect(GetInteger(base_addr + post_patch.addr), post_patch.size,
Common::MemoryPermission::Read | Common::MemoryPermission::Execute);
}
ReprotectSegment(code_set.PatchSegment(), Svc::MemoryPermission::None); ReprotectSegment(code_set.PatchSegment(), Svc::MemoryPermission::None);
if (post_patch.size != 0) {
ReprotectSegment(code_set.PostPatchSegment(), Svc::MemoryPermission::None);
}
} }
#endif #endif
} }

View File

@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project // SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later // SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
@ -102,6 +102,8 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core::
auto* patch = &patches->operator[](patch_index); auto* patch = &patches->operator[](patch_index);
if (patch->GetPatchMode() == Core::NCE::PatchMode::PreText) { if (patch->GetPatchMode() == Core::NCE::PatchMode::PreText) {
return patch->GetSectionSize(); return patch->GetSectionSize();
} else if (patch->GetPatchMode() == Core::NCE::PatchMode::Split) {
return patch->GetPreSectionSize();
} }
} }
#endif #endif
@ -178,12 +180,26 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core::
} }
} else if (patch) { } else if (patch) {
// Relocate code patch and copy to the program_image. // Relocate code patch and copy to the program_image.
// Save size before RelocateAndCopy (which may resize)
const size_t size_before_relocate = program_image.size();
if (patch->RelocateAndCopy(load_base, code, program_image, &process.GetPostHandlers())) { if (patch->RelocateAndCopy(load_base, code, program_image, &process.GetPostHandlers())) {
// Update patch section. // Update patch section.
auto& patch_segment = codeset.PatchSegment(); auto& patch_segment = codeset.PatchSegment();
patch_segment.addr = auto& post_patch_segment = codeset.PostPatchSegment();
patch->GetPatchMode() == Core::NCE::PatchMode::PreText ? 0 : image_size; const auto patch_mode = patch->GetPatchMode();
patch_segment.size = static_cast<u32>(patch->GetSectionSize()); if (patch_mode == Core::NCE::PatchMode::PreText) {
patch_segment.addr = 0;
patch_segment.size = static_cast<u32>(patch->GetSectionSize());
} else if (patch_mode == Core::NCE::PatchMode::Split) {
// For Split-mode, we are using pre-patch buffer at start, post-patch buffer at end
patch_segment.addr = 0;
patch_segment.size = static_cast<u32>(patch->GetPreSectionSize());
post_patch_segment.addr = size_before_relocate;
post_patch_segment.size = static_cast<u32>(patch->GetSectionSize());
} else {
patch_segment.addr = image_size;
patch_segment.size = static_cast<u32>(patch->GetSectionSize());
}
} }
// Refresh image_size to take account the patch section if it was added by RelocateAndCopy // Refresh image_size to take account the patch section if it was added by RelocateAndCopy
@ -193,6 +209,18 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core::
// If we aren't actually loading (i.e. just computing the process code layout), we are done // If we aren't actually loading (i.e. just computing the process code layout), we are done
if (!load_into_process) { if (!load_into_process) {
#ifdef HAS_NCE
// For Split mode, we need to account for the pre-patch and post-patch space
// that will be added during RelocateAndCopy in the second pass. This is where it
// crashed in Android Studio (at PreText). There may be a better way, but this works for now.
if (patch && patch->GetPatchMode() == Core::NCE::PatchMode::Split) {
return load_base + patch->GetPreSectionSize() + image_size + patch->GetSectionSize();
} else if (patch && patch->GetPatchMode() == Core::NCE::PatchMode::PreText) {
return load_base + patch->GetSectionSize() + image_size;
} else if (patch && patch->GetPatchMode() == Core::NCE::PatchMode::PostData) {
return load_base + image_size + patch->GetSectionSize();
}
#endif
return load_base + image_size; return load_base + image_size;
} }