diff --git a/docs/Debug.md b/docs/Debug.md
index 3fc3bc9fee..f338bc1eeb 100644
--- a/docs/Debug.md
+++ b/docs/Debug.md
@@ -22,7 +22,7 @@ Debug logs can be found in General -> Debug -> Open Log Location on desktop, and
 
 Ignoring SIGSEGV when debugging in host:
 
-- **gdb**: `handle all nostop pass`.
+- **gdb**: `handle SIGSEGV nostop pass`.
 - **lldb**: `pro hand -p true -s false -n false SIGSEGV`.
 
 ## Debugging (guest code)
diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_data_processing.cpp b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_data_processing.cpp
index 1ecb18623f..108a341a71 100644
--- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_data_processing.cpp
+++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_data_processing.cpp
@@ -1199,50 +1199,45 @@ void EmitX64::EmitUnsignedDiv64(EmitContext& ctx, IR::Inst* inst) {
 
 void EmitX64::EmitSignedDiv32(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-
-    ctx.reg_alloc.ScratchGpr(code, HostLoc::RAX);
     ctx.reg_alloc.ScratchGpr(code, HostLoc::RDX);
-    const Xbyak::Reg32 dividend = ctx.reg_alloc.UseGpr(code, args[0]).cvt32();
-    const Xbyak::Reg32 divisor = ctx.reg_alloc.UseScratchGpr(code, args[1]).cvt32();
-
-    Xbyak::Label end;
-
-    code.xor_(eax, eax);
+    ctx.reg_alloc.UseScratch(code, args[0], HostLoc::RAX); // dividend lives in eax, which is also the result register
+    auto const divisor = ctx.reg_alloc.UseGpr(code, args[1]).cvt32();
+    Xbyak::Label end, ok;
     code.test(divisor, divisor);
-    code.jz(end);
-    code.movsxd(rax, dividend);
-    code.movsxd(divisor.cvt64(), divisor);
-    code.cqo();
-    code.idiv(divisor.cvt64());
+    code.cmovz(eax, divisor); // x / 0 -> 0: eax holds the dividend, so overwrite it with the (zero) divisor; cmov leaves ZF intact
+    code.jz(end, code.T_NEAR); // eax = 0, if divisor == 0
+    code.cmp(divisor, u32(-1)); // is sign extended
+    code.jne(ok, code.T_NEAR);
+    code.cmp(eax, u32(1ULL << 31));
+    code.je(end, code.T_NEAR); // INT32_MIN / -1 would fault in idiv; the expected result 0x8000_0000 is already in eax
+    code.L(ok);
+    code.cdq(); // sign-extend eax into edx:eax for the 32-bit idiv
+    code.idiv(divisor);
     code.L(end);
-
     ctx.reg_alloc.DefineValue(code, inst, eax);
 }
 
 void EmitX64::EmitSignedDiv64(EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-
ctx.reg_alloc.ScratchGpr(code, HostLoc::RAX); ctx.reg_alloc.ScratchGpr(code, HostLoc::RDX); - const Xbyak::Reg64 dividend = ctx.reg_alloc.UseGpr(code, args[0]); - const Xbyak::Reg64 divisor = ctx.reg_alloc.UseGpr(code, args[1]); - + auto const dividend = ctx.reg_alloc.UseGpr(code, args[0]); + auto const divisor = ctx.reg_alloc.UseGpr(code, args[1]); Xbyak::Label end, ok; - code.xor_(eax, eax); code.test(divisor, divisor); - code.jz(end); - code.cmp(divisor, 0xffffffff); // is sign extended - code.jne(ok); - code.mov(rax, 0x8000000000000000); + code.jz(end, code.T_NEAR); // rax = 0, if divisor == 0 + code.mov(rdx, u64(-1)); + code.cmp(divisor, rdx); // is sign extended + code.jne(ok, code.T_NEAR); + code.mov(rax, u64(1ULL << 63)); code.cmp(dividend, rax); - code.je(end); + code.je(end, code.T_NEAR); // rax = 0x8000_0000 if dividend is same code.L(ok); code.mov(rax, dividend); code.cqo(); code.idiv(divisor); code.L(end); - ctx.reg_alloc.DefineValue(code, inst, rax); } diff --git a/src/dynarmic/src/dynarmic/ir/opt_passes.cpp b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp index 702d429638..f22e8aaa69 100644 --- a/src/dynarmic/src/dynarmic/ir/opt_passes.cpp +++ b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp @@ -787,24 +787,30 @@ static void FoldCountLeadingZeros(IR::Inst& inst, bool is_32_bit) { /// Folds division operations based on the following: /// /// 1. x / 0 -> 0 (NOTE: This is an ARM-specific behavior defined in the architecture reference manual) -/// 2. imm_x / imm_y -> result -/// 3. x / 1 -> x +/// 2a. 0x8000_0000 / 0xFFFF_FFFF -> 0x8000_0000 (NOTE: More ARM bullshit) +/// 2b. 0x8000_0000_0000_0000 / 0xFFFF_FFFF_FFFF_FFFF -> 0x8000_0000_0000_0000 +/// 3. imm_x / imm_y -> result +/// 4. 
x / 1 -> x /// static void FoldDivide(IR::Inst& inst, bool is_32_bit, bool is_signed) { const auto rhs = inst.GetArg(1); - - if (rhs.IsZero()) { - ReplaceUsesWith(inst, is_32_bit, 0); - return; - } - const auto lhs = inst.GetArg(0); - if (lhs.IsImmediate() && rhs.IsImmediate()) { + if (lhs.IsZero() || rhs.IsZero()) { + ReplaceUsesWith(inst, is_32_bit, u64(0)); + } else if (!is_32_bit && lhs.IsUnsignedImmediate(u64(1ULL << 63)) && rhs.IsUnsignedImmediate(u64(-1))) { + ReplaceUsesWith(inst, is_32_bit, u64(1ULL << 63)); + } else if (is_32_bit && lhs.IsUnsignedImmediate(u32(1ULL << 31)) && rhs.IsUnsignedImmediate(u32(-1))) { + ReplaceUsesWith(inst, is_32_bit, u64(1ULL << 31)); + } else if (lhs.IsImmediate() && rhs.IsImmediate()) { if (is_signed) { - const s64 result = lhs.GetImmediateAsS64() / rhs.GetImmediateAsS64(); - ReplaceUsesWith(inst, is_32_bit, static_cast(result)); + auto const dl = lhs.GetImmediateAsS64(); + auto const dr = rhs.GetImmediateAsS64(); + const s64 result = dl / dr; + ReplaceUsesWith(inst, is_32_bit, u64(result)); } else { - const u64 result = lhs.GetImmediateAsU64() / rhs.GetImmediateAsU64(); + auto const dl = lhs.GetImmediateAsU64(); + auto const dr = rhs.GetImmediateAsU64(); + const u64 result = dl / dr; ReplaceUsesWith(inst, is_32_bit, result); } } else if (rhs.IsUnsignedImmediate(1)) { diff --git a/src/dynarmic/tests/A64/a64.cpp b/src/dynarmic/tests/A64/a64.cpp index e7010f081b..d331c5e8a1 100644 --- a/src/dynarmic/tests/A64/a64.cpp +++ b/src/dynarmic/tests/A64/a64.cpp @@ -1,4 +1,4 @@ -// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later /* This file is part of the dynarmic project. 
@@ -13,6 +13,7 @@
 #include "../native/testenv.h"
 #include "dynarmic/common/fp/fpsr.h"
 #include "dynarmic/interface/exclusive_monitor.h"
+#include "dynarmic/interface/optimization_flags.h"
 
 using namespace Dynarmic;
 using namespace oaknut::util;
@@ -1460,26 +1461,92 @@ TEST_CASE("A64: SQRDMULH QC flag when output invalidated", "[a64]") {
 }
 
 TEST_CASE("A64: SDIV maximally", "[a64]") {
+    // No indication of this overflow case is produced, and the 32-bit result written to
+    // R[d] must be the bottom 32 bits of the binary representation of +2^31.
+    // So the result of the division is 0x80000000.
     A64TestEnv env;
     A64::UserConfig jit_user_config{};
     jit_user_config.callbacks = &env;
     A64::Jit jit{jit_user_config};
 
-    env.code_mem.emplace_back(0x9ac00c22); // SDIV X2, X1, X0
-    env.code_mem.emplace_back(0x14000000); // B .
+    oaknut::VectorCodeGenerator code{env.code_mem, nullptr};
+    code.SDIV(X2, X1, X0);
+    code.SDIV(W5, W4, W3);
 
     jit.SetRegister(0, 0xffffffffffffffff);
     jit.SetRegister(1, 0x8000000000000000);
     jit.SetRegister(2, 0xffffffffffffffff);
+    jit.SetRegister(3, 0xffffffff);
+    jit.SetRegister(4, 0x80000000);
+    jit.SetRegister(5, 0xffffffff);
     jit.SetPC(0);
 
-    env.ticks_left = 2;
+    env.ticks_left = env.code_mem.size();
     CheckedRun([&]() { jit.Run(); });
 
     REQUIRE(jit.GetRegister(0) == 0xffffffffffffffff);
     REQUIRE(jit.GetRegister(1) == 0x8000000000000000);
     REQUIRE(jit.GetRegister(2) == 0x8000000000000000);
-    REQUIRE(jit.GetPC() == 4);
+    REQUIRE(jit.GetRegister(5) == 0x80000000);
+    REQUIRE(jit.GetPC() == 8);
+}
+
+TEST_CASE("A64: SDIV maximally (Immediate)", "[a64]") {
+    A64TestEnv env;
+    A64::UserConfig jit_user_config{};
+    jit_user_config.callbacks = &env;
+    auto const do_sdiv_code = [&] {
+        A64::Jit jit{jit_user_config};
+        oaknut::VectorCodeGenerator code{env.code_mem, nullptr};
+        code.MOVZ(X12, 0xffff);
+        code.MOVZ(X11, 0x8000);
+        code.MOVZ(X10, 0x0000);
+
+        // X0 = 0xffff_ffff_ffff_ffff (64-bit divisor, -1)
+        code.MOV(X0, X12);
+        code.LSL(X0, X0, 16);
+        code.ORR(X0, X0, X12);
+        code.LSL(X0, X0, 16);
+        code.ORR(X0, X0, X12);
+        code.LSL(X0, X0, 16);
+        code.ORR(X0, X0, X12);
+
+        // X1 = 0x8000_0000_0000_0000 (64-bit dividend, INT64_MIN)
+        code.MOV(X1, X11);
+        code.LSL(X1, X1, 16);
+        code.ORR(X1, X1, X10);
+        code.LSL(X1, X1, 16);
+        code.ORR(X1, X1, X10);
+        code.LSL(X1, X1, 16);
+        code.ORR(X1, X1, X10);
+
+        // X3 = 0xffff_ffff (32-bit divisor, -1)
+        code.MOV(X3, X12);
+        code.LSL(X3, X3, 16);
+        code.ORR(X3, X3, X12);
+
+        // X4 = 0x8000_0000 (32-bit dividend, INT32_MIN)
+        code.MOV(X4, X11);
+        code.LSL(X4, X4, 16);
+        code.ORR(X4, X4, X10);
+
+        code.SDIV(X2, X1, X0);
+        code.SDIV(W5, W4, W3);
+
+        jit.SetPC(0);
+        env.ticks_left = env.code_mem.size();
+        CheckedRun([&]() { jit.Run(); });
+        REQUIRE(jit.GetRegister(2) == 0x8000000000000000);
+        REQUIRE(jit.GetRegister(5) == 0x80000000);
+    };
+    SECTION("With no opts") {
+        jit_user_config.optimizations = no_optimizations;
+        do_sdiv_code();
+    }
+    SECTION("With opts + constant folding") {
+        jit_user_config.optimizations = all_safe_optimizations;
+        do_sdiv_code();
+    }
 }
 
 // Restricted register set required to trigger:
@@ -2359,12 +2426,12 @@ TEST_CASE("A64: RBIT{16b}", "[a64]") {
     A64::UserConfig conf{};
     conf.callbacks = &env;
     A64::Jit jit{conf};
-    env.code_mem.emplace_back(0x6e605841); // rbit v1.16b, v2.16b
-    env.code_mem.emplace_back(0x6e605822); // rbit v2.16b, v1.16b
-    env.code_mem.emplace_back(0x14000000); // b .
+    oaknut::VectorCodeGenerator code{env.code_mem, nullptr};
+    code.RBIT(V1.B16(), V2.B16());
+    code.RBIT(V2.B16(), V1.B16());
     jit.SetVector(2, { 0xcafedead, 0xbabebeef });
     jit.SetPC(0); // at _start
-    env.ticks_left = 4;
+    env.ticks_left = env.code_mem.size();
     CheckedRun([&]() { jit.Run(); });
     REQUIRE(jit.GetVector(1)[0] == 0x537f7bb5);
     REQUIRE(jit.GetVector(1)[1] == 0x5d7d7df7);
@@ -2377,15 +2444,15 @@ TEST_CASE("A64: CLZ{X}", "[a64]") {
     A64::UserConfig conf{};
     conf.callbacks = &env;
     A64::Jit jit{conf};
-    env.code_mem.emplace_back(0xdac01060); // clz x0, x3
-    env.code_mem.emplace_back(0xdac01081); // clz x1, x4
-    env.code_mem.emplace_back(0xdac010a2); // clz x2, x5
-    env.code_mem.emplace_back(0x14000000); // b .
+    oaknut::VectorCodeGenerator code{env.code_mem, nullptr};
+    code.CLZ(X0, X3);
+    code.CLZ(X1, X4);
+    code.CLZ(X2, X5);
     jit.SetRegister(3, 0xfffffffffffffff0);
     jit.SetRegister(4, 0x0fffffff0ffffff0);
     jit.SetRegister(5, 0x07fffffeffeffef0);
     jit.SetPC(0); // at _start
-    env.ticks_left = 4;
+    env.ticks_left = env.code_mem.size();
     CheckedRun([&]() { jit.Run(); });
     REQUIRE(jit.GetRegister(0) == 0);
     REQUIRE(jit.GetRegister(1) == 4);
@@ -2397,15 +2464,15 @@ TEST_CASE("A64: CLZ{W}", "[a64]") {
     A64::UserConfig conf{};
     conf.callbacks = &env;
     A64::Jit jit{conf};
-    env.code_mem.emplace_back(0x5ac01060); // clz w0, w3
-    env.code_mem.emplace_back(0x5ac01081); // clz w1, w4
-    env.code_mem.emplace_back(0x5ac010a2); // clz w2, w5
-    env.code_mem.emplace_back(0x14000000); // b .
+    oaknut::VectorCodeGenerator code{env.code_mem, nullptr};
+    code.CLZ(W0, W3);
+    code.CLZ(W1, W4);
+    code.CLZ(W2, W5);
     jit.SetRegister(3, 0xffff1110);
     jit.SetRegister(4, 0x0fff1110);
     jit.SetRegister(5, 0x07fffffe);
     jit.SetPC(0); // at _start
-    env.ticks_left = 4;
+    env.ticks_left = env.code_mem.size();
     CheckedRun([&]() { jit.Run(); });
     REQUIRE(jit.GetRegister(0) == 0);
     REQUIRE(jit.GetRegister(1) == 4);