[dynarmic] use (v)pshufb (SSSE3+) for VREV32/64 emits (#2851)
Signed-off-by: lizzie <lizzie@eden-emu.dev> Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2851 Reviewed-by: crueter <crueter@eden-emu.dev> Reviewed-by: MaranBr <maranbr@eden-emu.dev> Reviewed-by: Caio Oliveira <caiooliveirafarias0@gmail.com> Co-authored-by: lizzie <lizzie@eden-emu.dev> Co-committed-by: lizzie <lizzie@eden-emu.dev>
This commit is contained in:
parent
9582dfffee
commit
e4b0c03a22
|
|
@ -3521,48 +3521,47 @@ void EmitX64::EmitVectorReverseElementsInHalfGroups8(EmitContext& ctx, IR::Inst*
|
|||
|
||||
// Reverses the order of the four bytes inside every 32-bit group of the
// 128-bit operand (args[0]) and defines the result as the value of `inst`.
//
// Exactly one of three code sequences is emitted, chosen from the host
// features reported by code.HasHostFeature():
//   * AVX   -> a single vpshufb with a constant byte-selector mask
//   * SSSE3 -> a single pshufb with the same mask
//   * else  -> SSE2-only shift/or/shuffle sequence
// The reversal must be emitted only once: applying it twice restores the
// original byte order.
void EmitX64::EmitVectorReverseElementsInWordGroups8(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);

    // Byte-selector mask for (v)pshufb: destination byte i is taken from the
    // source byte whose index is stored in mask byte i. The selectors run
    // 3,2,1,0 / 7,6,5,4 / 11,10,9,8 / 15,14,13,12.
    // NOTE(review): assumes code.Const(xword, a, b) places `a` in the low
    // quadword and `b` in the high quadword - confirm against its definition.
    if (code.HasHostFeature(HostFeature::AVX)) {
        // NOTE(review): destination == first source, so this computes the
        // same result as the SSSE3 form; presumably kept for the VEX encoding.
        code.vpshufb(data, data, code.Const(xword, 0x0405060700010203, 0x0c0d0e0f08090a0b));
    } else if (code.HasHostFeature(HostFeature::SSSE3)) {
        code.pshufb(data, code.Const(xword, 0x0405060700010203, 0x0c0d0e0f08090a0b));
    } else {
        // The scratch register is only needed on this path, so it is
        // allocated here rather than at function scope.
        const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();

        // Step 1: swap the two bytes of every 16-bit lane
        //         (lane << 8) | (lane >> 8).
        code.movdqa(tmp, data);
        code.psllw(tmp, 8);
        code.psrlw(data, 8);
        code.por(data, tmp);

        // Step 2: swap adjacent 16-bit lanes (selector order 1,0,3,2) in the
        //         low and then the high quadword. Together with step 1 this
        //         reverses the bytes of each 32-bit group.
        code.pshuflw(data, data, 0b10110001);
        code.pshufhw(data, data, 0b10110001);
    }

    ctx.reg_alloc.DefineValue(inst, data);
}
|
||||
|
||||
// Swaps the two 16-bit elements inside every 32-bit group of the 128-bit
// operand (args[0]) and defines the result as the value of `inst`.
void EmitX64::EmitVectorReverseElementsInWordGroups16(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Xmm vec = ctx.reg_alloc.UseScratchXmm(args[0]);

    // pshuflw/pshufhw immediate: four 2-bit source-lane selectors, lowest
    // destination lane first. 01,00,11,10 => lanes (0,1) and (2,3) trade places.
    constexpr int swap_adjacent_halfwords = 0b10110001;

    // Low quadword first, then the high quadword; each instruction leaves the
    // other quadword untouched.
    code.pshuflw(vec, vec, swap_adjacent_halfwords);
    code.pshufhw(vec, vec, swap_adjacent_halfwords);

    ctx.reg_alloc.DefineValue(inst, vec);
}
|
||||
|
||||
// Reverses the order of the eight bytes inside every 64-bit group of the
// 128-bit operand (args[0]) and defines the result as the value of `inst`.
//
// Exactly one of three code sequences is emitted, chosen from the host
// features reported by code.HasHostFeature():
//   * AVX   -> a single vpshufb with a constant byte-selector mask
//   * SSSE3 -> a single pshufb with the same mask
//   * else  -> SSE2-only shift/or/shuffle sequence
// The reversal must be emitted only once: applying it twice restores the
// original byte order.
void EmitX64::EmitVectorReverseElementsInLongGroups8(EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);

    // Byte-selector mask for (v)pshufb: destination byte i is taken from the
    // source byte whose index is stored in mask byte i. The selectors run
    // 7..0 for the low quadword and 15..8 for the high quadword.
    // NOTE(review): assumes code.Const(xword, a, b) places `a` in the low
    // quadword and `b` in the high quadword - confirm against its definition.
    if (code.HasHostFeature(HostFeature::AVX)) {
        // NOTE(review): destination == first source, so this computes the
        // same result as the SSSE3 form; presumably kept for the VEX encoding.
        code.vpshufb(data, data, code.Const(xword, 0x0001020304050607, 0x08090a0b0c0d0e0f));
    } else if (code.HasHostFeature(HostFeature::SSSE3)) {
        code.pshufb(data, code.Const(xword, 0x0001020304050607, 0x08090a0b0c0d0e0f));
    } else {
        // The scratch register is only needed on this path, so it is
        // allocated here rather than at function scope.
        const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();

        // Step 1: swap the two bytes of every 16-bit lane
        //         (lane << 8) | (lane >> 8).
        code.movdqa(tmp, data);
        code.psllw(tmp, 8);
        code.psrlw(data, 8);
        code.por(data, tmp);

        // Step 2: reverse the four 16-bit lanes (selector order 3,2,1,0) of
        //         the low and then the high quadword. Together with step 1
        //         this reverses the bytes of each 64-bit group.
        code.pshuflw(data, data, 0b00011011);
        code.pshufhw(data, data, 0b00011011);
    }

    ctx.reg_alloc.DefineValue(inst, data);
}
|
||||
|
||||
|
|
|
|||
|
|
@ -91,6 +91,28 @@ TEST_CASE("A64: CLZ", "[a64]") {
|
|||
REQUIRE(jit.GetVector(5) == Vector{0x0, 0x0000001e0000001f});
|
||||
}
|
||||
|
||||
// Runs each REV32/REV64 vector arrangement once on the same source register
// (V5) and checks the resulting destination registers V0..V4.
TEST_CASE("A64: VREV", "[a64]") {
    A64TestEnv env;
    A64::UserConfig config{};
    config.callbacks = &env;
    A64::Jit jit{config};

    // Assemble the instructions under test straight into the test
    // environment's code memory.
    oaknut::VectorCodeGenerator assembler{env.code_mem, nullptr};
    assembler.REV32(V0.B16(), V5.B16());  // bytes within 32-bit groups
    assembler.REV32(V1.H8(), V5.H8());    // halfwords within 32-bit groups
    assembler.REV64(V2.B16(), V5.B16());  // bytes within 64-bit groups
    assembler.REV64(V3.H8(), V5.H8());    // halfwords within 64-bit groups
    assembler.REV64(V4.S4(), V5.S4());    // words within 64-bit groups

    // Every byte of the input is distinct, so any misplaced byte changes the
    // compared value.
    const Vector input{0x1020304050607080, 0x90A0B0C0D0E0F000};
    jit.SetVector(5, input);
    jit.SetPC(0);

    // Tick budget equals the number of entries in code memory.
    env.ticks_left = env.code_mem.size();
    CheckedRun([&jit] { jit.Run(); });

    REQUIRE(jit.GetVector(0) == Vector{0x4030201080706050, 0xc0b0a09000f0e0d0});
    REQUIRE(jit.GetVector(1) == Vector{0x3040102070805060, 0xb0c090a0f000d0e0});
    REQUIRE(jit.GetVector(2) == Vector{0x8070605040302010, 0x00f0e0d0c0b0a090});
    REQUIRE(jit.GetVector(3) == Vector{0x7080506030401020, 0xf000d0e0b0c090a0});
    REQUIRE(jit.GetVector(4) == Vector{0x5060708010203040, 0xd0e0f00090a0b0c0});
}
|
||||
|
||||
TEST_CASE("A64: UADDL{V,P}", "[a64]") {
|
||||
A64TestEnv env;
|
||||
A64::UserConfig jit_user_config{};
|
||||
|
|
|
|||
Loading…
Reference in New Issue