[dynarmic] AVX512CD impl for lzcnt16

Signed-off-by: lizzie <lizzie@eden-emu.dev>
This commit is contained in:
lizzie 2026-02-08 08:17:28 +00:00
parent e544cb3cf6
commit 3f1575fd68
1 changed files with 15 additions and 1 deletions

View File

@ -927,7 +927,21 @@ void EmitX64::EmitVectorCountLeadingZeros8(EmitContext& ctx, IR::Inst* inst) {
}
void EmitX64::EmitVectorCountLeadingZeros16(EmitContext& ctx, IR::Inst* inst) {
if (code.HasHostFeature(HostFeature::AVX)) {
if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512CD)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto const tmp0 = ctx.reg_alloc.UseScratchXmm(code, args[0]);
auto const tmp1 = ctx.reg_alloc.ScratchXmm(code);
// 11-latency: unpack 16-bit(128-bit) XMM into 32-bit(256-bit) YMM vector
// then just lzcnt that, pack it back (unsigned) to 16-bit
// then subtract 16-bit cuz of the zext32
/*4*/ code.vpmovzxwd(tmp0.cvt256(), tmp0);
/*4*/ code.vplzcntd(tmp0.cvt256(), tmp0.cvt256());
/*1*/ code.vpxor(tmp1, tmp1, tmp1);
/*1*/ code.vpackusdw(tmp0, tmp0, tmp1);
/*1*/ code.vpaddw(tmp0, tmp0, code.BConst<16>(xword, 65520));
/*4*/ code.vzeroupper();
ctx.reg_alloc.DefineValue(code, inst, tmp0);
} else if (code.HasHostFeature(HostFeature::AVX)) {
auto args = ctx.reg_alloc.GetArgumentInfo(inst);
auto const data = ctx.reg_alloc.UseScratchXmm(code, args[0]);
auto const result = ctx.reg_alloc.ScratchXmm(code);