Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Limit high SIMD reg to compatible intrinsics lsra build.
  • Loading branch information
anthonycanino committed Nov 28, 2022
commit b2d4da6281a2a9e8474827945bc4c7cd4ea00d6c
2 changes: 0 additions & 2 deletions src/coreclr/jit/emit.h
Original file line number Diff line number Diff line change
Expand Up @@ -1128,8 +1128,6 @@ class emitter
case IF_RWR_RRD_RRD:
case IF_RWR_RRD_RRD_CNS:
case IF_RWR_RRD_RRD_RRD:
case IF_RWR_RRD_SRD_RRD:
case IF_RWR_RRD_ARD_RRD:
return true;
default:
return false;
Expand Down
52 changes: 31 additions & 21 deletions src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -217,10 +217,10 @@ bool emitter::IsEvexEncodedInstruction(instruction ins) const
case INS_phminposuw:
case INS_mpsadbw:
case INS_pclmulqdq:
case INS_aesdec:
case INS_aesdeclast:
case INS_aesenc:
case INS_aesenclast:
case INS_aesdec:
case INS_aesdeclast:
case INS_aesimc:
case INS_aeskeygenassist:
case INS_vzeroupper:
Expand Down Expand Up @@ -260,18 +260,24 @@ bool emitter::IsEvexEncodedInstruction(instruction ins) const
case INS_prefetcht2:
case INS_sfence:
// Might need new INS_<INS_NAME>*suffix* instructions for these.
case INS_por: // INS_pord, INS_porq.
case INS_pxor: // INS_pxord, INS_pxorq
case INS_movdqa: // INS_movdqa32, INS_movdqa64.
case INS_movdqu: // INS_movdqu8, INS_movdqu16, INS_movdqu32, INS_movdqu64.
case INS_pand: // INS_pandd, INS_pandq.
case INS_pandn: // INS_pandnd, INS_pandnq.
case INS_vextractf128: // INS_vextractf32x4, INS_vextractf64x2.
case INS_vextracti128: // INS_vextracti32x4, INS_vextracti64x2.
case INS_vinsertf128: // INS_vinsertf32x4, INS_vinsertf64x2.
case INS_vinserti128: // INS_vinserti32x4, INS_vinserti64x2.
case INS_vbroadcastf128: // INS_vbroadcastf32x4, INS_vbroadcastf64x2.
case INS_vbroadcasti128: // INS_vbroadcasti32x4, INS_vbroadcasti64x2.

// TODO-XARCH-AVX512: These need to be encoded with the proper individual EVEX instructions (movdqu8, movdqu16, etc.).
// For implementation speed, each existing instruction currently defaults to its 32-bit operand form
// (i.e., movdqu => movdqu32, etc.).
// Since k registers are not used yet, this has no impact on correctness, but it will matter once
// k registers are used (distinguishing the operand type is the point of splitting these instructions).
//case INS_movdqa: // INS_movdqa32, INS_movdqa64.
//case INS_movdqu: // INS_movdqu8, INS_movdqu16, INS_movdqu32, INS_movdqu64.
//case INS_pand: // INS_pandd, INS_pandq.
//case INS_pandn: // INS_pandnd, INS_pandnq.
//case INS_por: // INS_pord, INS_porq.
//case INS_pxor: // INS_pxord, INS_pxorq
//case INS_vextractf128: // INS_vextractf32x4, INS_vextractf64x2.
//case INS_vextracti128: // INS_vextracti32x4, INS_vextracti64x2.
//case INS_vinsertf128: // INS_vinsertf32x4, INS_vinsertf64x2.
//case INS_vinserti128: // INS_vinserti32x4, INS_vinserti64x2.
{
return false;
}
Expand Down Expand Up @@ -794,13 +800,16 @@ bool emitter::TakesEvexPrefix(const instrDesc *id) const
return false;
}

instruction ins = id->idIns();

if (HasHighSIMDReg(id))
{
assert(IsEvexEncodedInstruction(ins));
// TODO-XARCH-AVX512 remove this check once k registers have been implemented
assert(!HasKMaskRegisterDest(ins));
return true;
}

instruction ins = id->idIns();

// TODO-XArch-AVX512: Revisit 'HasKMaskRegisterDest()' check once KMask support is added.
return IsEvexEncodedInstruction(ins) && !HasKMaskRegisterDest(ins);
}
Expand Down Expand Up @@ -4098,7 +4107,7 @@ void emitter::emitIns(instruction ins, emitAttr attr)

insFormat fmt = IF_NONE;

sz += emitGetAdjustedSizeEvexAware(ins, attr, code);
sz += emitGetAdjustedSizeEvexAware(id, attr, code);
if (TakesRexWPrefix(ins, attr))
{
sz += emitGetRexPrefixSize(ins);
Expand Down Expand Up @@ -5377,7 +5386,12 @@ void emitter::emitIns_R_I(instruction ins,
break;
}

sz += emitGetAdjustedSizeEvexAware(ins, attr, insCodeMI(ins));
id = emitNewInstrSC(attr, val);
id->idIns(ins);
id->idInsFmt(fmt);
id->idReg1(reg);

sz += emitGetAdjustedSizeEvexAware(id, attr, insCodeMI(ins));

// Do we need a REX prefix for AMD64? We need one if we are using any extended register (REX.R), or if we have a
// 64-bit sized operand (REX.W). Note that IMUL in our encoding is special, with a "built-in", implicit, target
Expand All @@ -5387,10 +5401,6 @@ void emitter::emitIns_R_I(instruction ins,
sz += emitGetRexPrefixSize(ins);
}

id = emitNewInstrSC(attr, val);
id->idIns(ins);
id->idInsFmt(fmt);
id->idReg1(reg);
id->idCodeSize(sz);

#ifdef DEBUG
Expand Down Expand Up @@ -14080,7 +14090,7 @@ BYTE* emitter::emitOutputRI(BYTE* dst, instrDesc* id)
// This is INS_mov and will not take VEX prefix
assert(!TakesVexPrefix(ins));

if (TakesRexWPrefix(ins, size) || (codeEvexMigrationCheck(code) && IsWEvexOpcodeExtension(ins)))
if (TakesRexWPrefix(ins, size) || (codeEvexMigrationCheck(code) && IsWEvexOpcodeExtension(id)))
{
code = AddRexWPrefix(id, code);
}
Expand Down
16 changes: 16 additions & 0 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19108,6 +19108,22 @@ bool GenTree::isRMWHWIntrinsic(Compiler* comp)
#endif
}

//------------------------------------------------------------------------
// isEvexCompatibleHWIntrinsic: Checks whether this hardware intrinsic node
//    may be treated as EVEX compatible.
//
// Arguments:
//    comp - the compiler instance; asserted to be non-null
//
// Return Value:
//    On xarch and arm64 targets, true when the intrinsic has EVEX semantics
//    and does not return a per-element mask; false on every other target.
//
bool GenTree::isEvexCompatibleHWIntrinsic(Compiler* comp)
{
    assert(gtOper == GT_HWINTRINSIC);
    assert(comp != nullptr);

#if defined(TARGET_XARCH) || defined(TARGET_ARM64)
    const auto intrinsicId = AsHWIntrinsic()->GetHWIntrinsicId();

    if (!HWIntrinsicInfo::HasEvexSemantics(intrinsicId))
    {
        return false;
    }

    // TODO-XARCH-AVX512 remove the ReturnsPerElementMask check once K registers have been properly
    // implemented in the register allocator
    return !HWIntrinsicInfo::ReturnsPerElementMask(intrinsicId);
#else
    return false;
#endif
}

GenTreeHWIntrinsic* Compiler::gtNewSimdHWIntrinsicNode(var_types type,
NamedIntrinsic hwIntrinsicID,
CorInfoType simdBaseJitType,
Expand Down
6 changes: 6 additions & 0 deletions src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -1517,6 +1517,7 @@ struct GenTree
bool isCommutativeHWIntrinsic() const;
bool isContainableHWIntrinsic() const;
bool isRMWHWIntrinsic(Compiler* comp);
bool isEvexCompatibleHWIntrinsic(Compiler* comp);
#else
bool isCommutativeHWIntrinsic() const
{
Expand All @@ -1532,6 +1533,11 @@ struct GenTree
{
return false;
}

// Stub for the #else branch of the FEATURE_HW_INTRINSICS conditional:
// unconditionally reports false; `comp` is unused.
bool isEvexCompatibleHWIntrinsic(Compiler* comp)
{
return false;
}
#endif // FEATURE_HW_INTRINSICS

static bool OperIsCommutative(genTreeOps gtOper)
Expand Down
20 changes: 19 additions & 1 deletion src/coreclr/jit/hwintrinsic.h
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,9 @@ enum HWIntrinsicFlag : unsigned int
// contained
HW_Flag_MaybeCommutative = 0x80000,

// The intrinsic does not have an EVEX compatible form
HW_Flag_NoEvexSemantics = 0x100000

#elif defined(TARGET_ARM64)
// The intrinsic has an immediate operand
// - the value can be (and should be) encoded in a corresponding instruction when the operand value is constant
Expand All @@ -172,7 +175,10 @@ enum HWIntrinsicFlag : unsigned int
HW_Flag_SIMDScalar = 0x1000,

// The intrinsic supports some sort of containment analysis
HW_Flag_SupportsContainment = 0x2000
HW_Flag_SupportsContainment = 0x2000,

// The intrinsic does not have an EVEX compatible form
HW_Flag_NoEvexSemantics = 0x4000

#else
#error Unsupported platform
Expand Down Expand Up @@ -761,6 +767,18 @@ struct HWIntrinsicInfo
#endif
}

// Reports whether the intrinsic `id` has EVEX semantics, i.e. whether its
// flags do NOT include HW_Flag_NoEvexSemantics. Compilation fails on any
// target other than xarch or arm64.
static bool HasEvexSemantics(NamedIntrinsic id)
{
#if defined(TARGET_XARCH) || defined(TARGET_ARM64)
    const HWIntrinsicFlag intrinsicFlags = lookupFlags(id);
    const bool            optedOut       = (intrinsicFlags & HW_Flag_NoEvexSemantics) != 0;
    return !optedOut;
#else
#error Unsupported platform
#endif
}

static bool HasSpecialImport(NamedIntrinsic id)
{
HWIntrinsicFlag flags = lookupFlags(id);
Expand Down
Loading