Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next commit
Add lowering for get_One and get_AllBitsSet.
  • Loading branch information
DeepakRajendrakumaran committed Mar 14, 2023
commit 891770f5ca6aa46dcccec1dbeaa421f94cd21383
31 changes: 26 additions & 5 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -491,14 +491,35 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre

if (vecCon->IsAllBitsSet())
{
if ((attr != EA_32BYTE) || compiler->compOpportunisticallyDependsOn(InstructionSet_AVX2))
switch (attr)
{
case EA_8BYTE:
case EA_16BYTE:
{
emit->emitIns_R_R(INS_pcmpeqd, attr, targetReg, targetReg);
break;
}
#if defined(FEATURE_SIMD)
emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, attr, targetReg, targetReg, targetReg);
#else
emit->emitIns_R_R(INS_pcmpeqd, attr, targetReg, targetReg);
case EA_32BYTE:
{
if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX2))
emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, attr, targetReg, targetReg, targetReg);
break;
}

case EA_64BYTE:
{
assert(compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F));
emit->emitIns_SIMD_R_R_R_I(INS_vpternlogd, attr, targetReg, targetReg, targetReg,
static_cast<int8_t>(0xFF));
break;
}
#endif // FEATURE_SIMD
break;

default:
{
unreached();
}
}
}

Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17752,6 +17752,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
case INS_vpsllvq:
case INS_vpsrlvd:
case INS_vpsrlvq:
case INS_vpternlogd:
result.insThroughput = PERFSCORE_THROUGHPUT_2X;
result.insLatency += PERFSCORE_LATENCY_1C;
break;
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/jit/fgbasic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1519,6 +1519,9 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed
case NI_Vector256_get_AllBitsSet:
case NI_Vector256_get_One:
case NI_Vector256_get_Zero:
case NI_Vector512_get_AllBitsSet:
case NI_Vector512_get_One:
case NI_Vector512_get_Zero:
case NI_VectorT256_get_AllBitsSet:
case NI_VectorT256_get_One:
case NI_VectorT256_get_Zero:
Expand Down
30 changes: 15 additions & 15 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21911,15 +21911,7 @@ GenTree* Compiler::gtNewSimdLoadNonTemporalNode(
// We don't guarantee a non-temporal load will actually occur, so fall back
// to regular aligned loads if the required ISA isn't supported.

if (simdSize == 64)
{
if (compOpportunisticallyDependsOn(InstructionSet_AVX512F))
{
intrinsic = NI_AVX512F_LoadAlignedVector512NonTemporal;
isNonTemporal = true;
}
}
else if (simdSize == 32)
if (simdSize == 32)
{
if (compOpportunisticallyDependsOn(InstructionSet_AVX2))
{
Expand All @@ -21932,6 +21924,14 @@ GenTree* Compiler::gtNewSimdLoadNonTemporalNode(
intrinsic = NI_AVX_LoadAlignedVector256;
}
}
else if (simdSize == 64)
{
if (compOpportunisticallyDependsOn(InstructionSet_AVX512F))
{
intrinsic = NI_AVX512F_LoadAlignedVector512NonTemporal;
isNonTemporal = true;
}
}
else if (compOpportunisticallyDependsOn(InstructionSet_SSE41))
{
intrinsic = NI_SSE41_LoadAlignedVector128NonTemporal;
Expand Down Expand Up @@ -23173,16 +23173,16 @@ GenTree* Compiler::gtNewSimdStoreAlignedNode(

NamedIntrinsic intrinsic = NI_Illegal;

if (simdSize == 64)
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F));
intrinsic = NI_AVX512F_StoreAligned;
}
else if (simdSize == 32)
if (simdSize == 32)
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
intrinsic = NI_AVX_StoreAligned;
}
else if (simdSize == 64)
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F));
intrinsic = NI_AVX512F_StoreAligned;
}
else if (simdBaseType != TYP_FLOAT)
{
intrinsic = NI_SSE2_StoreAligned;
Expand Down
4 changes: 3 additions & 1 deletion src/coreclr/jit/hwintrinsiclistxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,9 @@ HARDWARE_INTRINSIC(Vector256, Xor,
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// Vector512 Intrinsics
HARDWARE_INTRINSIC(Vector512, Create, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector512, get_Zero, 64, 0, {INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Vector512, get_AllBitsSet, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Vector512, get_One, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector512, get_Zero, 64, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Vector512, Load, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector512, LoadAligned, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector512, LoadAlignedNonTemporal, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/hwintrinsicxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1403,6 +1403,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,

case NI_Vector128_get_AllBitsSet:
case NI_Vector256_get_AllBitsSet:
case NI_Vector512_get_AllBitsSet:
{
assert(sig->numArgs == 0);
retNode = gtNewAllBitsSetConNode(retType);
Expand All @@ -1411,6 +1412,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,

case NI_Vector128_get_One:
case NI_Vector256_get_One:
case NI_Vector512_get_One:
{
assert(sig->numArgs == 0);
retNode = gtNewOneConNode(retType, simdBaseType);
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/instrsxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -654,6 +654,7 @@ INST3(movdqu32, "movdqu32", IUM_WR, SSEFLT(0x7F), BAD_CODE,
INST3(movdqu64, "movdqu64", IUM_WR, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F), INS_TT_FULL_MEM, Input_64Bit | INS_FLAGS_None)
INST3(vinsertf64x4, "insertf64x4", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1A), INS_TT_TUPLE4, Input_64Bit | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed double-precision floating point values
INST3(vinserti64x4, "inserti64x4", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3A), INS_TT_TUPLE4, Input_64Bit | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed quadword integer values
INST3(vpternlogd, "vpternlogd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x25), INS_TT_FULL, Input_32Bit | INS_Flags_IsDstDstSrcAVXInstruction)
INST3(LAST_AVX512F_INSTRUCTION, "LAST_AVX512F_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None)

INST3(FIRST_AVX512BW_INSTRUCTION, "FIRST_AVX512BW_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None)
Expand Down