src/coreclr/jit/gentree.cpp: 3 additions & 3 deletions
@@ -7384,9 +7384,9 @@ GenTree* Compiler::gtNewZeroConNode(var_types type)
 #ifdef FEATURE_SIMD
     if (varTypeIsSIMD(type))
     {
-        GenTreeVecCon* vecCon = gtNewVconNode(type);
-        vecCon->gtSimdVal = simd_t::Zero();
-        return vecCon;
+        GenTreeVecCon* allBitsSet = gtNewVconNode(type);
+        allBitsSet->gtSimdVal = simd_t::Zero();
+        return allBitsSet;
     }
 #endif // FEATURE_SIMD
 
src/coreclr/jit/lower.cpp: 12 additions & 56 deletions
@@ -1902,20 +1902,8 @@ GenTree* Lowering::LowerCallMemcmp(GenTreeCall* call)
         {
             GenTree* lArg = call->gtArgs.GetUserArgByIndex(0)->GetNode();
             GenTree* rArg = call->gtArgs.GetUserArgByIndex(1)->GetNode();
-
-            ssize_t MaxUnrollSize = 16;
-#ifdef FEATURE_SIMD
-            MaxUnrollSize = 32;
-#ifdef TARGET_XARCH
-            if (comp->compOpportunisticallyDependsOn(InstructionSet_Vector256))
-            {
-                MaxUnrollSize = 64;
-            }
-            // TODO-XARCH-AVX512: Consider enabling this for AVX512
-#endif
-#endif
-
-            if (cnsSize <= MaxUnrollSize)
+            // TODO: Add SIMD path for [16..128] via GT_HWINTRINSIC nodes
+            if (cnsSize <= 16)
             {
                 unsigned loadWidth = 1 << BitOperations::Log2((unsigned)cnsSize);
                 var_types loadType;
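
Note (a reviewer-style sketch, not part of the diff): with the SIMD tiers removed, only constant sizes up to 16 bytes are considered by this unrolled path, and the starting load width is the largest power of two not exceeding cnsSize, which is what the 1 << BitOperations::Log2(...) expression above computes. A minimal standalone C++ model of that selection, using std::bit_floor in place of the JIT's BitOperations::Log2 (initialLoadWidth is a made-up name for illustration):

    #include <bit>     // std::bit_floor (C++20)
    #include <cstddef> // std::size_t

    // Model of the unrolling gate above: sizes outside 1..16 keep the real call,
    // everything else starts from the largest power-of-two load width.
    inline unsigned initialLoadWidth(std::size_t cnsSize)
    {
        if (cnsSize < 1 || cnsSize > 16)
            return 0; // this path no longer unrolls such sizes
        // e.g. 5 -> 4, 8 -> 8, 16 -> 16 (clamped to 8-byte loads in the next hunk)
        return (unsigned)std::bit_floor(cnsSize);
    }
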
@@ -1931,25 +1919,11 @@ GenTree* Lowering::LowerCallMemcmp(GenTreeCall* call)
                 {
                     loadType = TYP_INT;
                 }
-                else if ((loadWidth == 8) || (MaxUnrollSize == 16))
+                else if ((loadWidth == 8) || (loadWidth == 16))
                 {
                     loadWidth = 8;
                     loadType = TYP_LONG;
                 }
-#ifdef FEATURE_SIMD
-                else if ((loadWidth == 16) || (MaxUnrollSize == 32))
-                {
-                    loadWidth = 16;
-                    loadType = TYP_SIMD16;
-                }
-#ifdef TARGET_XARCH
-                else if ((loadWidth == 32) || (MaxUnrollSize == 64))
-                {
-                    loadWidth = 32;
-                    loadType = TYP_SIMD32;
-                }
-#endif // TARGET_XARCH
-#endif // FEATURE_SIMD
                 else
                 {
                     unreached();
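
Note (illustrative, not from the PR): with the TYP_SIMD16/TYP_SIMD32 branches gone, a 16-byte constant size now falls into the 8-byte bucket, i.e. it is handled with two 8-byte loads rather than one vector load. A standalone sketch of the remaining width-to-type mapping (LoadPlan and pickLoadPlan are made-up names; the JIT type names are shown as strings purely for illustration):

    // Remaining scalar load plans after this change.
    struct LoadPlan
    {
        unsigned    width;   // bytes per load actually emitted
        const char* jitType; // load type used in the chain above
    };

    inline LoadPlan pickLoadPlan(unsigned loadWidth)
    {
        switch (loadWidth)
        {
            case 1:  return {1, "TYP_UBYTE"};
            case 2:  return {2, "TYP_USHORT"};
            case 4:  return {4, "TYP_INT"};
            case 8:
            case 16: return {8, "TYP_LONG"}; // 16 bytes -> two 8-byte loads
            default: return {0, "unreached"};
        }
    }
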
@@ -1958,26 +1932,8 @@ GenTree* Lowering::LowerCallMemcmp(GenTreeCall* call)
 
                 GenTree* result = nullptr;
 
-                auto newBinaryOp = [](Compiler* comp, genTreeOps oper, var_types type, GenTree* op1,
-                                      GenTree* op2) -> GenTree* {
-#ifdef FEATURE_SIMD
-                    if (varTypeIsSIMD(op1))
-                    {
-                        if (GenTree::OperIsCmpCompare(oper))
-                        {
-                            assert(type == TYP_INT);
-                            return comp->gtNewSimdCmpOpAllNode(oper, TYP_BOOL, op1, op2, CORINFO_TYPE_NATIVEUINT,
-                                                               genTypeSize(op1), false);
-                        }
-                        return comp->gtNewSimdBinOpNode(oper, op1->TypeGet(), op1, op2, CORINFO_TYPE_NATIVEUINT,
-                                                        genTypeSize(op1), false);
-                    }
-#endif
-                    return comp->gtNewOperNode(oper, type, op1, op2);
-                };
-
                 // loadWidth == cnsSize means a single load is enough for both args
-                if (loadWidth == (unsigned)cnsSize)
+                if ((loadWidth == (unsigned)cnsSize) && (loadWidth <= 8))
                 {
                     // We're going to emit something like the following:
                     //
@@ -1987,7 +1943,7 @@ GenTree* Lowering::LowerCallMemcmp(GenTreeCall* call)
                     //
                     GenTree* lIndir = comp->gtNewIndir(loadType, lArg);
                     GenTree* rIndir = comp->gtNewIndir(loadType, rArg);
-                    result = newBinaryOp(comp, GT_EQ, TYP_INT, lIndir, rIndir);
+                    result = comp->gtNewOperNode(GT_EQ, TYP_INT, lIndir, rIndir);
 
                     BlockRange().InsertAfter(lArg, lIndir);
                     BlockRange().InsertAfter(rArg, rIndir);
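
Note (standalone model, not JIT code): when the constant size is exactly 1, 2, 4 or 8 bytes, the memcmp-for-equality collapses to one load per argument followed by an integer compare, which is what the lIndir/rIndir/GT_EQ nodes above express. A C++ sketch of the 8-byte case (memcmpEq8 is a made-up name; memcpy stands in for the unaligned indirections):

    #include <cstdint>
    #include <cstring>

    // Scalar model of the single-load path for an 8-byte constant length.
    inline int memcmpEq8(const void* l, const void* r)
    {
        std::uint64_t lv, rv;
        std::memcpy(&lv, l, sizeof(lv)); // models gtNewIndir(TYP_LONG, lArg)
        std::memcpy(&rv, r, sizeof(rv)); // models gtNewIndir(TYP_LONG, rArg)
        return (lv == rv) ? 1 : 0;       // models GT_EQ producing a TYP_INT result
    }
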
@@ -2034,17 +1990,17 @@ GenTree* Lowering::LowerCallMemcmp(GenTreeCall* call)
                     //
                     GenTree* l1Indir = comp->gtNewIndir(loadType, lArgUse.Def());
                     GenTree* r1Indir = comp->gtNewIndir(loadType, rArgUse.Def());
-                    GenTree* lXor = newBinaryOp(comp, GT_XOR, actualLoadType, l1Indir, r1Indir);
+                    GenTree* lXor = comp->gtNewOperNode(GT_XOR, actualLoadType, l1Indir, r1Indir);
                     GenTree* l2Offs = comp->gtNewIconNode(cnsSize - loadWidth, TYP_I_IMPL);
-                    GenTree* l2AddOffs = newBinaryOp(comp, GT_ADD, lArg->TypeGet(), lArgClone, l2Offs);
+                    GenTree* l2AddOffs = comp->gtNewOperNode(GT_ADD, lArg->TypeGet(), lArgClone, l2Offs);
                     GenTree* l2Indir = comp->gtNewIndir(loadType, l2AddOffs);
                     GenTree* r2Offs = comp->gtCloneExpr(l2Offs); // offset is the same
-                    GenTree* r2AddOffs = newBinaryOp(comp, GT_ADD, rArg->TypeGet(), rArgClone, r2Offs);
+                    GenTree* r2AddOffs = comp->gtNewOperNode(GT_ADD, rArg->TypeGet(), rArgClone, r2Offs);
                     GenTree* r2Indir = comp->gtNewIndir(loadType, r2AddOffs);
-                    GenTree* rXor = newBinaryOp(comp, GT_XOR, actualLoadType, l2Indir, r2Indir);
-                    GenTree* resultOr = newBinaryOp(comp, GT_OR, actualLoadType, lXor, rXor);
-                    GenTree* zeroCns = comp->gtNewZeroConNode(actualLoadType);
-                    result = newBinaryOp(comp, GT_EQ, TYP_INT, resultOr, zeroCns);
+                    GenTree* rXor = comp->gtNewOperNode(GT_XOR, actualLoadType, l2Indir, r2Indir);
+                    GenTree* resultOr = comp->gtNewOperNode(GT_OR, actualLoadType, lXor, rXor);
+                    GenTree* zeroCns = comp->gtNewIconNode(0, actualLoadType);
+                    result = comp->gtNewOperNode(GT_EQ, TYP_INT, resultOr, zeroCns);
 
                     BlockRange().InsertAfter(rArgClone, l1Indir, r1Indir, l2Offs, l2AddOffs);
                     BlockRange().InsertAfter(l2AddOffs, l2Indir, r2Offs, r2AddOffs, r2Indir);
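
Note (standalone model, not JIT code): when the size is not a power of two, or is 16, the lowering above emits two possibly overlapping loads per argument, one at offset 0 and one at offset cnsSize - loadWidth, XORs each pair, ORs the two XOR results and compares against zero; the overlap is harmless because only equality is being tested. A C++ sketch for cnsSize = 5 with 4-byte loads (memcmpEq5 is a made-up name; memcpy again stands in for the indirections):

    #include <cstdint>
    #include <cstring>

    // Scalar model of the two-load path for a 5-byte constant length:
    // head loads at offset 0, tail loads at offset cnsSize - loadWidth = 1.
    inline int memcmpEq5(const unsigned char* l, const unsigned char* r)
    {
        std::uint32_t l1, r1, l2, r2;
        std::memcpy(&l1, l, 4);     // l1Indir
        std::memcpy(&r1, r, 4);     // r1Indir
        std::memcpy(&l2, l + 1, 4); // l2Indir (overlaps bytes 1..3)
        std::memcpy(&r2, r + 1, 4); // r2Indir
        // ((l1 ^ r1) | (l2 ^ r2)) == 0  <=>  all five bytes match
        return (((l1 ^ r1) | (l2 ^ r2)) == 0) ? 1 : 0;
    }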