Skip to content
Prev Previous commit
Next Next commit
Widen 32bit UDIV to 64bit MULHI when possible. Improve register allocation.
  • Loading branch information
pentp committed May 18, 2021
commit f595fe6ff56de7247c93ce457c7fd1c76aebbb9a
49 changes: 41 additions & 8 deletions src/coreclr/jit/lower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5170,15 +5170,27 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod)
#if defined(TARGET_XARCH) || defined(TARGET_ARM64)
if (!comp->opts.MinOpts() && (divisorValue >= 3))
{
size_t magic;
bool increment;
int preShift;
int postShift;
size_t magic;
bool increment;
int preShift;
int postShift;
var_types mulType = type;

if (type == TYP_INT)
{
magic =
MagicDivide::GetUnsigned32Magic(static_cast<uint32_t>(divisorValue), &increment, &preShift, &postShift);

#ifdef TARGET_64BIT
// avoid inc_saturate/multiple shifts by widening the multiplication to 32x64
if (increment || preShift && postShift)
{
mulType = TYP_LONG;
divisor->gtType = TYP_LONG;
magic = MagicDivide::GetUnsigned64Magic(static_cast<uint64_t>(divisorValue), &increment, &preShift,
&postShift, 32);
}
#endif
}
else
{
Expand Down Expand Up @@ -5219,23 +5231,35 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod)
BlockRange().InsertBefore(divMod, preShiftBy, adjustedDividend);
firstNode = preShiftBy;
}
else if (mulType != type)
{
adjustedDividend = comp->gtNewCastNode(mulType, adjustedDividend, true, TYP_ULONG);
BlockRange().InsertBefore(divMod, adjustedDividend);
firstNode = adjustedDividend;
}

if (isDiv && !postShift)
#ifdef TARGET_XARCH
// force input transformation to RAX because the following MULHI will kill RDX:RAX anyway and LSRA often causes
// redundant copies otherwise
if (firstNode)
adjustedDividend->SetRegNum(REG_RAX);
#endif

divisor->AsIntCon()->SetIconValue(magic);
if (isDiv && !postShift && mulType == type)
{
divMod->SetOper(GT_MULHI);
divMod->gtOp1 = adjustedDividend;
divMod->gtFlags |= GTF_UNSIGNED;
divisor->AsIntCon()->SetIconValue(magic);
}
else
{
// Insert a new GT_MULHI node before the existing GT_UDIV/GT_UMOD node.
// The existing node will later be transformed into a GT_RSZ/GT_SUB that
// computes the final result. This way we don't need to find and change the use
// of the existing node.
GenTree* mulhi = comp->gtNewOperNode(GT_MULHI, type, adjustedDividend, divisor);
GenTree* mulhi = comp->gtNewOperNode(GT_MULHI, mulType, adjustedDividend, divisor);
mulhi->gtFlags |= GTF_UNSIGNED;
divisor->AsIntCon()->SetIconValue(magic);
BlockRange().InsertBefore(divMod, mulhi);
if (!firstNode)
firstNode = mulhi;
Expand All @@ -5257,6 +5281,15 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod)
BlockRange().InsertBefore(divMod, mulhi);
}
}
else if (isDiv)
{
assert(mulType != type);
divMod->SetOper(GT_CAST);
divMod->gtOp1 = mulhi;
divMod->gtOp2 = nullptr;
divMod->gtFlags |= GTF_UNSIGNED;
divMod->AsCast()->gtCastType = TYP_UINT;
}

if (!isDiv)
{
Expand Down
5 changes: 0 additions & 5 deletions src/coreclr/jit/lsraxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -318,11 +318,6 @@ int LinearScan::BuildNode(GenTree* tree)
srcCount = BuildModDiv(tree->AsOp());
break;

case GT_INC_SATURATE:
srcCount = BuildOperandUses(tree->gtGetOp1());
BuildDef(tree, RBM_EAX); // force EAX because the following MULHI will require EAX:EDX input anyway
break;

#if defined(TARGET_X86)
case GT_MUL_LONG:
dstCount = 2;
Expand Down
5 changes: 3 additions & 2 deletions src/coreclr/jit/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2424,9 +2424,10 @@ uint32_t GetUnsigned32Magic(uint32_t d, bool* increment /*out*/, int* preShift /
}

#ifdef TARGET_64BIT
uint64_t GetUnsigned64Magic(uint64_t d, bool* increment /*out*/, int* preShift /*out*/, int* postShift /*out*/)
uint64_t GetUnsigned64Magic(
uint64_t d, bool* increment /*out*/, int* preShift /*out*/, int* postShift /*out*/, unsigned bits)
{
return GetUnsignedMagic<uint64_t>(d, increment, preShift, postShift, 64);
return GetUnsignedMagic<uint64_t>(d, increment, preShift, postShift, bits);
}
#endif

Expand Down
3 changes: 2 additions & 1 deletion src/coreclr/jit/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -758,7 +758,8 @@ namespace MagicDivide
{
uint32_t GetUnsigned32Magic(uint32_t d, bool* increment /*out*/, int* preShift /*out*/, int* postShift /*out*/);
#ifdef TARGET_64BIT
uint64_t GetUnsigned64Magic(uint64_t d, bool* increment /*out*/, int* preShift /*out*/, int* postShift /*out*/);
uint64_t GetUnsigned64Magic(
uint64_t d, bool* increment /*out*/, int* preShift /*out*/, int* postShift /*out*/, unsigned bits = 64);
#endif
int32_t GetSigned32Magic(int32_t d, int* shift /*out*/);
#ifdef TARGET_64BIT
Expand Down