Skip to content

Commit e5eafc9

Browse files
authored
[arm64] JIT: Enable CSE/hoisting for "arrayBase + elementOffset" (#61293)
1 parent a8cc1d0 commit e5eafc9

File tree

7 files changed

+167
-40
lines changed

7 files changed

+167
-40
lines changed

src/coreclr/jit/codegenlinear.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1574,6 +1574,15 @@ void CodeGen::genConsumeRegs(GenTree* tree)
15741574
{
15751575
genConsumeAddress(tree);
15761576
}
1577+
#ifdef TARGET_ARM64
1578+
else if (tree->OperIs(GT_BFIZ))
1579+
{
1580+
// Can be contained as part of LEA on ARM64
1581+
GenTreeCast* cast = tree->gtGetOp1()->AsCast();
1582+
assert(cast->isContained());
1583+
genConsumeAddress(cast->CastOp());
1584+
}
1585+
#endif
15771586
else if (tree->OperIsLocalRead())
15781587
{
15791588
// A contained lcl var must be living on stack and marked as reg optional, or not be a

src/coreclr/jit/emitarm64.cpp

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6985,6 +6985,14 @@ void emitter::emitIns_R_R_R_Ext(instruction ins,
69856985
{
69866986
shiftAmount = insOptsLSL(opt) ? scale : 0;
69876987
}
6988+
6989+
// If target reg is ZR - it means we're doing an implicit nullcheck
6990+
// where target type was ignored and set to TYP_INT.
6991+
if ((reg1 == REG_ZR) && (shiftAmount > 0))
6992+
{
6993+
shiftAmount = scale;
6994+
}
6995+
69886996
assert((shiftAmount == scale) || (shiftAmount == 0));
69896997

69906998
reg2 = encodingSPtoZR(reg2);
@@ -13481,8 +13489,23 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR
1348113489
}
1348213490
else // no scale
1348313491
{
13484-
// Then load/store dataReg from/to [memBase + index]
13485-
emitIns_R_R_R(ins, attr, dataReg, memBase->GetRegNum(), index->GetRegNum());
13492+
if (index->OperIs(GT_BFIZ) && index->isContained())
13493+
{
13494+
// Then load/store dataReg from/to [memBase + index*scale with sign/zero extension]
13495+
GenTreeCast* cast = index->gtGetOp1()->AsCast();
13496+
13497+
// For now, this code only supports extensions from i32/u32
13498+
assert(cast->isContained() && varTypeIsInt(cast->CastFromType()));
13499+
13500+
emitIns_R_R_R_Ext(ins, attr, dataReg, memBase->GetRegNum(), cast->CastOp()->GetRegNum(),
13501+
cast->IsUnsigned() ? INS_OPTS_UXTW : INS_OPTS_SXTW,
13502+
(int)index->gtGetOp2()->AsIntCon()->IconValue());
13503+
}
13504+
else
13505+
{
13506+
// Then load/store dataReg from/to [memBase + index]
13507+
emitIns_R_R_R(ins, attr, dataReg, memBase->GetRegNum(), index->GetRegNum());
13508+
}
1348613509
}
1348713510
}
1348813511
}

src/coreclr/jit/lower.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5033,6 +5033,28 @@ bool Lowering::TryCreateAddrMode(GenTree* addr, bool isContainable, var_types ta
50335033
}
50345034
}
50355035

5036+
#ifdef TARGET_ARM64
5037+
// Check if we can "contain" LEA(BFIZ) in order to extend 32bit index to 64bit as part of load/store.
5038+
if ((index != nullptr) && index->OperIs(GT_BFIZ) && index->gtGetOp1()->OperIs(GT_CAST) &&
5039+
index->gtGetOp2()->IsCnsIntOrI() && varTypeIsIntegral(targetType))
5040+
{
5041+
// BFIZ node is a binary op where op1 is GT_CAST and op2 is GT_CNS_INT
5042+
GenTreeCast* cast = index->gtGetOp1()->AsCast();
5043+
assert(cast->isContained());
5044+
5045+
const unsigned shiftBy = (unsigned)index->gtGetOp2()->AsIntCon()->IconValue();
5046+
5047+
// 'scale' and 'offset' have to be unset since we're going to use [base + index * SXTW/UXTW scale] form
5048+
// where there is no room for additional offsets/scales on ARM64. 'shiftBy' has to match target's width.
5049+
if (cast->CastOp()->TypeIs(TYP_INT) && cast->TypeIs(TYP_LONG) && (genTypeSize(targetType) == (1U << shiftBy)) &&
5050+
(scale == 1) && (offset == 0))
5051+
{
5052+
// TODO: Make sure that genCreateAddrMode marks such BFIZ candidates as GTF_DONT_CSE for better CQ.
5053+
MakeSrcContained(addrMode, index);
5054+
}
5055+
}
5056+
#endif
5057+
50365058
JITDUMP("New addressing mode node:\n ");
50375059
DISPNODE(addrMode);
50385060
JITDUMP("\n");

src/coreclr/jit/lsraarm64.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -689,7 +689,16 @@ int LinearScan::BuildNode(GenTree* tree)
689689
if (index != nullptr)
690690
{
691691
srcCount++;
692-
BuildUse(index);
692+
if (index->OperIs(GT_BFIZ) && index->isContained())
693+
{
694+
GenTreeCast* cast = index->gtGetOp1()->AsCast();
695+
assert(cast->isContained() && (cns == 0));
696+
BuildUse(cast->CastOp());
697+
}
698+
else
699+
{
700+
BuildUse(index);
701+
}
693702
}
694703
assert(dstCount == 1);
695704

src/coreclr/jit/lsrabuild.cpp

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3021,10 +3021,22 @@ int LinearScan::BuildAddrUses(GenTree* addr, regMaskTP candidates)
30213021
BuildUse(addrMode->Base(), candidates);
30223022
srcCount++;
30233023
}
3024-
if ((addrMode->Index() != nullptr) && !addrMode->Index()->isContained())
3024+
if (addrMode->Index() != nullptr)
30253025
{
3026-
BuildUse(addrMode->Index(), candidates);
3027-
srcCount++;
3026+
if (!addrMode->Index()->isContained())
3027+
{
3028+
BuildUse(addrMode->Index(), candidates);
3029+
srcCount++;
3030+
}
3031+
#ifdef TARGET_ARM64
3032+
else if (addrMode->Index()->OperIs(GT_BFIZ))
3033+
{
3034+
GenTreeCast* cast = addrMode->Index()->gtGetOp1()->AsCast();
3035+
assert(cast->isContained());
3036+
BuildUse(cast->CastOp(), candidates);
3037+
srcCount++;
3038+
}
3039+
#endif
30283040
}
30293041
return srcCount;
30303042
}

src/coreclr/jit/morph.cpp

Lines changed: 80 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -5461,15 +5461,43 @@ GenTree* Compiler::fgMorphArrayIndex(GenTree* tree)
54615461
// the partial byref will not point within the object, and thus not get updated correctly during a GC.
54625462
// This is mostly a risk in fully-interruptible code regions.
54635463

5464-
/* Add the first element's offset */
5465-
5466-
GenTree* cns = gtNewIconNode(elemOffs, TYP_I_IMPL);
5464+
// We can generate two types of trees for "addr":
5465+
//
5466+
// 1) "arrRef + (index + elemOffset)"
5467+
// 2) "(arrRef + elemOffset) + index"
5468+
//
5469+
// XArch has powerful addressing modes such as [base + index*scale + offset] so it's fine with 1),
5470+
// while for Arm we better try to make an invariant sub-tree as large as possible, which is usually
5471+
// "(arrRef + elemOffset)" and is CSE/LoopHoisting friendly => produces better codegen.
5472+
// 2) should still be safe from GC's point of view since both ADD operations are byref and point to
5473+
// within the object so GC will be able to correctly track and update them.
54675474

5468-
addr = gtNewOperNode(GT_ADD, TYP_I_IMPL, addr, cns);
5475+
bool groupArrayRefWithElemOffset = false;
5476+
#ifdef TARGET_ARMARCH
5477+
groupArrayRefWithElemOffset = true;
5478+
// TODO: in some cases even on ARM we better use 1) shape because if "index" is invariant and "arrRef" is not
5479+
// we at least will be able to hoist/CSE "index + elemOffset" in some cases.
5480+
// See https://github.com/dotnet/runtime/pull/61293#issuecomment-964146497
54695481

5470-
/* Add the object ref to the element's offset */
5482+
// Use 2) form only for primitive types for now - it significantly reduced number of size regressions
5483+
if (!varTypeIsIntegral(elemTyp))
5484+
{
5485+
groupArrayRefWithElemOffset = false;
5486+
}
5487+
#endif
54715488

5472-
addr = gtNewOperNode(GT_ADD, TYP_BYREF, arrRef, addr);
5489+
// First element's offset
5490+
GenTree* elemOffset = gtNewIconNode(elemOffs, TYP_I_IMPL);
5491+
if (groupArrayRefWithElemOffset)
5492+
{
5493+
GenTree* basePlusOffset = gtNewOperNode(GT_ADD, TYP_BYREF, arrRef, elemOffset);
5494+
addr = gtNewOperNode(GT_ADD, TYP_BYREF, basePlusOffset, addr);
5495+
}
5496+
else
5497+
{
5498+
addr = gtNewOperNode(GT_ADD, TYP_I_IMPL, addr, elemOffset);
5499+
addr = gtNewOperNode(GT_ADD, TYP_BYREF, arrRef, addr);
5500+
}
54735501

54745502
assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_LARGE) != 0) ||
54755503
(GenTree::s_gtNodeSizes[GT_IND] == TREE_NODE_SZ_SMALL));
@@ -5539,16 +5567,16 @@ GenTree* Compiler::fgMorphArrayIndex(GenTree* tree)
55395567
tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), arrRefDefn, tree);
55405568
}
55415569

5542-
JITDUMP("fgMorphArrayIndex (before remorph):\n");
5543-
DISPTREE(tree);
5570+
JITDUMP("fgMorphArrayIndex (before remorph):\n")
5571+
DISPTREE(tree)
55445572

55455573
// Currently we morph the tree to perform some folding operations prior
55465574
// to attaching fieldSeq info and labeling constant array index contributions
55475575
//
55485576
tree = fgMorphTree(tree);
55495577

5550-
JITDUMP("fgMorphArrayIndex (after remorph):\n");
5551-
DISPTREE(tree);
5578+
JITDUMP("fgMorphArrayIndex (after remorph):\n")
5579+
DISPTREE(tree)
55525580

55535581
// Ideally we just want to proceed to attaching fieldSeq info and labeling the
55545582
// constant array index contributions, but the morphing operation may have changed
@@ -5562,48 +5590,66 @@ GenTree* Compiler::fgMorphArrayIndex(GenTree* tree)
55625590

55635591
if (fgIsCommaThrow(tree))
55645592
{
5565-
if ((arrElem != indTree) || // A new tree node may have been created
5566-
(indTree->OperGet() != GT_IND)) // The GT_IND may have been changed to a GT_CNS_INT
5593+
if ((arrElem != indTree) || // A new tree node may have been created
5594+
(!indTree->OperIs(GT_IND))) // The GT_IND may have been changed to a GT_CNS_INT
55675595
{
55685596
return tree; // Just return the Comma-Throw, don't try to attach the fieldSeq info, etc..
55695597
}
55705598
}
55715599

55725600
assert(!fgGlobalMorph || (arrElem->gtDebugFlags & GTF_DEBUG_NODE_MORPHED));
5573-
DBEXEC(fgGlobalMorph && (arrElem == tree), tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED);
5574-
5575-
addr = arrElem->AsOp()->gtOp1;
5601+
DBEXEC(fgGlobalMorph && (arrElem == tree), tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED)
55765602

5577-
assert(addr->TypeGet() == TYP_BYREF);
5603+
addr = arrElem->gtGetOp1();
55785604

55795605
GenTree* cnsOff = nullptr;
5580-
if (addr->OperGet() == GT_ADD)
5606+
if (addr->OperIs(GT_ADD))
55815607
{
5582-
assert(addr->TypeGet() == TYP_BYREF);
5583-
assert(addr->AsOp()->gtOp1->TypeGet() == TYP_REF);
5584-
5585-
addr = addr->AsOp()->gtOp2;
5586-
5587-
// Look for the constant [#FirstElem] node here, or as the RHS of an ADD.
5588-
5589-
if (addr->gtOper == GT_CNS_INT)
5608+
GenTree* addrOp1 = addr->gtGetOp1();
5609+
if (groupArrayRefWithElemOffset)
55905610
{
5591-
cnsOff = addr;
5592-
addr = nullptr;
5611+
if (addrOp1->OperIs(GT_ADD) && addrOp1->gtGetOp2()->IsCnsIntOrI())
5612+
{
5613+
assert(addrOp1->gtGetOp1()->TypeIs(TYP_REF));
5614+
cnsOff = addrOp1->gtGetOp2();
5615+
addr = addr->gtGetOp2();
5616+
// Label any constant array index contributions with #ConstantIndex and any LclVars with
5617+
// GTF_VAR_ARR_INDEX
5618+
addr->LabelIndex(this);
5619+
}
5620+
else
5621+
{
5622+
assert(addr->gtGetOp2()->IsCnsIntOrI());
5623+
cnsOff = addr->gtGetOp2();
5624+
addr = nullptr;
5625+
}
55935626
}
55945627
else
55955628
{
5596-
if ((addr->OperGet() == GT_ADD) && (addr->AsOp()->gtOp2->gtOper == GT_CNS_INT))
5629+
assert(addr->TypeIs(TYP_BYREF));
5630+
assert(addr->gtGetOp1()->TypeIs(TYP_REF));
5631+
addr = addr->gtGetOp2();
5632+
5633+
// Look for the constant [#FirstElem] node here, or as the RHS of an ADD.
5634+
if (addr->IsCnsIntOrI())
55975635
{
5598-
cnsOff = addr->AsOp()->gtOp2;
5599-
addr = addr->AsOp()->gtOp1;
5636+
cnsOff = addr;
5637+
addr = nullptr;
5638+
}
5639+
else
5640+
{
5641+
if ((addr->OperIs(GT_ADD)) && addr->gtGetOp2()->IsCnsIntOrI())
5642+
{
5643+
cnsOff = addr->gtGetOp2();
5644+
addr = addr->gtGetOp1();
5645+
}
5646+
// Label any constant array index contributions with #ConstantIndex and any LclVars with
5647+
// GTF_VAR_ARR_INDEX
5648+
addr->LabelIndex(this);
56005649
}
5601-
5602-
// Label any constant array index contributions with #ConstantIndex and any LclVars with GTF_VAR_ARR_INDEX
5603-
addr->LabelIndex(this);
56045650
}
56055651
}
5606-
else if (addr->OperGet() == GT_CNS_INT)
5652+
else if (addr->IsCnsIntOrI())
56075653
{
56085654
cnsOff = addr;
56095655
}

src/coreclr/jit/vartype.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,12 @@ inline bool varTypeIsLong(T vt)
247247
return (TypeGet(vt) >= TYP_LONG) && (TypeGet(vt) <= TYP_ULONG);
248248
}
249249

250+
template <class T>
251+
inline bool varTypeIsInt(T vt)
252+
{
253+
return (TypeGet(vt) >= TYP_INT) && (TypeGet(vt) <= TYP_UINT);
254+
}
255+
250256
template <class T>
251257
inline bool varTypeIsMultiReg(T vt)
252258
{

0 commit comments

Comments
 (0)