diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp
index e0f48954c65cd3..f29218b140949a 100644
--- a/src/coreclr/jit/codegenlinear.cpp
+++ b/src/coreclr/jit/codegenlinear.cpp
@@ -1574,6 +1574,15 @@ void CodeGen::genConsumeRegs(GenTree* tree)
         {
             genConsumeAddress(tree);
         }
+#ifdef TARGET_ARM64
+        else if (tree->OperIs(GT_BFIZ))
+        {
+            // Can be contained as part of LEA on ARM64
+            GenTreeCast* cast = tree->gtGetOp1()->AsCast();
+            assert(cast->isContained());
+            genConsumeAddress(cast->CastOp());
+        }
+#endif
         else if (tree->OperIsLocalRead())
         {
             // A contained lcl var must be living on stack and marked as reg optional, or not be a
diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp
index 414d1eaffa0d1f..256b348404ebff 100644
--- a/src/coreclr/jit/emitarm64.cpp
+++ b/src/coreclr/jit/emitarm64.cpp
@@ -6985,6 +6985,14 @@ void emitter::emitIns_R_R_R_Ext(instruction ins,
     {
         shiftAmount = insOptsLSL(opt) ? scale : 0;
     }
+
+    // If the target reg is ZR, it means we're doing an implicit nullcheck
+    // where the target type was ignored and set to TYP_INT.
+    if ((reg1 == REG_ZR) && (shiftAmount > 0))
+    {
+        shiftAmount = scale;
+    }
+
     assert((shiftAmount == scale) || (shiftAmount == 0));
 
     reg2 = encodingSPtoZR(reg2);
@@ -13481,8 +13489,23 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR
             }
             else // no scale
             {
-                // Then load/store dataReg from/to [memBase + index]
-                emitIns_R_R_R(ins, attr, dataReg, memBase->GetRegNum(), index->GetRegNum());
+                if (index->OperIs(GT_BFIZ) && index->isContained())
+                {
+                    // Then load/store dataReg from/to [memBase + index*scale with sign/zero extension]
+                    GenTreeCast* cast = index->gtGetOp1()->AsCast();
+
+                    // For now, this code only supports extensions from i32/u32
+                    assert(cast->isContained() && varTypeIsInt(cast->CastFromType()));
+
+                    emitIns_R_R_R_Ext(ins, attr, dataReg, memBase->GetRegNum(), cast->CastOp()->GetRegNum(),
+                                      cast->IsUnsigned() ? INS_OPTS_UXTW : INS_OPTS_SXTW,
+                                      (int)index->gtGetOp2()->AsIntCon()->IconValue());
+                }
+                else
+                {
+                    // Then load/store dataReg from/to [memBase + index]
+                    emitIns_R_R_R(ins, attr, dataReg, memBase->GetRegNum(), index->GetRegNum());
+                }
             }
         }
     }
diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp
index 5bbeaef2a9ff49..b2d92cf6be5a8a 100644
--- a/src/coreclr/jit/lower.cpp
+++ b/src/coreclr/jit/lower.cpp
@@ -5033,6 +5033,28 @@ bool Lowering::TryCreateAddrMode(GenTree* addr, bool isContainable, var_types ta
         }
     }
 
+#ifdef TARGET_ARM64
+    // Check if we can "contain" LEA(BFIZ) in order to extend a 32-bit index to 64 bits as part of the load/store.
+    if ((index != nullptr) && index->OperIs(GT_BFIZ) && index->gtGetOp1()->OperIs(GT_CAST) &&
+        index->gtGetOp2()->IsCnsIntOrI() && varTypeIsIntegral(targetType))
+    {
+        // The BFIZ node is a binary op where op1 is GT_CAST and op2 is GT_CNS_INT
+        GenTreeCast* cast = index->gtGetOp1()->AsCast();
+        assert(cast->isContained());
+
+        const unsigned shiftBy = (unsigned)index->gtGetOp2()->AsIntCon()->IconValue();
+
+        // 'scale' and 'offset' have to be unset since we're going to use the [base + index * SXTW/UXTW scale] form,
+        // where there is no room for additional offsets/scales on ARM64. 'shiftBy' has to match the target's width.
+        if (cast->CastOp()->TypeIs(TYP_INT) && cast->TypeIs(TYP_LONG) && (genTypeSize(targetType) == (1U << shiftBy)) &&
+            (scale == 1) && (offset == 0))
+        {
+            // TODO: Make sure that genCreateAddrMode marks such BFIZ candidates as GTF_DONT_CSE for better CQ.
+            MakeSrcContained(addrMode, index);
+        }
+    }
+#endif
+
     JITDUMP("New addressing mode node:\n  ");
     DISPNODE(addrMode);
     JITDUMP("\n");
diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp
index 0b52915af1ab47..7b5a012e1dd2cc 100644
--- a/src/coreclr/jit/lsraarm64.cpp
+++ b/src/coreclr/jit/lsraarm64.cpp
@@ -689,7 +689,16 @@ int LinearScan::BuildNode(GenTree* tree)
             if (index != nullptr)
             {
                 srcCount++;
-                BuildUse(index);
+                if (index->OperIs(GT_BFIZ) && index->isContained())
+                {
+                    GenTreeCast* cast = index->gtGetOp1()->AsCast();
+                    assert(cast->isContained() && (cns == 0));
+                    BuildUse(cast->CastOp());
+                }
+                else
+                {
+                    BuildUse(index);
+                }
             }
 
             assert(dstCount == 1);
diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp
index 35ae7eb3564cd5..88a3aebdd8e687 100644
--- a/src/coreclr/jit/lsrabuild.cpp
+++ b/src/coreclr/jit/lsrabuild.cpp
@@ -3021,10 +3021,22 @@ int LinearScan::BuildAddrUses(GenTree* addr, regMaskTP candidates)
         BuildUse(addrMode->Base(), candidates);
         srcCount++;
     }
-    if ((addrMode->Index() != nullptr) && !addrMode->Index()->isContained())
+    if (addrMode->Index() != nullptr)
     {
-        BuildUse(addrMode->Index(), candidates);
-        srcCount++;
+        if (!addrMode->Index()->isContained())
+        {
+            BuildUse(addrMode->Index(), candidates);
+            srcCount++;
+        }
+#ifdef TARGET_ARM64
+        else if (addrMode->Index()->OperIs(GT_BFIZ))
+        {
+            GenTreeCast* cast = addrMode->Index()->gtGetOp1()->AsCast();
+            assert(cast->isContained());
+            BuildUse(cast->CastOp(), candidates);
+            srcCount++;
+        }
+#endif
     }
     return srcCount;
 }
diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp
index f62ab8a5e2ffcb..dcc6286720718f 100644
--- a/src/coreclr/jit/morph.cpp
+++ b/src/coreclr/jit/morph.cpp
@@ -5461,15 +5461,43 @@ GenTree* Compiler::fgMorphArrayIndex(GenTree* tree)
     // the partial byref will not point within the object, and thus not get updated correctly during a GC.
     // This is mostly a risk in fully-interruptible code regions.
 
-    /* Add the first element's offset */
-
-    GenTree* cns = gtNewIconNode(elemOffs, TYP_I_IMPL);
+    // We can generate two types of trees for "addr":
+    //
+    // 1) "arrRef + (index + elemOffset)"
+    // 2) "(arrRef + elemOffset) + index"
+    //
+    // XArch has powerful addressing modes such as [base + index*scale + offset] so it's fine with 1),
+    // while for ARM it's better to make the invariant sub-tree as large as possible, which is usually
+    // "(arrRef + elemOffset)" and is CSE/LoopHoisting friendly => produces better codegen.
+    // 2) should still be safe from the GC's point of view since both ADD operations are byref and point to
+    // within the object, so the GC will be able to correctly track and update them.
 
-    addr = gtNewOperNode(GT_ADD, TYP_I_IMPL, addr, cns);
+    bool groupArrayRefWithElemOffset = false;
+#ifdef TARGET_ARMARCH
+    groupArrayRefWithElemOffset = true;
+    // TODO: in some cases even on ARM it's better to use the 1) shape: if "index" is invariant and "arrRef" is not,
+    // we will at least be able to hoist/CSE "index + elemOffset" in some cases.
+    // See https://github.com/dotnet/runtime/pull/61293#issuecomment-964146497
 
-    /* Add the object ref to the element's offset */
+    // Use the 2) form only for primitive types for now - it significantly reduced the number of size regressions
+    if (!varTypeIsIntegral(elemTyp))
+    {
+        groupArrayRefWithElemOffset = false;
+    }
+#endif
 
-    addr = gtNewOperNode(GT_ADD, TYP_BYREF, arrRef, addr);
+    // First element's offset
+    GenTree* elemOffset = gtNewIconNode(elemOffs, TYP_I_IMPL);
+    if (groupArrayRefWithElemOffset)
+    {
+        GenTree* basePlusOffset = gtNewOperNode(GT_ADD, TYP_BYREF, arrRef, elemOffset);
+        addr = gtNewOperNode(GT_ADD, TYP_BYREF, basePlusOffset, addr);
+    }
+    else
+    {
+        addr = gtNewOperNode(GT_ADD, TYP_I_IMPL, addr, elemOffset);
+        addr = gtNewOperNode(GT_ADD, TYP_BYREF, arrRef, addr);
+    }
 
     assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_LARGE) != 0) ||
            (GenTree::s_gtNodeSizes[GT_IND] == TREE_NODE_SZ_SMALL));
@@ -5539,16 +5567,16 @@ GenTree* Compiler::fgMorphArrayIndex(GenTree* tree)
         tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), arrRefDefn, tree);
     }
 
-    JITDUMP("fgMorphArrayIndex (before remorph):\n");
-    DISPTREE(tree);
+    JITDUMP("fgMorphArrayIndex (before remorph):\n")
+    DISPTREE(tree)
 
     // Currently we morph the tree to perform some folding operations prior
     // to attaching fieldSeq info and labeling constant array index contributions
     //
     tree = fgMorphTree(tree);
 
-    JITDUMP("fgMorphArrayIndex (after remorph):\n");
-    DISPTREE(tree);
+    JITDUMP("fgMorphArrayIndex (after remorph):\n")
+    DISPTREE(tree)
 
     // Ideally we just want to proceed to attaching fieldSeq info and labeling the
     // constant array index contributions, but the morphing operation may have changed
@@ -5562,48 +5590,66 @@ GenTree* Compiler::fgMorphArrayIndex(GenTree* tree)
     if (fgIsCommaThrow(tree))
     {
-        if ((arrElem != indTree) ||         // A new tree node may have been created
-            (indTree->OperGet() != GT_IND)) // The GT_IND may have been changed to a GT_CNS_INT
+        if ((arrElem != indTree) ||     // A new tree node may have been created
+            (!indTree->OperIs(GT_IND))) // The GT_IND may have been changed to a GT_CNS_INT
         {
             return tree; // Just return the Comma-Throw, don't try to attach the fieldSeq info, etc..
         }
     }
 
     assert(!fgGlobalMorph || (arrElem->gtDebugFlags & GTF_DEBUG_NODE_MORPHED));
 
-    DBEXEC(fgGlobalMorph && (arrElem == tree), tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED);
-
-    addr = arrElem->AsOp()->gtOp1;
+    DBEXEC(fgGlobalMorph && (arrElem == tree), tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED)
 
-    assert(addr->TypeGet() == TYP_BYREF);
+    addr = arrElem->gtGetOp1();
 
     GenTree* cnsOff = nullptr;
-    if (addr->OperGet() == GT_ADD)
+    if (addr->OperIs(GT_ADD))
     {
-        assert(addr->TypeGet() == TYP_BYREF);
-        assert(addr->AsOp()->gtOp1->TypeGet() == TYP_REF);
-
-        addr = addr->AsOp()->gtOp2;
-
-        // Look for the constant [#FirstElem] node here, or as the RHS of an ADD.
-
-        if (addr->gtOper == GT_CNS_INT)
+        GenTree* addrOp1 = addr->gtGetOp1();
+        if (groupArrayRefWithElemOffset)
         {
-            cnsOff = addr;
-            addr = nullptr;
+            if (addrOp1->OperIs(GT_ADD) && addrOp1->gtGetOp2()->IsCnsIntOrI())
+            {
+                assert(addrOp1->gtGetOp1()->TypeIs(TYP_REF));
+                cnsOff = addrOp1->gtGetOp2();
+                addr = addr->gtGetOp2();
+                // Label any constant array index contributions with #ConstantIndex and any LclVars with
+                // GTF_VAR_ARR_INDEX
+                addr->LabelIndex(this);
+            }
+            else
+            {
+                assert(addr->gtGetOp2()->IsCnsIntOrI());
+                cnsOff = addr->gtGetOp2();
+                addr = nullptr;
+            }
         }
         else
         {
-            if ((addr->OperGet() == GT_ADD) && (addr->AsOp()->gtOp2->gtOper == GT_CNS_INT))
+            assert(addr->TypeIs(TYP_BYREF));
+            assert(addr->gtGetOp1()->TypeIs(TYP_REF));
+            addr = addr->gtGetOp2();
+
+            // Look for the constant [#FirstElem] node here, or as the RHS of an ADD.
+            if (addr->IsCnsIntOrI())
             {
-                cnsOff = addr->AsOp()->gtOp2;
-                addr = addr->AsOp()->gtOp1;
+                cnsOff = addr;
+                addr = nullptr;
+            }
+            else
+            {
+                if ((addr->OperIs(GT_ADD)) && addr->gtGetOp2()->IsCnsIntOrI())
+                {
+                    cnsOff = addr->gtGetOp2();
+                    addr = addr->gtGetOp1();
+                }
+                // Label any constant array index contributions with #ConstantIndex and any LclVars with
+                // GTF_VAR_ARR_INDEX
+                addr->LabelIndex(this);
             }
-
-            // Label any constant array index contributions with #ConstantIndex and any LclVars with GTF_VAR_ARR_INDEX
-            addr->LabelIndex(this);
         }
     }
-    else if (addr->OperGet() == GT_CNS_INT)
+    else if (addr->IsCnsIntOrI())
     {
         cnsOff = addr;
     }
diff --git a/src/coreclr/jit/vartype.h b/src/coreclr/jit/vartype.h
index d43e02a0a30737..33c411032d3308 100644
--- a/src/coreclr/jit/vartype.h
+++ b/src/coreclr/jit/vartype.h
@@ -247,6 +247,12 @@ inline bool varTypeIsLong(T vt)
     return (TypeGet(vt) >= TYP_LONG) && (TypeGet(vt) <= TYP_ULONG);
 }
 
+template <class T>
+inline bool varTypeIsInt(T vt)
+{
+    return (TypeGet(vt) >= TYP_INT) && (TypeGet(vt) <= TYP_UINT);
+}
+
 template <class T>
 inline bool varTypeIsMultiReg(T vt)
 {
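As an illustration (not part of the diff), here is a minimal standalone C++ sketch of the pattern this change targets: a load whose index is a 32-bit value that has to be widened and scaled. On AArch64 a native compiler typically folds both steps into the addressing mode, e.g. ldr w0, [x0, w1, sxtw #2], which is the same [base + index, SXTW/UXTW, scale] form the JIT can now emit through emitIns_R_R_R_Ext once the GT_BFIZ(GT_CAST(index), shift) index is contained. The function names below are illustrative only.

#include <cstdint>

// Signed 32-bit index: the address is base + (int64_t)index * sizeof(int),
// i.e. a sign-extending (sxtw), scaled register offset on AArch64.
int LoadWithSignExtendedIndex(const int* base, std::int32_t index)
{
    return base[index];
}

// Unsigned 32-bit index: the same shape, but with zero extension (uxtw).
std::uint32_t LoadWithZeroExtendedIndex(const std::uint32_t* base, std::uint32_t index)
{
    return base[index];
}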
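Similarly, a small sketch of the two "addr" shapes described in the fgMorphArrayIndex comment above; this is not JIT code, and the layout constant is hypothetical. It only shows why shape 2) keeps the index-independent part together so a loop can hoist/CSE it on ARM.

#include <cstdint>

constexpr std::int64_t kFirstElemOffset = 16; // hypothetical offset of the first element past the array header

// Shape 1) "arrRef + (index + elemOffset)": the loop-variant index is folded into the
// inner add, so almost none of the address computation is invariant across iterations.
inline int* ElemAddrShape1(std::uint8_t* arrRef, std::int64_t index)
{
    return reinterpret_cast<int*>(arrRef + (index * std::int64_t(sizeof(int)) + kFirstElemOffset));
}

// Shape 2) "(arrRef + elemOffset) + index": "(arrRef + kFirstElemOffset)" does not depend on
// the index, so a loop can compute it once and address elements as "base + index * scale".
inline int* ElemAddrShape2(std::uint8_t* arrRef, std::int64_t index)
{
    return reinterpret_cast<int*>((arrRef + kFirstElemOffset) + index * std::int64_t(sizeof(int)));
}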