Skip to content
17 changes: 16 additions & 1 deletion src/coreclr/jit/codegenarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1630,7 +1630,22 @@ void CodeGen::genCodeForShift(GenTree* tree)
unsigned immWidth = emitter::getBitWidth(size); // For ARM64, immWidth will be set to 32 or 64
unsigned shiftByImm = (unsigned)shiftBy->AsIntCon()->gtIconVal & (immWidth - 1);

GetEmitter()->emitIns_R_R_I(ins, size, tree->GetRegNum(), operand->GetRegNum(), shiftByImm);
#ifdef TARGET_ARM64
// Check if it's a sbfiz/ubfiz idiom (e.g. '(ulong)x << 2'): a left shift whose first operand is a
// contained GT_CAST. In that case the cast and the shift are emitted as one instruction that reads
// the cast's own operand register, instead of emitting a separate shift.
if (tree->gtGetOp1()->OperIs(GT_CAST) && tree->gtGetOp1()->isContained())
{
GenTreeCast* cast = tree->gtGetOp1()->AsCast();
// The shape is only expected for a 1..31 shift of an int/uint value widened to long,
// with a non-overflow-checking cast.
assert((shiftByImm > 0) && (shiftByImm < 32));
assert(varTypeIsLong(tree) && (varTypeToSigned(cast->CastFromType()) == TYP_INT));
assert(!cast->gtOverflow());
// Unsigned cast -> ubfiz, signed cast -> sbfiz. The two immediates are the shift amount
// and the constant 32 (the bit width of the int source being inserted).
GetEmitter()->emitIns_R_R_I_I(cast->IsUnsigned() ? INS_ubfiz : INS_sbfiz, size, tree->GetRegNum(),
cast->CastOp()->GetRegNum(), (int)shiftByImm, 32);
}
else
#endif
// The 'else' above sits inside the #ifdef so that, when TARGET_ARM64 is not defined, the block
// below is compiled as an unconditional statement.
{
// Ordinary shift by an immediate.
GetEmitter()->emitIns_R_R_I(ins, size, tree->GetRegNum(), operand->GetRegNum(), shiftByImm);
}
}

genProduceReg(tree);
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/codegenlinear.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1608,7 +1608,7 @@ void CodeGen::genConsumeRegs(GenTree* tree)
}
#endif // FEATURE_HW_INTRINSICS
#endif // TARGET_XARCH
else if (tree->OperIs(GT_BITCAST))
else if (tree->OperIs(GT_BITCAST, GT_CAST))
{
genConsumeReg(tree->gtGetOp1());
}
Expand Down
20 changes: 20 additions & 0 deletions src/coreclr/jit/lower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5753,6 +5753,26 @@ void Lowering::LowerShift(GenTreeOp* shift)
shift->gtOp2->ClearContained();
}
ContainCheckShiftRotate(shift);

// Check if we can fold e.g. '(ulong/long)x << 2' where x is int/uint to ubfiz/sbfiz
#ifdef TARGET_ARM64
    // Recognize LSH(CAST(x), cns) where x is int/uint, the result is long/ulong and cns is in [1, 31].
    // Marking the CAST as contained tells later phases to treat cast + shift as a single unit
    // (the ubfiz/sbfiz "bitfield insert in zero" form) rather than evaluating the cast on its own.
    if (comp->opts.OptimizationEnabled() && shift->OperIs(GT_LSH) && shift->gtGetOp1()->OperIs(GT_CAST) &&
        shift->gtGetOp2()->IsCnsIntOrI() && !shift->isContained() && varTypeIsIntegral(shift))
    {
        GenTreeIntCon* cns  = shift->gtGetOp2()->AsIntCon();
        GenTreeCast*   cast = shift->gtGetOp1()->AsCast();

        // Only a plain (non-overflow-checking) int/uint -> long/ulong cast qualifies, and neither the
        // cast nor its operand may already be contained or reg-optional.
        if (!cast->isContained() && !cast->IsRegOptional() && varTypeIsLong(shift) && !cast->CastOp()->isContained() &&
            (varTypeToSigned(cast->CastFromType()) == TYP_INT) && (cns->IconValue() > 0) && (cns->IconValue() < 32) &&
            !cast->gtOverflow() && varTypeIsIntegral(cast->CastOp()))
        {
            // 1-31 constant should already be contained at this point
            assert(cns->isContained());
            // Instruction names are ubfiz/sbfiz; terminate the line so later dump output starts on its own line.
            JITDUMP("Recognized ubfiz/sbfiz pattern in LSH(CAST, CNS), marking CAST node as contained.\n");
            MakeSrcContained(shift, cast);
        }
    }
#endif
}

void Lowering::WidenSIMD12IfNecessary(GenTreeLclVarCommon* node)
Expand Down
11 changes: 10 additions & 1 deletion src/coreclr/jit/lsraarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,16 @@ int LinearScan::BuildNode(GenTree* tree)
case GT_RSH:
case GT_RSZ:
case GT_ROR:
srcCount = BuildBinaryUses(tree->AsOp());
// A contained first operand is only expected for the LSH(CAST, CNS) shape (asserted below).
// The shift then has a single register source: the operand of the contained cast.
if (tree->gtGetOp1()->isContained())
{
assert(tree->OperIs(GT_LSH) && tree->gtGetOp1()->OperIs(GT_CAST) && tree->gtGetOp2()->IsCnsIntOrI());
// Build the use on the cast's operand directly; no use is built for the cast node itself
// or for the constant shift amount.
BuildUse(tree->gtGetOp1()->gtGetOp1());
srcCount = 1;
}
else
{
// Regular shift/rotate: let the common helper build uses for both operands.
srcCount = BuildBinaryUses(tree->AsOp());
}
assert(dstCount == 1);
BuildDef(tree);
break;
Expand Down