diff --git a/src/coreclr/jit/assertionprop.cpp b/src/coreclr/jit/assertionprop.cpp index 917aa9420349e7..1b865537816f8c 100644 --- a/src/coreclr/jit/assertionprop.cpp +++ b/src/coreclr/jit/assertionprop.cpp @@ -5270,7 +5270,10 @@ Compiler::fgWalkResult Compiler::optVNConstantPropCurStmt(BasicBlock* block, Sta case GT_INC_SATURATE: case GT_MULHI: - assert(false && "Unexpected GT_INC_SATURATE/GT_MULHI node encountered before lowering"); +#ifdef TARGET_ARM64 + case GT_MULWIDE: +#endif + assert(false && "Unexpected GT_INC_SATURATE/GT_MULHI/GT_MULWIDE node encountered before lowering"); break; case GT_JTRUE: diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 6a78dc35154394..dd12f303452189 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -848,6 +848,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genCodeForMul(GenTreeOp* treeNode); void genCodeForIncSaturate(GenTree* treeNode); void genCodeForMulHi(GenTreeOp* treeNode); + void genCodeForMulWide(GenTreeOp* treeNode); void genLeaInstruction(GenTreeAddrMode* lea); void genSetRegToCond(regNumber dstReg, GenTree* tree); diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 2428c33fee6f78..c2b2cdbc76896a 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -1817,6 +1817,26 @@ void CodeGen::genCodeForMulHi(GenTreeOp* treeNode) genProduceReg(treeNode); } +// Generate code to get the 64-bits of a 32*32 bit multiplication result +void CodeGen::genCodeForMulWide(GenTreeOp* treeNode) +{ + assert(!treeNode->gtOverflowEx()); + assert(!varTypeIsFloating(treeNode->TypeGet())); + + genConsumeOperands(treeNode); + + // The arithmetic node must be sitting in a register (since it's not contained) + regNumber targetReg = treeNode->GetRegNum(); + assert(targetReg != REG_NA); + + assert(EA_SIZE(emitActualTypeSize(treeNode)) == EA_8BYTE); + instruction ins = (treeNode->gtFlags & GTF_UNSIGNED) ? INS_umull : INS_smull; + regNumber r = GetEmitter()->emitInsTernary(ins, EA_4BYTE, treeNode, treeNode->gtGetOp1(), treeNode->gtGetOp2()); + assert(r == targetReg); + + genProduceReg(treeNode); +} + // Generate code for ADD, SUB, MUL, DIV, UDIV, AND, OR and XOR // This method is expected to have called genConsumeOperands() before calling it. void CodeGen::genCodeForBinary(GenTreeOp* treeNode) diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index ccdd576a752066..8b64103854f7f6 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -311,6 +311,10 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) genCodeForMulHi(treeNode->AsOp()); break; + case GT_MULWIDE: + genCodeForMulWide(treeNode->AsOp()); + break; + case GT_SWAP: genCodeForSwap(treeNode->AsOp()); break; diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 2c8ad35b8cdb0f..0ba9cc4abedc27 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -1435,6 +1435,9 @@ struct GenTree static bool OperIsMul(genTreeOps gtOper) { return (gtOper == GT_MUL) || (gtOper == GT_MULHI) +#ifdef TARGET_ARM64 + || (gtOper == GT_MULWIDE) +#endif #if !defined(TARGET_64BIT) || (gtOper == GT_MUL_LONG) #endif diff --git a/src/coreclr/jit/gtlist.h b/src/coreclr/jit/gtlist.h index 248bada7752035..556cd557369d7a 100644 --- a/src/coreclr/jit/gtlist.h +++ b/src/coreclr/jit/gtlist.h @@ -133,6 +133,9 @@ GTNODE(RSZ , GenTreeOp ,0,GTK_BINOP) GTNODE(ROL , GenTreeOp ,0,GTK_BINOP) GTNODE(ROR , GenTreeOp ,0,GTK_BINOP) GTNODE(INC_SATURATE , GenTreeOp ,0,GTK_UNOP) // saturating increment, used in division by a constant (LowerUnsignedDivOrMod) +#ifdef TARGET_ARM64 +GTNODE(MULWIDE , GenTreeOp ,1,GTK_BINOP) // widening multiply, used in division by a constant on ARM64 (LowerUnsignedDivOrMod) +#endif GTNODE(MULHI , GenTreeOp ,1,GTK_BINOP) // returns high bits (top N bits of the 2N bit result of an NxN multiply) // GT_MULHI is used in division by a constant (fgMorphDivByConst). We turn // the div into a MULHI + some adjustments. In codegen, we only use the diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index a328206151a001..9fba1614fe4997 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -144,6 +144,9 @@ GenTree* Lowering::LowerNode(GenTree* node) case GT_MUL: case GT_MULHI: +#ifdef TARGET_ARM64 + case GT_MULWIDE: +#endif #if defined(TARGET_X86) case GT_MUL_LONG: #endif @@ -5254,7 +5257,11 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) BlockRange().InsertBefore(divMod, preShiftBy, adjustedDividend); firstNode = preShiftBy; } - else if (type != TYP_I_IMPL) + else if (type != TYP_I_IMPL +#ifdef TARGET_ARM64 + && !simpleMul +#endif + ) { adjustedDividend = comp->gtNewCastNode(TYP_I_IMPL, adjustedDividend, true, TYP_U_IMPL); BlockRange().InsertBefore(divMod, adjustedDividend); @@ -5268,7 +5275,7 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) adjustedDividend->SetRegNum(REG_RAX); #endif - divisor->gtType = TYP_I_IMPL; + divisor->gtType = simpleMul ? TYP_INT : TYP_I_IMPL; divisor->AsIntCon()->SetIconValue(magic); if (isDiv && !postShift && type == TYP_I_IMPL) @@ -5279,11 +5286,16 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) } else { +#ifdef TARGET_ARM64 + genTreeOps mulOp = simpleMul ? GT_MULWIDE : GT_MULHI; // 64-bit MUL is more expensive than UMULL on ARM64 +#else + genTreeOps mulOp = simpleMul ? GT_MUL : GT_MULHI; // 64-bit IMUL is less expensive than MUL eax:edx on x64 +#endif // Insert a new GT_MULHI node before the existing GT_UDIV/GT_UMOD node. // The existing node will later be transformed into a GT_RSZ/GT_SUB that // computes the final result. This way don't need to find and change the use // of the existing node. - GenTree* mulhi = comp->gtNewOperNode(simpleMul ? GT_MUL : GT_MULHI, TYP_I_IMPL, adjustedDividend, divisor); + GenTree* mulhi = comp->gtNewOperNode(mulOp, TYP_I_IMPL, adjustedDividend, divisor); mulhi->gtFlags |= GTF_UNSIGNED; BlockRange().InsertBefore(divMod, mulhi); if (!firstNode) @@ -5322,7 +5334,7 @@ bool Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) } else if (type != TYP_I_IMPL) { -#ifdef TARGET_ARMARCH +#ifdef TARGET_ARM64 divMod->SetOper(GT_CAST); divMod->gtFlags |= GTF_UNSIGNED; divMod->AsCast()->gtCastType = TYP_UINT; @@ -6394,6 +6406,9 @@ void Lowering::ContainCheckNode(GenTree* node) #endif case GT_MUL: case GT_MULHI: +#ifdef TARGET_ARM64 + case GT_MULWIDE: +#endif ContainCheckMul(node->AsOp()); break; case GT_DIV: diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index a7a14fdfe54f96..bdb036a316b590 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -308,6 +308,7 @@ int LinearScan::BuildNode(GenTree* tree) case GT_DIV: case GT_MULHI: + case GT_MULWIDE: case GT_UDIV: { srcCount = BuildBinaryUses(tree->AsOp()); diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index d18e49f3894cac..472ea12bb29161 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -1049,6 +1049,9 @@ regMaskTP LinearScan::getKillSetForNode(GenTree* tree) case GT_MUL: case GT_MULHI: +#ifdef TARGET_ARM64 + case GT_MULWIDE: +#endif #if !defined(TARGET_64BIT) case GT_MUL_LONG: #endif