From ee2c0b689518e7045ebe8254d2c1307497d8083b Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Tue, 20 Jul 2021 16:03:34 -0700 Subject: [PATCH 01/38] Optimize FMA codegen base on the overwritten --- src/coreclr/jit/gentree.cpp | 26 +++++++ src/coreclr/jit/gentree.h | 2 + src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 6 +- src/coreclr/jit/lowerxarch.cpp | 79 ++++++++++++++------- src/coreclr/jit/lsraxarch.cpp | 8 ++- 5 files changed, 91 insertions(+), 30 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 22e46b6a819eb2..0f50739ac05622 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -19791,6 +19791,32 @@ uint16_t GenTreeLclVarCommon::GetLclOffs() const } } +#ifdef FEATURE_HW_INTRINSICS +//------------------------------------------------------------------------ +// +unsigned GenTreeHWIntrinsic::GetFMAOverwritten(GenTree* op1, GenTree* op2, GenTree* op3) +{ + // only FMA intrinsic node should call into this function + GenTreeLclVarCommon* overwritten = this->gtNext->AsLclVarCommon(); + assert(overwritten->gtOper == GT_STORE_LCL_VAR || overwritten->gtOper == GT_LCL_VAR); + unsigned flag = 0; // 1->op1, 2->op2, 3->op3 + if (op1->IsLocal() && op1->AsLclVarCommon()->GetLclNum() == overwritten->GetLclNum()) + { + flag = 1; + } + else if (op2->IsLocal() && op2->AsLclVarCommon()->GetLclNum() == overwritten->GetLclNum()) + { + flag = 2; + } + else if (op3->IsLocal() && op3->AsLclVarCommon()->GetLclNum() == overwritten->GetLclNum()) + { + flag = 3; + } + + return flag; +} +#endif + #ifdef TARGET_ARM //------------------------------------------------------------------------ // IsOffsetMisaligned: check if the field needs a special handling on arm. diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 2c8ad35b8cdb0f..145178d788e57c 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -5187,6 +5187,8 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic bool OperIsMemoryLoadOrStore() const; // Returns true for the HW Intrinsic instructions that have MemoryLoad or // MemoryStore semantics, false otherwise + unsigned GetFMAOverwritten(GenTree* op1, GenTree* op2, GenTree* op3); + #if DEBUGGABLE_GENTREE GenTreeHWIntrinsic() : GenTreeJitIntrinsic() { diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 8448dbdd46c9e9..c178ac666e09de 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -2133,7 +2133,9 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) // Intrinsics with CopyUpperBits semantics cannot have op1 be contained assert(!copiesUpperBits || !op1->isContained()); - if (op2->isContained() || op2->isUsedFromSpillTemp()) + unsigned flag = node->GetFMAOverwritten(op1, op2, op3); + + if (flag == 1) { // 132 form: op1 = (op1 * op3) + [op2] @@ -2142,7 +2144,7 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) op2Reg = op3->GetRegNum(); op3 = op2; } - else if (op1->isContained() || op1->isUsedFromSpillTemp()) + else if (flag == 3) { // 231 form: op3 = (op2 * op3) + [op1] diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 43c0df62042365..8693d6a77227d3 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -6315,37 +6315,66 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) { bool supportsRegOptional = false; - if (IsContainableHWIntrinsicOp(node, op3, &supportsRegOptional)) - { - // 213 form: op1 = (op2 * op1) + [op3] - MakeSrcContained(node, op3); - } - else if (IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional)) - { - // 132 form: op1 = (op1 * op3) + [op2] - MakeSrcContained(node, op2); - } - else if (IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional)) + unsigned flag = node->GetFMAOverwritten(op1, op2, op3); + + switch (flag) { - // Intrinsics with CopyUpperBits semantics cannot have op1 be contained + case 1: + { + if (IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional)) + { + // 132 form: op1 = (op1 * op3) + [op2] + MakeSrcContained(node, op2); + } + else + { + assert(supportsRegOptional); - if (!HWIntrinsicInfo::CopiesUpperBits(intrinsicId)) + // 132 form: op1 = (op1 * op3) + [op2] + op2->SetRegOptional(); + } + break; + } + case 3: { - // 231 form: op3 = (op2 * op3) + [op1] - MakeSrcContained(node, op1); + if (IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional)) + { + // Intrinsics with CopyUpperBits semantics cannot have op1 be contained + + if (!HWIntrinsicInfo::CopiesUpperBits(intrinsicId)) + { + // 231 form: op1 = (op2 * op3) + [op1] + MakeSrcContained(node, op1); + } + } + else + { + assert(supportsRegOptional); + // 231 form: op1 = (op2 * op3) + [op1] + op1->SetRegOptional(); + } + break; } - } - else - { - assert(supportsRegOptional); + default: + { + if (IsContainableHWIntrinsicOp(node, op3, &supportsRegOptional)) + { + // 213 form: op1 = (op2 * op1) + [op3] + MakeSrcContained(node, op3); + } + else + { + assert(supportsRegOptional); - // TODO-XArch-CQ: Technically any one of the three operands can - // be reg-optional. With a limitation on op1 where - // it can only be so if CopyUpperBits is off. - // https://github.com/dotnet/runtime/issues/6358 + // TODO-XArch-CQ: Technically any one of the three operands can + // be reg-optional. With a limitation on op1 where + // it can only be so if CopyUpperBits is off. + // https://github.com/dotnet/runtime/issues/6358 - // 213 form: op1 = (op2 * op1) + op3 - op3->SetRegOptional(); + // 213 form: op1 = (op2 * op1) + [op3] + op3->SetRegOptional(); + } + } } } else diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 2d2c45258166ab..7a26aa0257f073 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2334,10 +2334,12 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) const bool copiesUpperBits = HWIntrinsicInfo::CopiesUpperBits(intrinsicId); + unsigned flag = intrinsicTree->GetFMAOverwritten(op1, op2, op3); + // Intrinsics with CopyUpperBits semantics cannot have op1 be contained assert(!copiesUpperBits || !op1->isContained()); - if (op2->isContained()) + if (flag == 1) { // 132 form: op1 = (op1 * op3) + [op2] @@ -2347,9 +2349,9 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) srcCount += BuildOperandUses(op2); srcCount += BuildDelayFreeUses(op3, op1); } - else if (op1->isContained()) + else if (flag == 3) { - // 231 form: op3 = (op2 * op3) + [op1] + // 231 form: op1 = (op2 * op3) + [op1] tgtPrefUse = BuildUse(op3); From 46d00111aa5eb69d8de8ff244bb595fbf9d7c644 Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Fri, 27 Aug 2021 10:39:42 -0700 Subject: [PATCH 02/38] Improve function/var names --- src/coreclr/jit/gentree.cpp | 24 ++++++++++++--------- src/coreclr/jit/gentree.h | 2 +- src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 8 +++---- src/coreclr/jit/lowerxarch.cpp | 4 ++-- src/coreclr/jit/lsraxarch.cpp | 6 +++--- 5 files changed, 24 insertions(+), 20 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 0f50739ac05622..1cf625b29107a0 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -19791,31 +19791,35 @@ uint16_t GenTreeLclVarCommon::GetLclOffs() const } } -#ifdef FEATURE_HW_INTRINSICS +#if defined(TARGET_XARCH) && defined(FEATURE_HW_INTRINSICS) //------------------------------------------------------------------------ +// GetOverwrittenOpNumForFMA: check if the result is written into one of the operands +// +// Return Value: +// The operand number or 0 if not overwritten. // -unsigned GenTreeHWIntrinsic::GetFMAOverwritten(GenTree* op1, GenTree* op2, GenTree* op3) +unsigned GenTreeHWIntrinsic::GetOverwrittenOpNumForFMA(GenTree* op1, GenTree* op2, GenTree* op3) { // only FMA intrinsic node should call into this function - GenTreeLclVarCommon* overwritten = this->gtNext->AsLclVarCommon(); - assert(overwritten->gtOper == GT_STORE_LCL_VAR || overwritten->gtOper == GT_LCL_VAR); - unsigned flag = 0; // 1->op1, 2->op2, 3->op3 + GenTreeLclVarCommon* overwritten = gtNext->AsLclVarCommon(); + assert(overwritten->OperIs(GT_STORE_LCL_VAR, GT_LCL_VAR)); + unsigned overwrittenOpNum = 0; // 1->op1, 2->op2, 3->op3 if (op1->IsLocal() && op1->AsLclVarCommon()->GetLclNum() == overwritten->GetLclNum()) { - flag = 1; + overwrittenOpNum = 1; } else if (op2->IsLocal() && op2->AsLclVarCommon()->GetLclNum() == overwritten->GetLclNum()) { - flag = 2; + overwrittenOpNum = 2; } else if (op3->IsLocal() && op3->AsLclVarCommon()->GetLclNum() == overwritten->GetLclNum()) { - flag = 3; + overwrittenOpNum = 3; } - return flag; + return overwrittenOpNum; } -#endif +#endif // TARGET_XARCH && FEATURE_HW_INTRINSICS #ifdef TARGET_ARM //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 145178d788e57c..291aac7ad8c9e9 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -5187,7 +5187,7 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic bool OperIsMemoryLoadOrStore() const; // Returns true for the HW Intrinsic instructions that have MemoryLoad or // MemoryStore semantics, false otherwise - unsigned GetFMAOverwritten(GenTree* op1, GenTree* op2, GenTree* op3); + unsigned GetOverwrittenOpNumForFMA(GenTree* op1, GenTree* op2, GenTree* op3); #if DEBUGGABLE_GENTREE GenTreeHWIntrinsic() : GenTreeJitIntrinsic() diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index c178ac666e09de..0be3ca5007f6ca 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -2133,9 +2133,9 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) // Intrinsics with CopyUpperBits semantics cannot have op1 be contained assert(!copiesUpperBits || !op1->isContained()); - unsigned flag = node->GetFMAOverwritten(op1, op2, op3); + unsigned overwrittenOpNum = node->GetOverwrittenOpNumForFMA(op1, op2, op3); - if (flag == 1) + if (overwrittenOpNum == 1) { // 132 form: op1 = (op1 * op3) + [op2] @@ -2144,9 +2144,9 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) op2Reg = op3->GetRegNum(); op3 = op2; } - else if (flag == 3) + else if (overwrittenOpNum == 3) { - // 231 form: op3 = (op2 * op3) + [op1] + // 231 form: op1 = (op2 * op3) + [op1] ins = (instruction)(ins + 1); op1Reg = op3->GetRegNum(); diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 8693d6a77227d3..5ab8f11fe9e8df 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -6315,9 +6315,9 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) { bool supportsRegOptional = false; - unsigned flag = node->GetFMAOverwritten(op1, op2, op3); + unsigned overwrittenOpNum = node->GetOverwrittenOpNumForFMA(op1, op2, op3); - switch (flag) + switch (overwrittenOpNum) { case 1: { diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 7a26aa0257f073..dbd5e5c3a6f392 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2334,12 +2334,12 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) const bool copiesUpperBits = HWIntrinsicInfo::CopiesUpperBits(intrinsicId); - unsigned flag = intrinsicTree->GetFMAOverwritten(op1, op2, op3); + unsigned overwrittenOpNum = intrinsicTree->GetOverwrittenOpNumForFMA(op1, op2, op3); // Intrinsics with CopyUpperBits semantics cannot have op1 be contained assert(!copiesUpperBits || !op1->isContained()); - if (flag == 1) + if (overwrittenOpNum == 1) { // 132 form: op1 = (op1 * op3) + [op2] @@ -2349,7 +2349,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) srcCount += BuildOperandUses(op2); srcCount += BuildDelayFreeUses(op3, op1); } - else if (flag == 3) + else if (overwrittenOpNum == 3) { // 231 form: op1 = (op2 * op3) + [op1] From cce4bdac9da5b254e554cf3241c976db4cfa4fff Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Fri, 27 Aug 2021 13:31:34 -0700 Subject: [PATCH 03/38] Add assertions --- src/coreclr/jit/gentree.cpp | 1 + src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 1 + src/coreclr/jit/lsraxarch.cpp | 1 + 3 files changed, 3 insertions(+) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 1cf625b29107a0..c92b32f0a9113b 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -19801,6 +19801,7 @@ uint16_t GenTreeLclVarCommon::GetLclOffs() const unsigned GenTreeHWIntrinsic::GetOverwrittenOpNumForFMA(GenTree* op1, GenTree* op2, GenTree* op3) { // only FMA intrinsic node should call into this function + assert(HWIntrinsicInfo::lookupIsa(gtHWIntrinsicId) == InstructionSet_FMA); GenTreeLclVarCommon* overwritten = gtNext->AsLclVarCommon(); assert(overwritten->OperIs(GT_STORE_LCL_VAR, GT_LCL_VAR)); unsigned overwrittenOpNum = 0; // 1->op1, 2->op2, 3->op3 diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 0be3ca5007f6ca..2c1ee92a3c78a0 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -2156,6 +2156,7 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) else { // 213 form: op1 = (op2 * op1) + [op3] + assert(overwrittenOpNum == 2 || overwrittenOpNum == 0); op1Reg = op1->GetRegNum(); op2Reg = op2->GetRegNum(); diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index dbd5e5c3a6f392..5d1b2405af2a12 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2362,6 +2362,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) else { // 213 form: op1 = (op2 * op1) + [op3] + assert(overwrittenOpNum == 2 || overwrittenOpNum == 0); tgtPrefUse = BuildUse(op1); srcCount += 1; From b825291c3f264d84e48afbc35775a5f725ce87dc Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Tue, 7 Sep 2021 11:45:41 -0700 Subject: [PATCH 04/38] Get use of FMA with TryGetUse --- src/coreclr/jit/gentree.cpp | 14 ++++++++------ src/coreclr/jit/gentree.h | 2 +- src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 10 ++++++++-- src/coreclr/jit/lowerxarch.cpp | 8 +++++++- src/coreclr/jit/lsraxarch.cpp | 10 ++++++++-- 5 files changed, 32 insertions(+), 12 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index c92b32f0a9113b..1c4acdbba2366a 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -19798,22 +19798,24 @@ uint16_t GenTreeLclVarCommon::GetLclOffs() const // Return Value: // The operand number or 0 if not overwritten. // -unsigned GenTreeHWIntrinsic::GetOverwrittenOpNumForFMA(GenTree* op1, GenTree* op2, GenTree* op3) +unsigned GenTreeHWIntrinsic::GetOverwrittenOpNumForFMA(GenTree* use, GenTree* op1, GenTree* op2, GenTree* op3) { // only FMA intrinsic node should call into this function assert(HWIntrinsicInfo::lookupIsa(gtHWIntrinsicId) == InstructionSet_FMA); - GenTreeLclVarCommon* overwritten = gtNext->AsLclVarCommon(); - assert(overwritten->OperIs(GT_STORE_LCL_VAR, GT_LCL_VAR)); + if (!use->OperIs(GT_STORE_LCL_VAR)) + return 0; + GenTreeLclVarCommon* overwritten = use->AsLclVarCommon(); + unsigned overwrittenLclNum = overwritten->GetLclNum(); unsigned overwrittenOpNum = 0; // 1->op1, 2->op2, 3->op3 - if (op1->IsLocal() && op1->AsLclVarCommon()->GetLclNum() == overwritten->GetLclNum()) + if (op1->IsLocal() && op1->AsLclVarCommon()->GetLclNum() == overwrittenLclNum) { overwrittenOpNum = 1; } - else if (op2->IsLocal() && op2->AsLclVarCommon()->GetLclNum() == overwritten->GetLclNum()) + else if (op2->IsLocal() && op2->AsLclVarCommon()->GetLclNum() == overwrittenLclNum) { overwrittenOpNum = 2; } - else if (op3->IsLocal() && op3->AsLclVarCommon()->GetLclNum() == overwritten->GetLclNum()) + else if (op3->IsLocal() && op3->AsLclVarCommon()->GetLclNum() == overwrittenLclNum) { overwrittenOpNum = 3; } diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 291aac7ad8c9e9..ba809486621b65 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -5187,7 +5187,7 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic bool OperIsMemoryLoadOrStore() const; // Returns true for the HW Intrinsic instructions that have MemoryLoad or // MemoryStore semantics, false otherwise - unsigned GetOverwrittenOpNumForFMA(GenTree* op1, GenTree* op2, GenTree* op3); + unsigned GetOverwrittenOpNumForFMA(GenTree* use, GenTree* op1, GenTree* op2, GenTree* op3); #if DEBUGGABLE_GENTREE GenTreeHWIntrinsic() : GenTreeJitIntrinsic() diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 2c1ee92a3c78a0..e9cea5fcd09ac1 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -2133,7 +2133,12 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) // Intrinsics with CopyUpperBits semantics cannot have op1 be contained assert(!copiesUpperBits || !op1->isContained()); - unsigned overwrittenOpNum = node->GetOverwrittenOpNumForFMA(op1, op2, op3); + unsigned overwrittenOpNum = 0; + LIR::Use use; + if (LIR::AsRange(compiler->compCurBB).TryGetUse(node, &use)) + { + overwrittenOpNum = node->GetOverwrittenOpNumForFMA(use.User(), op1, op2, op3); + } if (overwrittenOpNum == 1) { @@ -2155,7 +2160,8 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) } else { - // 213 form: op1 = (op2 * op1) + [op3] + // op2 = op1 * op2 + op3 + // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] assert(overwrittenOpNum == 2 || overwrittenOpNum == 0); op1Reg = op1->GetRegNum(); diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 5ab8f11fe9e8df..900fbc01eb5b58 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -6314,8 +6314,14 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) if ((intrinsicId >= NI_FMA_MultiplyAdd) && (intrinsicId <= NI_FMA_MultiplySubtractNegatedScalar)) { bool supportsRegOptional = false; + unsigned overwrittenOpNum = 0; + LIR::Use use; + + if (BlockRange().TryGetUse(node, &use)) + { + overwrittenOpNum = node->GetOverwrittenOpNumForFMA(use.User(), op1, op2, op3); + } - unsigned overwrittenOpNum = node->GetOverwrittenOpNumForFMA(op1, op2, op3); switch (overwrittenOpNum) { diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 5d1b2405af2a12..ea371d3941b536 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2334,7 +2334,12 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) const bool copiesUpperBits = HWIntrinsicInfo::CopiesUpperBits(intrinsicId); - unsigned overwrittenOpNum = intrinsicTree->GetOverwrittenOpNumForFMA(op1, op2, op3); + unsigned overwrittenOpNum = 0; + LIR::Use use; + if (LIR::AsRange(blockSequence[curBBSeqNum]).TryGetUse(intrinsicTree, &use)) + { + overwrittenOpNum = intrinsicTree->GetOverwrittenOpNumForFMA(use.User(), op1, op2, op3); + } // Intrinsics with CopyUpperBits semantics cannot have op1 be contained assert(!copiesUpperBits || !op1->isContained()); @@ -2361,7 +2366,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) } else { - // 213 form: op1 = (op2 * op1) + [op3] + // op2 = op1 * op2 + op3 + // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] assert(overwrittenOpNum == 2 || overwrittenOpNum == 0); tgtPrefUse = BuildUse(op1); From f615e39ad3b6b04d9ed28685fffe83cf2bf9615f Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Wed, 8 Sep 2021 11:13:53 -0700 Subject: [PATCH 05/38] Decide FMA form with two conditions, OverwrittenOpNum and isContained --- src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 75 +++++++++++++-------- src/coreclr/jit/lowerxarch.cpp | 53 ++++++++++----- src/coreclr/jit/lsraxarch.cpp | 69 +++++++++++++------ 3 files changed, 132 insertions(+), 65 deletions(-) diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index e9cea5fcd09ac1..a5ba05c42397ae 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -2142,47 +2142,66 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) if (overwrittenOpNum == 1) { - // 132 form: op1 = (op1 * op3) + [op2] - - ins = (instruction)(ins - 1); - op1Reg = op1->GetRegNum(); - op2Reg = op3->GetRegNum(); - op3 = op2; + if (op2->isContained()) + { + // op1 = (op1 * [op2]) + op3 + // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 + ins = (instruction)(ins - 1); + op1Reg = op1->GetRegNum(); + op2Reg = op3->GetRegNum(); + op3 = op2; + } + else + { + // op1 = (op1 * op2) + [op3] + // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] + op1Reg = op1->GetRegNum(); + op2Reg = op2->GetRegNum(); + } } else if (overwrittenOpNum == 3) { - // 231 form: op1 = (op2 * op3) + [op1] - + // 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1 + // One of the following: + // op3 = ([op1] * op2) + op3 + // op3 = (op1 * [op2]) + op3 ins = (instruction)(ins + 1); op1Reg = op3->GetRegNum(); - op2Reg = op2->GetRegNum(); - op3 = op1; + if (op1->isContained()) + { + // op3 = ([op1] * op2) + op3 + op2Reg = op2->GetRegNum(); + op3 = op1; + } + else + { + // op3 = (op1 * [op2]) + op3 + op2Reg = op1->GetRegNum(); + op3 = op2; + } } else { // op2 = op1 * op2 + op3 // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] assert(overwrittenOpNum == 2 || overwrittenOpNum == 0); + if (op1->isContained()) + { + // op2 = ([op1] * op2) + op3 + // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 + op1Reg = op2->GetRegNum(); + op2Reg = op3->GetRegNum(); + op3 = op2; - op1Reg = op1->GetRegNum(); - op2Reg = op2->GetRegNum(); - - isCommutative = !copiesUpperBits; - } - - if (isCommutative && (op1Reg != targetReg) && (op2Reg == targetReg)) - { - assert(node->isRMWHWIntrinsic(compiler)); - - // We have "reg2 = (reg1 * reg2) +/- op3" where "reg1 != reg2" on a RMW intrinsic. - // - // For non-commutative intrinsics, we should have ensured that op2 was marked - // delay free in order to prevent it from getting assigned the same register - // as target. However, for commutative intrinsics, we can just swap the operands - // in order to have "reg2 = reg2 op reg1" which will end up producing the right code. + } + else + { + // op2 = (op1 * op2) + [op3] + // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] + op1Reg = op2->GetRegNum(); + op2Reg = op1->GetRegNum(); + } - op2Reg = op1Reg; - op1Reg = targetReg; } genHWIntrinsic_R_R_R_RM(ins, attr, targetReg, op1Reg, op2Reg, op3); diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 900fbc01eb5b58..3c1c8925b58485 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -6322,50 +6322,68 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) overwrittenOpNum = node->GetOverwrittenOpNumForFMA(use.User(), op1, op2, op3); } - switch (overwrittenOpNum) { case 1: { if (IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional)) { - // 132 form: op1 = (op1 * op3) + [op2] + // op1 = (op1 * [op2]) + op3 + // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 MakeSrcContained(node, op2); } + else if (IsContainableHWIntrinsicOp(node, op3, &supportsRegOptional)) + { + // op1 = (op1 * op2) + [op3] + // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] + MakeSrcContained(node, op3); + + } else { assert(supportsRegOptional); - - // 132 form: op1 = (op1 * op3) + [op2] - op2->SetRegOptional(); + // op1 = (op1 * op2) + [op3] + // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] + op3->SetRegOptional(); } break; } case 3: { - if (IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional)) + // 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1 + // One of the following: + // op3 = ([op1] * op2) + op3 + // op3 = (op1 * [op2]) + op3 + if (IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional) && !HWIntrinsicInfo::CopiesUpperBits(intrinsicId)) { - // Intrinsics with CopyUpperBits semantics cannot have op1 be contained + // Intrinsics with CopyUpperBits semantics cannot have op1 be contained - if (!HWIntrinsicInfo::CopiesUpperBits(intrinsicId)) - { - // 231 form: op1 = (op2 * op3) + [op1] - MakeSrcContained(node, op1); - } + MakeSrcContained(node, op1); + } + else if (IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional)) + { + MakeSrcContained(node, op2); } else { assert(supportsRegOptional); - // 231 form: op1 = (op2 * op3) + [op1] - op1->SetRegOptional(); + op2->SetRegOptional(); } break; } default: { - if (IsContainableHWIntrinsicOp(node, op3, &supportsRegOptional)) + assert(overwrittenOpNum == 2 || overwrittenOpNum == 0); + if (IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional) && !HWIntrinsicInfo::CopiesUpperBits(intrinsicId)) + { + // op2 = ([op1] * op2) + op3 + // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 + MakeSrcContained(node, op1); + } + else if (IsContainableHWIntrinsicOp(node, op3, &supportsRegOptional)) { - // 213 form: op1 = (op2 * op1) + [op3] + // op2 = (op1 * op2) + [op3] + // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] MakeSrcContained(node, op3); } else @@ -6377,7 +6395,8 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) // it can only be so if CopyUpperBits is off. // https://github.com/dotnet/runtime/issues/6358 - // 213 form: op1 = (op2 * op1) + [op3] + // op2 = (op1 * op2) + [op3] + // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] op3->SetRegOptional(); } } diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index ea371d3941b536..e24d2a383c291a 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2346,44 +2346,73 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) if (overwrittenOpNum == 1) { - // 132 form: op1 = (op1 * op3) + [op2] + if (op2->isContained()) + { + // op1 = (op1 * [op2]) + op3 + // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 + tgtPrefUse = BuildUse(op1); - tgtPrefUse = BuildUse(op1); + srcCount += 1; + srcCount += BuildOperandUses(op2); + srcCount += BuildDelayFreeUses(op3, op1); + } + else + { + //assert(op3->isContained()); + // op1 = (op1 * op2) + [op3] + // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] + tgtPrefUse = BuildUse(op1); - srcCount += 1; - srcCount += BuildOperandUses(op2); - srcCount += BuildDelayFreeUses(op3, op1); + srcCount += 1; + srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1); + srcCount += BuildDelayFreeUses(op2, op1); + + } } else if (overwrittenOpNum == 3) { - // 231 form: op1 = (op2 * op3) + [op1] - + // 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1 + // One of the following: + // op3 = ([op1] * op2) + op3 + // op3 = (op1 * [op2]) + op3 tgtPrefUse = BuildUse(op3); - - srcCount += BuildOperandUses(op1); - srcCount += BuildDelayFreeUses(op2, op1); srcCount += 1; + if (op1->isContained()) + { + srcCount += BuildOperandUses(op1); + srcCount += BuildDelayFreeUses(op2, op3); + } + else + { + //assert(op2->isContained()); + srcCount += op2->isContained() ? BuildOperandUses(op2) : BuildDelayFreeUses(op2, op3); + srcCount += BuildDelayFreeUses(op1, op3); + } + } else { - // op2 = op1 * op2 + op3 - // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] assert(overwrittenOpNum == 2 || overwrittenOpNum == 0); - tgtPrefUse = BuildUse(op1); - srcCount += 1; - - if (copiesUpperBits) + if (op1->isContained()) { - srcCount += BuildDelayFreeUses(op2, op1); + // op2 = ([op1] * op2) + op3 + // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 + tgtPrefUse = BuildUse(op2); + srcCount += 1; + srcCount += BuildOperandUses(op1); + srcCount += BuildDelayFreeUses(op3, op2); } else { - tgtPrefUse2 = BuildUse(op2); + // op2 = (op1 * op2) + [op3] + // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] + + tgtPrefUse = BuildUse(op2); srcCount += 1; + srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1); + srcCount += BuildDelayFreeUses(op1, op2); } - - srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1); } buildUses = false; From b698036e22745fc32cf079b9f7aa27da0c8a023d Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Thu, 9 Sep 2021 21:37:29 -0700 Subject: [PATCH 06/38] Fix op reg error in codegen --- src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index a5ba05c42397ae..24a77806e5ea64 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -2182,16 +2182,15 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) } else { - // op2 = op1 * op2 + op3 - // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] assert(overwrittenOpNum == 2 || overwrittenOpNum == 0); if (op1->isContained()) { // op2 = ([op1] * op2) + op3 // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 + ins = (instruction)(ins - 1); op1Reg = op2->GetRegNum(); op2Reg = op3->GetRegNum(); - op3 = op2; + op3 = op1; } else From 7d9c0d6902cd2243da5b40f4a4db49f9cf3e8d35 Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Wed, 15 Sep 2021 13:30:05 -0700 Subject: [PATCH 07/38] Decide form using lastUse and isContained in no overwritten case --- src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 91 ++++++++++++++++++++- src/coreclr/jit/lowerxarch.cpp | 43 +++++++++- src/coreclr/jit/lsraxarch.cpp | 78 +++++++++++++++++- 3 files changed, 206 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 24a77806e5ea64..0fb1c3e14f766d 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -2180,9 +2180,8 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) op3 = op2; } } - else + else if (overwrittenOpNum == 2) { - assert(overwrittenOpNum == 2 || overwrittenOpNum == 0); if (op1->isContained()) { // op2 = ([op1] * op2) + op3 @@ -2202,6 +2201,94 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) } } + else + { + // No overwritten: + // result = (op1 * op2) + op3 + // decide form with contained op and lastUse + assert(overwrittenOpNum == 0); + if (op2->isContained() || op2->isUsedFromSpillTemp()) + { + if (op1->OperIs(GT_LCL_VAR) && op1->IsLastUse(0)) + { + // result = (op1 * [op2]) + op3 + // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 + + ins = (instruction)(ins - 1); + op1Reg = op1->GetRegNum(); + op2Reg = op3->GetRegNum(); + op3 = op2; + } + else + { + // result = (op1 * [op2]) + op3 + // 231 form XMM1 = XMM2 * [XMM3] + XMM1 + + ins = (instruction)(ins + 1); + op1Reg = op3->GetRegNum(); + op2Reg = op1->GetRegNum(); + op3 = op2; + } + } + else if (op1->isContained() || op1->isUsedFromSpillTemp()) + { + if (op2->OperIs(GT_LCL_VAR) && op2->IsLastUse(0)) + { + // result = ([op1] * op2) + op3 + // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 + + ins = (instruction)(ins - 1); + op1Reg = op2->GetRegNum(); + op2Reg = op3->GetRegNum(); + op3 = op1; + + + } + else + { + // result = ([op1] * op2) + op3 + // 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1 + + ins = (instruction)(ins + 1); + op1Reg = op3->GetRegNum(); + op2Reg = op2->GetRegNum(); + op3 = op1; + } + } + else + { + // op3 isContained or regOptional + // result = (op1 * op2) + [op3] + // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] + //if (op1->OperIs(GT_LCL_VAR) && op1->IsLastUse(0)) + //{ + op1Reg = op1->GetRegNum(); + op2Reg = op2->GetRegNum(); + //} + //else + //{ + // op1Reg = op2->GetRegNum(); + // op2Reg = op1->GetRegNum(); + //} + //isCommutative = !copiesUpperBits; + isCommutative = true; + } + + if (isCommutative && (op1Reg != targetReg) && (op2Reg == targetReg)) + { + assert(node->isRMWHWIntrinsic(compiler)); + + // We have "reg2 = (reg1 * reg2) +/- op3" where "reg1 != reg2" on a RMW intrinsic. + // + // For non-commutative intrinsics, we should have ensured that op2 was marked + // delay free in order to prevent it from getting assigned the same register + // as target. However, for commutative intrinsics, we can just swap the operands + // in order to have "reg2 = reg2 op reg1" which will end up producing the right code. + + op2Reg = op1Reg; + op1Reg = targetReg; + } + } genHWIntrinsic_R_R_R_RM(ins, attr, targetReg, op1Reg, op2Reg, op3); genProduceReg(node); diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 3c1c8925b58485..d5385a988a346a 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -6371,9 +6371,8 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) } break; } - default: + case 2: { - assert(overwrittenOpNum == 2 || overwrittenOpNum == 0); if (IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional) && !HWIntrinsicInfo::CopiesUpperBits(intrinsicId)) { // op2 = ([op1] * op2) + op3 @@ -6399,6 +6398,46 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] op3->SetRegOptional(); } + break; + } + default: + { + // No overwritten: + // result = (op1 * op2) + op3 + // decide form with contained op and lastUse + assert(overwrittenOpNum == 0); + if (IsContainableHWIntrinsicOp(node, op3, &supportsRegOptional)) + { + // result = (op1 * op2) + [op3] + MakeSrcContained(node, op3); + } + else if (IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional)) + { + // result = (op1 * [op2]) + op3 + MakeSrcContained(node, op2); + } + else if (IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional)) + { + // Intrinsics with CopyUpperBits semantics cannot have op1 be contained + + if (!HWIntrinsicInfo::CopiesUpperBits(intrinsicId)) + { + // result = ([op1] * op2) + op3 + MakeSrcContained(node, op1); + } + } + else + { + assert(supportsRegOptional); + + // TODO-XArch-CQ: Technically any one of the three operands can + // be reg-optional. With a limitation on op1 where + // it can only be so if CopyUpperBits is off. + // https://github.com/dotnet/runtime/issues/6358 + + // result = (op1 * op2) + op3 + op3->SetRegOptional(); + } } } } diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index e24d2a383c291a..28f18ac72c250c 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2390,9 +2390,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) } } - else + else if (overwrittenOpNum == 2) { - assert(overwrittenOpNum == 2 || overwrittenOpNum == 0); if (op1->isContained()) { @@ -2414,6 +2413,81 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) srcCount += BuildDelayFreeUses(op1, op2); } } + else + { + // No overwritten: + // result = (op1 * op2) + op3 + // decide form with contained op and lastUse + assert(overwrittenOpNum == 0); + if (op2->isContained()) + { + if (op1->OperIs(GT_LCL_VAR) && op1->IsLastUse(0)) + { + // result = (op1 * [op2]) + op3 + // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 + + tgtPrefUse = BuildUse(op1); + srcCount += 1; + srcCount += BuildOperandUses(op2); + srcCount += BuildDelayFreeUses(op3, op1); + } + else + { + // result = (op1 * [op2]) + op3 + // 231 form XMM1 = XMM2 * [XMM3] + XMM1 + + tgtPrefUse = BuildUse(op3); + srcCount += 1; + srcCount += BuildOperandUses(op2); + srcCount += BuildDelayFreeUses(op1, op3); + } + } + else if (op1->isContained()) + { + if (op2->OperIs(GT_LCL_VAR) && op2->IsLastUse(0)) + { + // result = ([op1] * op2) + op3 + // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 + + tgtPrefUse = BuildUse(op2); + srcCount += 1; + srcCount += BuildOperandUses(op1); + srcCount += BuildDelayFreeUses(op3, op2); + } + else + { + // result = ([op1] * op2) + op3 + // 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1 + + tgtPrefUse = BuildUse(op3); + + srcCount += BuildOperandUses(op1); + srcCount += BuildDelayFreeUses(op2, op1); + srcCount += 1; + } + } + else + { + // op3 isContained or regOptional + // result = (op1 * op2) + [op3] + // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] + + //if (op1->OperIs(GT_LCL_VAR) && op1->IsLastUse(0)) + //{ + tgtPrefUse = BuildUse(op1); + srcCount += 1; + srcCount += BuildDelayFreeUses(op2, op1); + //} + //else + //{ + // tgtPrefUse = BuildUse(op2); + // srcCount += 1; + // srcCount += BuildDelayFreeUses(op1, op2); + //} + + srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1); + } + } buildUses = false; break; From 1344d922fba926d4dc58d68234019c488f25dabd Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Sat, 18 Sep 2021 12:42:18 -0700 Subject: [PATCH 08/38] Clean up code --- src/coreclr/jit/gentree.cpp | 39 ++++-- src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 89 +------------ src/coreclr/jit/lowerxarch.cpp | 140 ++++---------------- src/coreclr/jit/lsraxarch.cpp | 114 ++-------------- 4 files changed, 70 insertions(+), 312 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 1c4acdbba2366a..d62e13d5a50f84 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -19793,31 +19793,42 @@ uint16_t GenTreeLclVarCommon::GetLclOffs() const #if defined(TARGET_XARCH) && defined(FEATURE_HW_INTRINSICS) //------------------------------------------------------------------------ -// GetOverwrittenOpNumForFMA: check if the result is written into one of the operands +// GetOverwrittenOpNumForFMA: check if the result is written into one of the operands. +// In the case that none of the operand is overwritten, check if any of them is lastUse. // // Return Value: -// The operand number or 0 if not overwritten. +// The operand number overwritten or lastUse. 2 is the default value, where no overwritten and no lastUse. // unsigned GenTreeHWIntrinsic::GetOverwrittenOpNumForFMA(GenTree* use, GenTree* op1, GenTree* op2, GenTree* op3) { // only FMA intrinsic node should call into this function assert(HWIntrinsicInfo::lookupIsa(gtHWIntrinsicId) == InstructionSet_FMA); + unsigned overwrittenOpNum = 2; // 1->op1, 2->op2, 3->op3 if (!use->OperIs(GT_STORE_LCL_VAR)) - return 0; - GenTreeLclVarCommon* overwritten = use->AsLclVarCommon(); - unsigned overwrittenLclNum = overwritten->GetLclNum(); - unsigned overwrittenOpNum = 0; // 1->op1, 2->op2, 3->op3 - if (op1->IsLocal() && op1->AsLclVarCommon()->GetLclNum() == overwrittenLclNum) - { - overwrittenOpNum = 1; - } - else if (op2->IsLocal() && op2->AsLclVarCommon()->GetLclNum() == overwrittenLclNum) { - overwrittenOpNum = 2; + if (op1->OperIs(GT_LCL_VAR) && op1->IsLastUse(0)) + overwrittenOpNum = 1; + else if (op3->OperIs(GT_LCL_VAR) && op3->IsLastUse(0)) + overwrittenOpNum = 3; + else + overwrittenOpNum = 2; } - else if (op3->IsLocal() && op3->AsLclVarCommon()->GetLclNum() == overwrittenLclNum) + else { - overwrittenOpNum = 3; + GenTreeLclVarCommon* overwritten = use->AsLclVarCommon(); + unsigned overwrittenLclNum = overwritten->GetLclNum(); + if (op1->IsLocal() && op1->AsLclVarCommon()->GetLclNum() == overwrittenLclNum) + { + overwrittenOpNum = 1; + } + else if (op2->IsLocal() && op2->AsLclVarCommon()->GetLclNum() == overwrittenLclNum) + { + overwrittenOpNum = 2; + } + else if (op3->IsLocal() && op3->AsLclVarCommon()->GetLclNum() == overwrittenLclNum) + { + overwrittenOpNum = 3; + } } return overwrittenOpNum; diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 0fb1c3e14f766d..20e3cba19d2359 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -2140,9 +2140,10 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) overwrittenOpNum = node->GetOverwrittenOpNumForFMA(use.User(), op1, op2, op3); } - if (overwrittenOpNum == 1) + // Intrinsics with CopyUpperBits semantics must have op1 as target + if (overwrittenOpNum == 1 || copiesUpperBits) { - if (op2->isContained()) + if (op2->isContained() || op2->IsRegOptional()) { // op1 = (op1 * [op2]) + op3 // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 @@ -2158,13 +2159,11 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) op1Reg = op1->GetRegNum(); op2Reg = op2->GetRegNum(); } + isCommutative = copiesUpperBits; } else if (overwrittenOpNum == 3) { // 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1 - // One of the following: - // op3 = ([op1] * op2) + op3 - // op3 = (op1 * [op2]) + op3 ins = (instruction)(ins + 1); op1Reg = op3->GetRegNum(); if (op1->isContained()) @@ -2180,8 +2179,9 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) op3 = op2; } } - else if (overwrittenOpNum == 2) + else { + assert(overwrittenOpNum == 2); if (op1->isContained()) { // op2 = ([op1] * op2) + op3 @@ -2199,82 +2199,9 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) op1Reg = op2->GetRegNum(); op2Reg = op1->GetRegNum(); } - } - else - { - // No overwritten: - // result = (op1 * op2) + op3 - // decide form with contained op and lastUse - assert(overwrittenOpNum == 0); - if (op2->isContained() || op2->isUsedFromSpillTemp()) - { - if (op1->OperIs(GT_LCL_VAR) && op1->IsLastUse(0)) - { - // result = (op1 * [op2]) + op3 - // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 - - ins = (instruction)(ins - 1); - op1Reg = op1->GetRegNum(); - op2Reg = op3->GetRegNum(); - op3 = op2; - } - else - { - // result = (op1 * [op2]) + op3 - // 231 form XMM1 = XMM2 * [XMM3] + XMM1 - - ins = (instruction)(ins + 1); - op1Reg = op3->GetRegNum(); - op2Reg = op1->GetRegNum(); - op3 = op2; - } - } - else if (op1->isContained() || op1->isUsedFromSpillTemp()) - { - if (op2->OperIs(GT_LCL_VAR) && op2->IsLastUse(0)) - { - // result = ([op1] * op2) + op3 - // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 - - ins = (instruction)(ins - 1); - op1Reg = op2->GetRegNum(); - op2Reg = op3->GetRegNum(); - op3 = op1; - - } - else - { - // result = ([op1] * op2) + op3 - // 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1 - - ins = (instruction)(ins + 1); - op1Reg = op3->GetRegNum(); - op2Reg = op2->GetRegNum(); - op3 = op1; - } - } - else - { - // op3 isContained or regOptional - // result = (op1 * op2) + [op3] - // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] - //if (op1->OperIs(GT_LCL_VAR) && op1->IsLastUse(0)) - //{ - op1Reg = op1->GetRegNum(); - op2Reg = op2->GetRegNum(); - //} - //else - //{ - // op1Reg = op2->GetRegNum(); - // op2Reg = op1->GetRegNum(); - //} - //isCommutative = !copiesUpperBits; - isCommutative = true; - } - - if (isCommutative && (op1Reg != targetReg) && (op2Reg == targetReg)) + if (isCommutative && (op1Reg != targetReg) && (op2Reg == targetReg)) { assert(node->isRMWHWIntrinsic(compiler)); @@ -2288,8 +2215,6 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) op2Reg = op1Reg; op1Reg = targetReg; } - } - genHWIntrinsic_R_R_R_RM(ins, attr, targetReg, op1Reg, op2Reg, op3); genProduceReg(node); } diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index d5385a988a346a..2babe03768d25f 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -6314,6 +6314,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) if ((intrinsicId >= NI_FMA_MultiplyAdd) && (intrinsicId <= NI_FMA_MultiplySubtractNegatedScalar)) { bool supportsRegOptional = false; + bool supportsRegOptional2 = false; unsigned overwrittenOpNum = 0; LIR::Use use; @@ -6322,123 +6323,30 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) overwrittenOpNum = node->GetOverwrittenOpNumForFMA(use.User(), op1, op2, op3); } - switch (overwrittenOpNum) + if (overwrittenOpNum != 1 && IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional) && !HWIntrinsicInfo::CopiesUpperBits(intrinsicId)) { - case 1: - { - if (IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional)) - { - // op1 = (op1 * [op2]) + op3 - // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 - MakeSrcContained(node, op2); - } - else if (IsContainableHWIntrinsicOp(node, op3, &supportsRegOptional)) - { - // op1 = (op1 * op2) + [op3] - // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] - MakeSrcContained(node, op3); - - } - else - { - assert(supportsRegOptional); - // op1 = (op1 * op2) + [op3] - // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] - op3->SetRegOptional(); - } - break; - } - case 3: - { - // 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1 - // One of the following: - // op3 = ([op1] * op2) + op3 - // op3 = (op1 * [op2]) + op3 - if (IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional) && !HWIntrinsicInfo::CopiesUpperBits(intrinsicId)) - { - // Intrinsics with CopyUpperBits semantics cannot have op1 be contained - - MakeSrcContained(node, op1); - } - else if (IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional)) - { - MakeSrcContained(node, op2); - } - else - { - assert(supportsRegOptional); - op2->SetRegOptional(); - } - break; - } - case 2: - { - if (IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional) && !HWIntrinsicInfo::CopiesUpperBits(intrinsicId)) - { - // op2 = ([op1] * op2) + op3 - // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 - MakeSrcContained(node, op1); - } - else if (IsContainableHWIntrinsicOp(node, op3, &supportsRegOptional)) - { - // op2 = (op1 * op2) + [op3] - // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] - MakeSrcContained(node, op3); - } - else - { - assert(supportsRegOptional); - - // TODO-XArch-CQ: Technically any one of the three operands can - // be reg-optional. With a limitation on op1 where - // it can only be so if CopyUpperBits is off. - // https://github.com/dotnet/runtime/issues/6358 - - // op2 = (op1 * op2) + [op3] - // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] - op3->SetRegOptional(); - } - break; - } - default: - { - // No overwritten: - // result = (op1 * op2) + op3 - // decide form with contained op and lastUse - assert(overwrittenOpNum == 0); - if (IsContainableHWIntrinsicOp(node, op3, &supportsRegOptional)) - { - // result = (op1 * op2) + [op3] - MakeSrcContained(node, op3); - } - else if (IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional)) - { - // result = (op1 * [op2]) + op3 - MakeSrcContained(node, op2); - } - else if (IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional)) - { - // Intrinsics with CopyUpperBits semantics cannot have op1 be contained - - if (!HWIntrinsicInfo::CopiesUpperBits(intrinsicId)) - { - // result = ([op1] * op2) + op3 - MakeSrcContained(node, op1); - } - } - else - { - assert(supportsRegOptional); - - // TODO-XArch-CQ: Technically any one of the three operands can - // be reg-optional. With a limitation on op1 where - // it can only be so if CopyUpperBits is off. - // https://github.com/dotnet/runtime/issues/6358 - - // result = (op1 * op2) + op3 - op3->SetRegOptional(); - } - } + // result = ([op1] * op2) + op3 + MakeSrcContained(node, op1); + } + else if (overwrittenOpNum != 2 && IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional2)) + { + // result = (op1 * [op2]) + op3 + MakeSrcContained(node, op2); + } + else if (overwrittenOpNum != 3 && IsContainableHWIntrinsicOp(node, op3, &supportsRegOptional)) + { + // result = (op1 * op2) + [op3] + MakeSrcContained(node, op3); + } + else if (overwrittenOpNum == 3) + { + assert(supportsRegOptional2); + op2->SetRegOptional(); + } + else + { + assert(supportsRegOptional); + op3->SetRegOptional(); } } else diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 28f18ac72c250c..af8db6f64778d0 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2344,26 +2344,21 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) // Intrinsics with CopyUpperBits semantics cannot have op1 be contained assert(!copiesUpperBits || !op1->isContained()); - if (overwrittenOpNum == 1) + // Intrinsics with CopyUpperBits semantics must have op1 as target + if (overwrittenOpNum == 1 || copiesUpperBits) { - if (op2->isContained()) + tgtPrefUse = BuildUse(op1); + srcCount += 1; + + if (op2->isContained() || op2->IsRegOptional()) { // op1 = (op1 * [op2]) + op3 - // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 - tgtPrefUse = BuildUse(op1); - - srcCount += 1; srcCount += BuildOperandUses(op2); srcCount += BuildDelayFreeUses(op3, op1); } else { - //assert(op3->isContained()); // op1 = (op1 * op2) + [op3] - // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] - tgtPrefUse = BuildUse(op1); - - srcCount += 1; srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1); srcCount += BuildDelayFreeUses(op2, op1); @@ -2371,124 +2366,43 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) } else if (overwrittenOpNum == 3) { - // 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1 - // One of the following: - // op3 = ([op1] * op2) + op3 - // op3 = (op1 * [op2]) + op3 tgtPrefUse = BuildUse(op3); srcCount += 1; + if (op1->isContained()) { + // op3 = ([op1] * op2) + op3 srcCount += BuildOperandUses(op1); srcCount += BuildDelayFreeUses(op2, op3); } else { - //assert(op2->isContained()); + // op3 = (op1 * [op2]) + op3 srcCount += op2->isContained() ? BuildOperandUses(op2) : BuildDelayFreeUses(op2, op3); srcCount += BuildDelayFreeUses(op1, op3); } } - else if (overwrittenOpNum == 2) + else { + assert(overwrittenOpNum == 2); + tgtPrefUse = BuildUse(op2); + srcCount += 1; if (op1->isContained()) { // op2 = ([op1] * op2) + op3 - // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 - tgtPrefUse = BuildUse(op2); - srcCount += 1; srcCount += BuildOperandUses(op1); srcCount += BuildDelayFreeUses(op3, op2); } else { // op2 = (op1 * op2) + [op3] - // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] - - tgtPrefUse = BuildUse(op2); - srcCount += 1; srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1); srcCount += BuildDelayFreeUses(op1, op2); } } - else - { - // No overwritten: - // result = (op1 * op2) + op3 - // decide form with contained op and lastUse - assert(overwrittenOpNum == 0); - if (op2->isContained()) - { - if (op1->OperIs(GT_LCL_VAR) && op1->IsLastUse(0)) - { - // result = (op1 * [op2]) + op3 - // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 - - tgtPrefUse = BuildUse(op1); - srcCount += 1; - srcCount += BuildOperandUses(op2); - srcCount += BuildDelayFreeUses(op3, op1); - } - else - { - // result = (op1 * [op2]) + op3 - // 231 form XMM1 = XMM2 * [XMM3] + XMM1 - - tgtPrefUse = BuildUse(op3); - srcCount += 1; - srcCount += BuildOperandUses(op2); - srcCount += BuildDelayFreeUses(op1, op3); - } - } - else if (op1->isContained()) - { - if (op2->OperIs(GT_LCL_VAR) && op2->IsLastUse(0)) - { - // result = ([op1] * op2) + op3 - // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 - - tgtPrefUse = BuildUse(op2); - srcCount += 1; - srcCount += BuildOperandUses(op1); - srcCount += BuildDelayFreeUses(op3, op2); - } - else - { - // result = ([op1] * op2) + op3 - // 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1 - - tgtPrefUse = BuildUse(op3); - - srcCount += BuildOperandUses(op1); - srcCount += BuildDelayFreeUses(op2, op1); - srcCount += 1; - } - } - else - { - // op3 isContained or regOptional - // result = (op1 * op2) + [op3] - // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] - - //if (op1->OperIs(GT_LCL_VAR) && op1->IsLastUse(0)) - //{ - tgtPrefUse = BuildUse(op1); - srcCount += 1; - srcCount += BuildDelayFreeUses(op2, op1); - //} - //else - //{ - // tgtPrefUse = BuildUse(op2); - // srcCount += 1; - // srcCount += BuildDelayFreeUses(op1, op2); - //} - - srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1); - } - } - + buildUses = false; break; } From 029a9b574fe32f71261c049c757fd359f96d66ba Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Mon, 20 Sep 2021 08:37:56 -0700 Subject: [PATCH 09/38] Separate default case overwrittenOpNum==0 --- src/coreclr/jit/gentree.cpp | 4 +- src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 59 ++++++++++++++++----- src/coreclr/jit/lsraxarch.cpp | 48 +++++++++++++---- 3 files changed, 87 insertions(+), 24 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index d62e13d5a50f84..d49046c69057ae 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -19797,13 +19797,13 @@ uint16_t GenTreeLclVarCommon::GetLclOffs() const // In the case that none of the operand is overwritten, check if any of them is lastUse. // // Return Value: -// The operand number overwritten or lastUse. 2 is the default value, where no overwritten and no lastUse. +// The operand number overwritten or lastUse. 0 is the default value, where no overwritten and no lastUse. // unsigned GenTreeHWIntrinsic::GetOverwrittenOpNumForFMA(GenTree* use, GenTree* op1, GenTree* op2, GenTree* op3) { // only FMA intrinsic node should call into this function assert(HWIntrinsicInfo::lookupIsa(gtHWIntrinsicId) == InstructionSet_FMA); - unsigned overwrittenOpNum = 2; // 1->op1, 2->op2, 3->op3 + unsigned overwrittenOpNum = 0; // 1->op1, 2->op2, 3->op3 if (!use->OperIs(GT_STORE_LCL_VAR)) { if (op1->OperIs(GT_LCL_VAR) && op1->IsLastUse(0)) diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 20e3cba19d2359..e9be7e61f9a35b 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -2179,9 +2179,8 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) op3 = op2; } } - else + else if (overwrittenOpNum == 2) { - assert(overwrittenOpNum == 2); if (op1->isContained()) { // op2 = ([op1] * op2) + op3 @@ -2200,21 +2199,55 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) op2Reg = op1->GetRegNum(); } } + else + { + assert(overwrittenOpNum == 0); + if (op1->isContained()) + { + // op2 = ([op1] * op2) + op3 + // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 + ins = (instruction)(ins - 1); + op1Reg = op2->GetRegNum(); + op2Reg = op3->GetRegNum(); + op3 = op1; + } + else + { + if (op2->isContained() || op2->IsRegOptional()) + { + // op1 = (op1 * [op2]) + op3 + // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 + ins = (instruction)(ins - 1); + op1Reg = op1->GetRegNum(); + op2Reg = op3->GetRegNum(); + op3 = op2; + } + else + { + // op1 = (op1 * op2) + [op3] + // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] + op1Reg = op1->GetRegNum(); + op2Reg = op2->GetRegNum(); + } + isCommutative = copiesUpperBits; + } + + } if (isCommutative && (op1Reg != targetReg) && (op2Reg == targetReg)) - { - assert(node->isRMWHWIntrinsic(compiler)); + { + assert(node->isRMWHWIntrinsic(compiler)); - // We have "reg2 = (reg1 * reg2) +/- op3" where "reg1 != reg2" on a RMW intrinsic. - // - // For non-commutative intrinsics, we should have ensured that op2 was marked - // delay free in order to prevent it from getting assigned the same register - // as target. However, for commutative intrinsics, we can just swap the operands - // in order to have "reg2 = reg2 op reg1" which will end up producing the right code. + // We have "reg2 = (reg1 * reg2) +/- op3" where "reg1 != reg2" on a RMW intrinsic. + // + // For non-commutative intrinsics, we should have ensured that op2 was marked + // delay free in order to prevent it from getting assigned the same register + // as target. However, for commutative intrinsics, we can just swap the operands + // in order to have "reg2 = reg2 op reg1" which will end up producing the right code. - op2Reg = op1Reg; - op1Reg = targetReg; - } + op2Reg = op1Reg; + op1Reg = targetReg; + } genHWIntrinsic_R_R_R_RM(ins, attr, targetReg, op1Reg, op2Reg, op3); genProduceReg(node); } diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index af8db6f64778d0..32f78ef7fac34c 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2353,7 +2353,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) if (op2->isContained() || op2->IsRegOptional()) { // op1 = (op1 * [op2]) + op3 - srcCount += BuildOperandUses(op2); + srcCount += op2->isContained() ? BuildOperandUses(op2) : BuildDelayFreeUses(op2, op1); srcCount += BuildDelayFreeUses(op3, op1); } else @@ -2364,6 +2364,24 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) } } + else if (overwrittenOpNum == 2) + { + tgtPrefUse = BuildUse(op2); + srcCount += 1; + + if (op1->isContained()) + { + // op2 = ([op1] * op2) + op3 + srcCount += BuildOperandUses(op1); + srcCount += BuildDelayFreeUses(op3, op2); + } + else + { + // op2 = (op1 * op2) + [op3] + srcCount += BuildDelayFreeUses(op1, op2); + srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1); + } + } else if (overwrittenOpNum == 3) { tgtPrefUse = BuildUse(op3); @@ -2385,22 +2403,34 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) } else { - assert(overwrittenOpNum == 2); - tgtPrefUse = BuildUse(op2); - srcCount += 1; - + assert(overwrittenOpNum == 0); if (op1->isContained()) { - // op2 = ([op1] * op2) + op3 + tgtPrefUse = BuildUse(op2); + srcCount += 1; + // result = ([op1] * op2) + op3 srcCount += BuildOperandUses(op1); srcCount += BuildDelayFreeUses(op3, op2); } else { - // op2 = (op1 * op2) + [op3] - srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1); - srcCount += BuildDelayFreeUses(op1, op2); + tgtPrefUse = BuildUse(op1); + srcCount += 1; + + if (op2->isContained() || op2->IsRegOptional()) + { + // result = (op1 * [op2]) + op3 + srcCount += op2->isContained() ? BuildOperandUses(op2) : BuildDelayFreeUses(op2, op1); + srcCount += BuildDelayFreeUses(op3, op1); + } + else + { + // result = (op1 * op2) + [op3] + srcCount += BuildDelayFreeUses(op2, op1); + srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1); + } } + } buildUses = false; From f2a371f73c9447b6ab6ca7cb99a0236a3755c0d8 Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Wed, 29 Sep 2021 09:33:07 -0700 Subject: [PATCH 10/38] Apply format patch --- src/coreclr/jit/gentree.cpp | 6 ++-- src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 2 -- src/coreclr/jit/lowerxarch.cpp | 31 +++++++++++---------- src/coreclr/jit/lsraxarch.cpp | 5 +--- 4 files changed, 20 insertions(+), 24 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index d49046c69057ae..01157ca486437c 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -19806,7 +19806,7 @@ unsigned GenTreeHWIntrinsic::GetOverwrittenOpNumForFMA(GenTree* use, GenTree* op unsigned overwrittenOpNum = 0; // 1->op1, 2->op2, 3->op3 if (!use->OperIs(GT_STORE_LCL_VAR)) { - if (op1->OperIs(GT_LCL_VAR) && op1->IsLastUse(0)) + if (op1->OperIs(GT_LCL_VAR) && op1->IsLastUse(0)) overwrittenOpNum = 1; else if (op3->OperIs(GT_LCL_VAR) && op3->IsLastUse(0)) overwrittenOpNum = 3; @@ -19815,8 +19815,8 @@ unsigned GenTreeHWIntrinsic::GetOverwrittenOpNumForFMA(GenTree* use, GenTree* op } else { - GenTreeLclVarCommon* overwritten = use->AsLclVarCommon(); - unsigned overwrittenLclNum = overwritten->GetLclNum(); + GenTreeLclVarCommon* overwritten = use->AsLclVarCommon(); + unsigned overwrittenLclNum = overwritten->GetLclNum(); if (op1->IsLocal() && op1->AsLclVarCommon()->GetLclNum() == overwrittenLclNum) { overwrittenOpNum = 1; diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index e9be7e61f9a35b..113d8727e76b1e 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -2189,7 +2189,6 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) op1Reg = op2->GetRegNum(); op2Reg = op3->GetRegNum(); op3 = op1; - } else { @@ -2231,7 +2230,6 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) } isCommutative = copiesUpperBits; } - } if (isCommutative && (op1Reg != targetReg) && (op2Reg == targetReg)) diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 2babe03768d25f..3cba270d34df1e 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -6313,9 +6313,9 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) { if ((intrinsicId >= NI_FMA_MultiplyAdd) && (intrinsicId <= NI_FMA_MultiplySubtractNegatedScalar)) { - bool supportsRegOptional = false; - bool supportsRegOptional2 = false; - unsigned overwrittenOpNum = 0; + bool supportsRegOptional = false; + bool supportsRegOptional2 = false; + unsigned overwrittenOpNum = 0; LIR::Use use; if (BlockRange().TryGetUse(node, &use)) @@ -6323,30 +6323,31 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) overwrittenOpNum = node->GetOverwrittenOpNumForFMA(use.User(), op1, op2, op3); } - if (overwrittenOpNum != 1 && IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional) && !HWIntrinsicInfo::CopiesUpperBits(intrinsicId)) + if (overwrittenOpNum != 1 && IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional) && + !HWIntrinsicInfo::CopiesUpperBits(intrinsicId)) { - // result = ([op1] * op2) + op3 - MakeSrcContained(node, op1); + // result = ([op1] * op2) + op3 + MakeSrcContained(node, op1); } - else if (overwrittenOpNum != 2 && IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional2)) + else if (overwrittenOpNum != 2 && IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional2)) { - // result = (op1 * [op2]) + op3 - MakeSrcContained(node, op2); + // result = (op1 * [op2]) + op3 + MakeSrcContained(node, op2); } else if (overwrittenOpNum != 3 && IsContainableHWIntrinsicOp(node, op3, &supportsRegOptional)) { - // result = (op1 * op2) + [op3] - MakeSrcContained(node, op3); + // result = (op1 * op2) + [op3] + MakeSrcContained(node, op3); } else if (overwrittenOpNum == 3) { - assert(supportsRegOptional2); - op2->SetRegOptional(); + assert(supportsRegOptional2); + op2->SetRegOptional(); } else { - assert(supportsRegOptional); - op3->SetRegOptional(); + assert(supportsRegOptional); + op3->SetRegOptional(); } } else diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 32f78ef7fac34c..7310d28a65b612 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2361,7 +2361,6 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) // op1 = (op1 * op2) + [op3] srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1); srcCount += BuildDelayFreeUses(op2, op1); - } } else if (overwrittenOpNum == 2) @@ -2399,7 +2398,6 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) srcCount += op2->isContained() ? BuildOperandUses(op2) : BuildDelayFreeUses(op2, op3); srcCount += BuildDelayFreeUses(op1, op3); } - } else { @@ -2430,9 +2428,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1); } } - } - + buildUses = false; break; } From 9955389007ee4c06163f7bca81479e61a6fbfcdb Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Fri, 1 Oct 2021 10:23:05 -0700 Subject: [PATCH 11/38] Change variable and function names --- src/coreclr/jit/gentree.cpp | 19 +++++++-------- src/coreclr/jit/gentree.h | 2 +- src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 26 +++++++++++---------- src/coreclr/jit/lowerxarch.cpp | 12 +++++----- src/coreclr/jit/lsraxarch.cpp | 17 +++++++++----- 5 files changed, 41 insertions(+), 35 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 01157ca486437c..f261208475a52b 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -19797,21 +19797,20 @@ uint16_t GenTreeLclVarCommon::GetLclOffs() const // In the case that none of the operand is overwritten, check if any of them is lastUse. // // Return Value: -// The operand number overwritten or lastUse. 0 is the default value, where no overwritten and no lastUse. +// The operand number overwritten or lastUse. 0 is the default value, where the result is written into a destination that is not one of the source operands. // -unsigned GenTreeHWIntrinsic::GetOverwrittenOpNumForFMA(GenTree* use, GenTree* op1, GenTree* op2, GenTree* op3) +unsigned GenTreeHWIntrinsic::GetResultOpNumForFMA(GenTree* use, GenTree* op1, GenTree* op2, GenTree* op3) { // only FMA intrinsic node should call into this function assert(HWIntrinsicInfo::lookupIsa(gtHWIntrinsicId) == InstructionSet_FMA); - unsigned overwrittenOpNum = 0; // 1->op1, 2->op2, 3->op3 if (!use->OperIs(GT_STORE_LCL_VAR)) { if (op1->OperIs(GT_LCL_VAR) && op1->IsLastUse(0)) - overwrittenOpNum = 1; + return 1; else if (op3->OperIs(GT_LCL_VAR) && op3->IsLastUse(0)) - overwrittenOpNum = 3; + return 3; else - overwrittenOpNum = 2; + return 2; } else { @@ -19819,19 +19818,19 @@ unsigned GenTreeHWIntrinsic::GetOverwrittenOpNumForFMA(GenTree* use, GenTree* op unsigned overwrittenLclNum = overwritten->GetLclNum(); if (op1->IsLocal() && op1->AsLclVarCommon()->GetLclNum() == overwrittenLclNum) { - overwrittenOpNum = 1; + return 1; } else if (op2->IsLocal() && op2->AsLclVarCommon()->GetLclNum() == overwrittenLclNum) { - overwrittenOpNum = 2; + return 2; } else if (op3->IsLocal() && op3->AsLclVarCommon()->GetLclNum() == overwrittenLclNum) { - overwrittenOpNum = 3; + return 3; } } - return overwrittenOpNum; + return 0; } #endif // TARGET_XARCH && FEATURE_HW_INTRINSICS diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index ba809486621b65..e55dde968cf34a 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -5187,7 +5187,7 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic bool OperIsMemoryLoadOrStore() const; // Returns true for the HW Intrinsic instructions that have MemoryLoad or // MemoryStore semantics, false otherwise - unsigned GetOverwrittenOpNumForFMA(GenTree* use, GenTree* op1, GenTree* op2, GenTree* op3); + unsigned GetResultOpNumForFMA(GenTree* use, GenTree* op1, GenTree* op2, GenTree* op3); #if DEBUGGABLE_GENTREE GenTreeHWIntrinsic() : GenTreeJitIntrinsic() diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 113d8727e76b1e..1f27c31abeaa5d 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -2108,7 +2108,9 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; var_types baseType = node->GetSimdBaseType(); emitAttr attr = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->GetSimdSize())); - instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); + instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); // 213 form + instruction _132form = (instruction)(ins - 1); + instruction _232form = (instruction)(ins - 1); GenTree* op1 = node->gtGetOp1(); regNumber targetReg = node->GetRegNum(); @@ -2133,21 +2135,21 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) // Intrinsics with CopyUpperBits semantics cannot have op1 be contained assert(!copiesUpperBits || !op1->isContained()); - unsigned overwrittenOpNum = 0; + unsigned resultOpNum = 0; LIR::Use use; if (LIR::AsRange(compiler->compCurBB).TryGetUse(node, &use)) { - overwrittenOpNum = node->GetOverwrittenOpNumForFMA(use.User(), op1, op2, op3); + resultOpNum = node->GetResultOpNumForFMA(use.User(), op1, op2, op3); } // Intrinsics with CopyUpperBits semantics must have op1 as target - if (overwrittenOpNum == 1 || copiesUpperBits) + if (resultOpNum == 1 || copiesUpperBits) { if (op2->isContained() || op2->IsRegOptional()) { // op1 = (op1 * [op2]) + op3 // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 - ins = (instruction)(ins - 1); + ins = _132form; op1Reg = op1->GetRegNum(); op2Reg = op3->GetRegNum(); op3 = op2; @@ -2161,10 +2163,10 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) } isCommutative = copiesUpperBits; } - else if (overwrittenOpNum == 3) + else if (resultOpNum == 3) { // 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1 - ins = (instruction)(ins + 1); + ins = _232form; op1Reg = op3->GetRegNum(); if (op1->isContained()) { @@ -2179,13 +2181,13 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) op3 = op2; } } - else if (overwrittenOpNum == 2) + else if (resultOpNum == 2) { if (op1->isContained()) { // op2 = ([op1] * op2) + op3 // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 - ins = (instruction)(ins - 1); + ins = _132form; op1Reg = op2->GetRegNum(); op2Reg = op3->GetRegNum(); op3 = op1; @@ -2200,12 +2202,12 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) } else { - assert(overwrittenOpNum == 0); + assert(resultOpNum == 0); if (op1->isContained()) { // op2 = ([op1] * op2) + op3 // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 - ins = (instruction)(ins - 1); + ins = _132form; op1Reg = op2->GetRegNum(); op2Reg = op3->GetRegNum(); op3 = op1; @@ -2216,7 +2218,7 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) { // op1 = (op1 * [op2]) + op3 // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 - ins = (instruction)(ins - 1); + ins = _132form; op1Reg = op1->GetRegNum(); op2Reg = op3->GetRegNum(); op3 = op2; diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 3cba270d34df1e..4a6ed076b97d5e 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -6315,31 +6315,31 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) { bool supportsRegOptional = false; bool supportsRegOptional2 = false; - unsigned overwrittenOpNum = 0; + unsigned resultOpNum = 0; LIR::Use use; if (BlockRange().TryGetUse(node, &use)) { - overwrittenOpNum = node->GetOverwrittenOpNumForFMA(use.User(), op1, op2, op3); + resultOpNum = node->GetResultOpNumForFMA(use.User(), op1, op2, op3); } - if (overwrittenOpNum != 1 && IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional) && + if (resultOpNum != 1 && IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional) && !HWIntrinsicInfo::CopiesUpperBits(intrinsicId)) { // result = ([op1] * op2) + op3 MakeSrcContained(node, op1); } - else if (overwrittenOpNum != 2 && IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional2)) + else if (resultOpNum != 2 && IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional2)) { // result = (op1 * [op2]) + op3 MakeSrcContained(node, op2); } - else if (overwrittenOpNum != 3 && IsContainableHWIntrinsicOp(node, op3, &supportsRegOptional)) + else if (resultOpNum != 3 && IsContainableHWIntrinsicOp(node, op3, &supportsRegOptional)) { // result = (op1 * op2) + [op3] MakeSrcContained(node, op3); } - else if (overwrittenOpNum == 3) + else if (resultOpNum == 3) { assert(supportsRegOptional2); op2->SetRegOptional(); diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 7310d28a65b612..d48be418d88956 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2334,18 +2334,18 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) const bool copiesUpperBits = HWIntrinsicInfo::CopiesUpperBits(intrinsicId); - unsigned overwrittenOpNum = 0; + unsigned resultOpNum = 0; LIR::Use use; if (LIR::AsRange(blockSequence[curBBSeqNum]).TryGetUse(intrinsicTree, &use)) { - overwrittenOpNum = intrinsicTree->GetOverwrittenOpNumForFMA(use.User(), op1, op2, op3); + resultOpNum = intrinsicTree->GetResultOpNumForFMA(use.User(), op1, op2, op3); } // Intrinsics with CopyUpperBits semantics cannot have op1 be contained assert(!copiesUpperBits || !op1->isContained()); // Intrinsics with CopyUpperBits semantics must have op1 as target - if (overwrittenOpNum == 1 || copiesUpperBits) + if (resultOpNum == 1 || copiesUpperBits) { tgtPrefUse = BuildUse(op1); srcCount += 1; @@ -2363,7 +2363,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) srcCount += BuildDelayFreeUses(op2, op1); } } - else if (overwrittenOpNum == 2) + else if (resultOpNum == 2) { tgtPrefUse = BuildUse(op2); srcCount += 1; @@ -2381,7 +2381,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1); } } - else if (overwrittenOpNum == 3) + else if (resultOpNum == 3) { tgtPrefUse = BuildUse(op3); srcCount += 1; @@ -2401,9 +2401,12 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) } else { - assert(overwrittenOpNum == 0); + assert(resultOpNum == 0); if (op1->isContained()) { + // In the case that result is writtent into destination that is different from any of the + // source operands, we set op2 to be tgtPrefUse when op1 is contained. + tgtPrefUse = BuildUse(op2); srcCount += 1; // result = ([op1] * op2) + op3 @@ -2412,6 +2415,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) } else { + // When op1 is not contained, we set op1 to be tgtPrefUse. + tgtPrefUse = BuildUse(op1); srcCount += 1; From 7c566534bfcf5a709925ce8022ac452f9f513f95 Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Tue, 5 Oct 2021 08:57:55 -0700 Subject: [PATCH 12/38] Update regOptional for op1 and resolve some other comments --- src/coreclr/jit/gentree.cpp | 26 +++++++++++--------- src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 27 ++++++++++++--------- src/coreclr/jit/lowerxarch.cpp | 22 ++++++++++------- src/coreclr/jit/lsraxarch.cpp | 8 +++--- 4 files changed, 48 insertions(+), 35 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index f261208475a52b..b39c9d91251d7c 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -19797,23 +19797,17 @@ uint16_t GenTreeLclVarCommon::GetLclOffs() const // In the case that none of the operand is overwritten, check if any of them is lastUse. // // Return Value: -// The operand number overwritten or lastUse. 0 is the default value, where the result is written into a destination that is not one of the source operands. +// The operand number overwritten or lastUse. 0 is the default value, where the result is written into +// a destination that is not one of the source operands. // unsigned GenTreeHWIntrinsic::GetResultOpNumForFMA(GenTree* use, GenTree* op1, GenTree* op2, GenTree* op3) { // only FMA intrinsic node should call into this function assert(HWIntrinsicInfo::lookupIsa(gtHWIntrinsicId) == InstructionSet_FMA); - if (!use->OperIs(GT_STORE_LCL_VAR)) - { - if (op1->OperIs(GT_LCL_VAR) && op1->IsLastUse(0)) - return 1; - else if (op3->OperIs(GT_LCL_VAR) && op3->IsLastUse(0)) - return 3; - else - return 2; - } - else + if (use->OperIs(GT_STORE_LCL_VAR)) { + // For store_lcl_var, check if any op is overwritten + GenTreeLclVarCommon* overwritten = use->AsLclVarCommon(); unsigned overwrittenLclNum = overwritten->GetLclNum(); if (op1->IsLocal() && op1->AsLclVarCommon()->GetLclNum() == overwrittenLclNum) @@ -19829,7 +19823,17 @@ unsigned GenTreeHWIntrinsic::GetResultOpNumForFMA(GenTree* use, GenTree* op1, Ge return 3; } } + else + { + // For LclVar, check if any op is lastUse + if (op1->OperIs(GT_LCL_VAR) && op1->IsLastUse(0)) + return 1; + else if (op3->OperIs(GT_LCL_VAR) && op3->IsLastUse(0)) + return 3; + else + return 2; + } return 0; } #endif // TARGET_XARCH && FEATURE_HW_INTRINSICS diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 1f27c31abeaa5d..baace6aac64246 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -2156,19 +2156,20 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) } else { + assert(op3->isContained() || op3->IsRegOptional()); // op1 = (op1 * op2) + [op3] // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] - op1Reg = op1->GetRegNum(); - op2Reg = op2->GetRegNum(); + op1Reg = op1->GetRegNum(); + op2Reg = op2->GetRegNum(); + isCommutative = copiesUpperBits; } - isCommutative = copiesUpperBits; } else if (resultOpNum == 3) { // 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1 ins = _232form; op1Reg = op3->GetRegNum(); - if (op1->isContained()) + if (op1->isContained() || op1->IsRegOptional()) { // op3 = ([op1] * op2) + op3 op2Reg = op2->GetRegNum(); @@ -2176,6 +2177,7 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) } else { + assert(op2->isContained() || op2->IsRegOptional()); // op3 = (op1 * [op2]) + op3 op2Reg = op1->GetRegNum(); op3 = op2; @@ -2183,7 +2185,7 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) } else if (resultOpNum == 2) { - if (op1->isContained()) + if (op1->isContained() || op1->IsRegOptional()) { // op2 = ([op1] * op2) + op3 // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 @@ -2194,16 +2196,18 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) } else { + assert(op3->isContained() || op3->IsRegOptional()); // op2 = (op1 * op2) + [op3] // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] - op1Reg = op2->GetRegNum(); - op2Reg = op1->GetRegNum(); + op1Reg = op2->GetRegNum(); + op2Reg = op1->GetRegNum(); + isCommutative = copiesUpperBits; } } else { assert(resultOpNum == 0); - if (op1->isContained()) + if (op1->isContained() || op1->IsRegOptional()) { // op2 = ([op1] * op2) + op3 // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 @@ -2225,12 +2229,13 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) } else { + assert(op3->isContained() || op3->IsRegOptional()); // op1 = (op1 * op2) + [op3] // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] - op1Reg = op1->GetRegNum(); - op2Reg = op2->GetRegNum(); + op1Reg = op1->GetRegNum(); + op2Reg = op2->GetRegNum(); + isCommutative = copiesUpperBits; } - isCommutative = copiesUpperBits; } } diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 4a6ed076b97d5e..503010ed25c0a5 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -6313,35 +6313,39 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) { if ((intrinsicId >= NI_FMA_MultiplyAdd) && (intrinsicId <= NI_FMA_MultiplySubtractNegatedScalar)) { - bool supportsRegOptional = false; - bool supportsRegOptional2 = false; - unsigned resultOpNum = 0; + bool supportsRegOptional = false; + unsigned overwrittenOpNum = 0; LIR::Use use; if (BlockRange().TryGetUse(node, &use)) { - resultOpNum = node->GetResultOpNumForFMA(use.User(), op1, op2, op3); + overwrittenOpNum = node->GetResultOpNumForFMA(use.User(), op1, op2, op3); } - if (resultOpNum != 1 && IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional) && + if (overwrittenOpNum != 1 && IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional) && !HWIntrinsicInfo::CopiesUpperBits(intrinsicId)) { // result = ([op1] * op2) + op3 MakeSrcContained(node, op1); } - else if (resultOpNum != 2 && IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional2)) + else if (overwrittenOpNum != 2 && IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional)) { // result = (op1 * [op2]) + op3 MakeSrcContained(node, op2); } - else if (resultOpNum != 3 && IsContainableHWIntrinsicOp(node, op3, &supportsRegOptional)) + else if (overwrittenOpNum != 3 && IsContainableHWIntrinsicOp(node, op3, &supportsRegOptional)) { // result = (op1 * op2) + [op3] MakeSrcContained(node, op3); } - else if (resultOpNum == 3) + else if (overwrittenOpNum != 1 && !HWIntrinsicInfo::CopiesUpperBits(intrinsicId)) { - assert(supportsRegOptional2); + assert(supportsRegOptional); + op1->SetRegOptional(); + } + else if (overwrittenOpNum != 2) + { + assert(supportsRegOptional); op2->SetRegOptional(); } else diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index d48be418d88956..bc91b63e419693 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2368,7 +2368,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) tgtPrefUse = BuildUse(op2); srcCount += 1; - if (op1->isContained()) + if (op1->isContained() || op1->IsRegOptional()) { // op2 = ([op1] * op2) + op3 srcCount += BuildOperandUses(op1); @@ -2386,7 +2386,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) tgtPrefUse = BuildUse(op3); srcCount += 1; - if (op1->isContained()) + if (op1->isContained() || op1->IsRegOptional()) { // op3 = ([op1] * op2) + op3 srcCount += BuildOperandUses(op1); @@ -2402,10 +2402,10 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) else { assert(resultOpNum == 0); - if (op1->isContained()) + if (op1->isContained() || op1->IsRegOptional()) { // In the case that result is writtent into destination that is different from any of the - // source operands, we set op2 to be tgtPrefUse when op1 is contained. + // source operands, we set op2 to be tgtPrefUse when op1 is contained. tgtPrefUse = BuildUse(op2); srcCount += 1; From 1d51caabe802e533e8bf87a6967c01a0b6ebf516 Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Tue, 20 Jul 2021 16:03:34 -0700 Subject: [PATCH 13/38] Optimize FMA codegen base on the overwritten --- src/coreclr/jit/gentree.cpp | 26 +++++++ src/coreclr/jit/gentree.h | 1 + src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 6 +- src/coreclr/jit/lowerxarch.cpp | 79 ++++++++++++++------- src/coreclr/jit/lsraxarch.cpp | 8 ++- 5 files changed, 90 insertions(+), 30 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 800a09399012d5..eea18286389859 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -21818,6 +21818,32 @@ uint16_t GenTreeLclVarCommon::GetLclOffs() const } } +#ifdef FEATURE_HW_INTRINSICS +//------------------------------------------------------------------------ +// +unsigned GenTreeHWIntrinsic::GetFMAOverwritten(GenTree* op1, GenTree* op2, GenTree* op3) +{ + // only FMA intrinsic node should call into this function + GenTreeLclVarCommon* overwritten = this->gtNext->AsLclVarCommon(); + assert(overwritten->gtOper == GT_STORE_LCL_VAR || overwritten->gtOper == GT_LCL_VAR); + unsigned flag = 0; // 1->op1, 2->op2, 3->op3 + if (op1->IsLocal() && op1->AsLclVarCommon()->GetLclNum() == overwritten->GetLclNum()) + { + flag = 1; + } + else if (op2->IsLocal() && op2->AsLclVarCommon()->GetLclNum() == overwritten->GetLclNum()) + { + flag = 2; + } + else if (op3->IsLocal() && op3->AsLclVarCommon()->GetLclNum() == overwritten->GetLclNum()) + { + flag = 3; + } + + return flag; +} +#endif + #ifdef TARGET_ARM //------------------------------------------------------------------------ // IsOffsetMisaligned: check if the field needs a special handling on arm. diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 7fc867aef130ab..6faacee1b30362 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -5278,6 +5278,7 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic { return (gtFlags & GTF_SIMDASHW_OP) != 0; } + unsigned GetFMAOverwritten(GenTree* op1, GenTree* op2, GenTree* op3); #if DEBUGGABLE_GENTREE GenTreeHWIntrinsic() : GenTreeJitIntrinsic() diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 8523b529cbec42..a49d467dbb2f3a 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -2131,7 +2131,9 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) // Intrinsics with CopyUpperBits semantics cannot have op1 be contained assert(!copiesUpperBits || !op1->isContained()); - if (op2->isContained() || op2->isUsedFromSpillTemp()) + unsigned flag = node->GetFMAOverwritten(op1, op2, op3); + + if (flag == 1) { // 132 form: op1 = (op1 * op3) + [op2] @@ -2140,7 +2142,7 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) op2Reg = op3->GetRegNum(); op3 = op2; } - else if (op1->isContained() || op1->isUsedFromSpillTemp()) + else if (flag == 3) { // 231 form: op3 = (op2 * op3) + [op1] diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index e8b1353a24de0a..ef264f9de1fa40 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -6337,37 +6337,66 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) { bool supportsRegOptional = false; - if (IsContainableHWIntrinsicOp(node, op3, &supportsRegOptional)) - { - // 213 form: op1 = (op2 * op1) + [op3] - MakeSrcContained(node, op3); - } - else if (IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional)) - { - // 132 form: op1 = (op1 * op3) + [op2] - MakeSrcContained(node, op2); - } - else if (IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional)) + unsigned flag = node->GetFMAOverwritten(op1, op2, op3); + + switch (flag) { - // Intrinsics with CopyUpperBits semantics cannot have op1 be contained + case 1: + { + if (IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional)) + { + // 132 form: op1 = (op1 * op3) + [op2] + MakeSrcContained(node, op2); + } + else + { + assert(supportsRegOptional); - if (!HWIntrinsicInfo::CopiesUpperBits(intrinsicId)) + // 132 form: op1 = (op1 * op3) + [op2] + op2->SetRegOptional(); + } + break; + } + case 3: { - // 231 form: op3 = (op2 * op3) + [op1] - MakeSrcContained(node, op1); + if (IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional)) + { + // Intrinsics with CopyUpperBits semantics cannot have op1 be contained + + if (!HWIntrinsicInfo::CopiesUpperBits(intrinsicId)) + { + // 231 form: op1 = (op2 * op3) + [op1] + MakeSrcContained(node, op1); + } + } + else + { + assert(supportsRegOptional); + // 231 form: op1 = (op2 * op3) + [op1] + op1->SetRegOptional(); + } + break; } - } - else - { - assert(supportsRegOptional); + default: + { + if (IsContainableHWIntrinsicOp(node, op3, &supportsRegOptional)) + { + // 213 form: op1 = (op2 * op1) + [op3] + MakeSrcContained(node, op3); + } + else + { + assert(supportsRegOptional); - // TODO-XArch-CQ: Technically any one of the three operands can - // be reg-optional. With a limitation on op1 where - // it can only be so if CopyUpperBits is off. - // https://github.com/dotnet/runtime/issues/6358 + // TODO-XArch-CQ: Technically any one of the three operands can + // be reg-optional. With a limitation on op1 where + // it can only be so if CopyUpperBits is off. + // https://github.com/dotnet/runtime/issues/6358 - // 213 form: op1 = (op2 * op1) + op3 - op3->SetRegOptional(); + // 213 form: op1 = (op2 * op1) + [op3] + op3->SetRegOptional(); + } + } } } else diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index cda91bdd6a1f1b..6714d9abb9fcab 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2328,10 +2328,12 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) const bool copiesUpperBits = HWIntrinsicInfo::CopiesUpperBits(intrinsicId); + unsigned flag = intrinsicTree->GetFMAOverwritten(op1, op2, op3); + // Intrinsics with CopyUpperBits semantics cannot have op1 be contained assert(!copiesUpperBits || !op1->isContained()); - if (op2->isContained()) + if (flag == 1) { // 132 form: op1 = (op1 * op3) + [op2] @@ -2341,9 +2343,9 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) srcCount += BuildOperandUses(op2); srcCount += BuildDelayFreeUses(op3, op1); } - else if (op1->isContained()) + else if (flag == 3) { - // 231 form: op3 = (op2 * op3) + [op1] + // 231 form: op1 = (op2 * op3) + [op1] tgtPrefUse = BuildUse(op3); From 091133efc7b10a28e6138101964d744e9eb263ea Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Fri, 27 Aug 2021 10:39:42 -0700 Subject: [PATCH 14/38] Improve function/var names --- src/coreclr/jit/gentree.cpp | 24 ++++++++++++--------- src/coreclr/jit/gentree.h | 2 +- src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 8 +++---- src/coreclr/jit/lowerxarch.cpp | 4 ++-- src/coreclr/jit/lsraxarch.cpp | 6 +++--- 5 files changed, 24 insertions(+), 20 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index eea18286389859..2e71da04174e14 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -21818,31 +21818,35 @@ uint16_t GenTreeLclVarCommon::GetLclOffs() const } } -#ifdef FEATURE_HW_INTRINSICS +#if defined(TARGET_XARCH) && defined(FEATURE_HW_INTRINSICS) //------------------------------------------------------------------------ +// GetOverwrittenOpNumForFMA: check if the result is written into one of the operands +// +// Return Value: +// The operand number or 0 if not overwritten. // -unsigned GenTreeHWIntrinsic::GetFMAOverwritten(GenTree* op1, GenTree* op2, GenTree* op3) +unsigned GenTreeHWIntrinsic::GetOverwrittenOpNumForFMA(GenTree* op1, GenTree* op2, GenTree* op3) { // only FMA intrinsic node should call into this function - GenTreeLclVarCommon* overwritten = this->gtNext->AsLclVarCommon(); - assert(overwritten->gtOper == GT_STORE_LCL_VAR || overwritten->gtOper == GT_LCL_VAR); - unsigned flag = 0; // 1->op1, 2->op2, 3->op3 + GenTreeLclVarCommon* overwritten = gtNext->AsLclVarCommon(); + assert(overwritten->OperIs(GT_STORE_LCL_VAR, GT_LCL_VAR)); + unsigned overwrittenOpNum = 0; // 1->op1, 2->op2, 3->op3 if (op1->IsLocal() && op1->AsLclVarCommon()->GetLclNum() == overwritten->GetLclNum()) { - flag = 1; + overwrittenOpNum = 1; } else if (op2->IsLocal() && op2->AsLclVarCommon()->GetLclNum() == overwritten->GetLclNum()) { - flag = 2; + overwrittenOpNum = 2; } else if (op3->IsLocal() && op3->AsLclVarCommon()->GetLclNum() == overwritten->GetLclNum()) { - flag = 3; + overwrittenOpNum = 3; } - return flag; + return overwrittenOpNum; } -#endif +#endif // TARGET_XARCH && FEATURE_HW_INTRINSICS #ifdef TARGET_ARM //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 6faacee1b30362..cb6437be696332 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -5278,7 +5278,7 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic { return (gtFlags & GTF_SIMDASHW_OP) != 0; } - unsigned GetFMAOverwritten(GenTree* op1, GenTree* op2, GenTree* op3); + unsigned GetOverwrittenOpNumForFMA(GenTree* op1, GenTree* op2, GenTree* op3); #if DEBUGGABLE_GENTREE GenTreeHWIntrinsic() : GenTreeJitIntrinsic() diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index a49d467dbb2f3a..7f0947ece99a8e 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -2131,9 +2131,9 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) // Intrinsics with CopyUpperBits semantics cannot have op1 be contained assert(!copiesUpperBits || !op1->isContained()); - unsigned flag = node->GetFMAOverwritten(op1, op2, op3); + unsigned overwrittenOpNum = node->GetOverwrittenOpNumForFMA(op1, op2, op3); - if (flag == 1) + if (overwrittenOpNum == 1) { // 132 form: op1 = (op1 * op3) + [op2] @@ -2142,9 +2142,9 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) op2Reg = op3->GetRegNum(); op3 = op2; } - else if (flag == 3) + else if (overwrittenOpNum == 3) { - // 231 form: op3 = (op2 * op3) + [op1] + // 231 form: op1 = (op2 * op3) + [op1] ins = (instruction)(ins + 1); op1Reg = op3->GetRegNum(); diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index ef264f9de1fa40..adf35a06c13776 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -6337,9 +6337,9 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) { bool supportsRegOptional = false; - unsigned flag = node->GetFMAOverwritten(op1, op2, op3); + unsigned overwrittenOpNum = node->GetOverwrittenOpNumForFMA(op1, op2, op3); - switch (flag) + switch (overwrittenOpNum) { case 1: { diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 6714d9abb9fcab..be95569cca61aa 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2328,12 +2328,12 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) const bool copiesUpperBits = HWIntrinsicInfo::CopiesUpperBits(intrinsicId); - unsigned flag = intrinsicTree->GetFMAOverwritten(op1, op2, op3); + unsigned overwrittenOpNum = intrinsicTree->GetOverwrittenOpNumForFMA(op1, op2, op3); // Intrinsics with CopyUpperBits semantics cannot have op1 be contained assert(!copiesUpperBits || !op1->isContained()); - if (flag == 1) + if (overwrittenOpNum == 1) { // 132 form: op1 = (op1 * op3) + [op2] @@ -2343,7 +2343,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) srcCount += BuildOperandUses(op2); srcCount += BuildDelayFreeUses(op3, op1); } - else if (flag == 3) + else if (overwrittenOpNum == 3) { // 231 form: op1 = (op2 * op3) + [op1] From 9a6ae447d7d49f226c1ca0a89a270985c40297b5 Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Fri, 27 Aug 2021 13:31:34 -0700 Subject: [PATCH 15/38] Add assertions --- src/coreclr/jit/gentree.cpp | 1 + src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 1 + src/coreclr/jit/lsraxarch.cpp | 1 + 3 files changed, 3 insertions(+) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 2e71da04174e14..389204dea00d43 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -21828,6 +21828,7 @@ uint16_t GenTreeLclVarCommon::GetLclOffs() const unsigned GenTreeHWIntrinsic::GetOverwrittenOpNumForFMA(GenTree* op1, GenTree* op2, GenTree* op3) { // only FMA intrinsic node should call into this function + assert(HWIntrinsicInfo::lookupIsa(gtHWIntrinsicId) == InstructionSet_FMA); GenTreeLclVarCommon* overwritten = gtNext->AsLclVarCommon(); assert(overwritten->OperIs(GT_STORE_LCL_VAR, GT_LCL_VAR)); unsigned overwrittenOpNum = 0; // 1->op1, 2->op2, 3->op3 diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 7f0947ece99a8e..27865bfbd84eee 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -2154,6 +2154,7 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) else { // 213 form: op1 = (op2 * op1) + [op3] + assert(overwrittenOpNum == 2 || overwrittenOpNum == 0); op1Reg = op1->GetRegNum(); op2Reg = op2->GetRegNum(); diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index be95569cca61aa..9694ab360577fd 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2356,6 +2356,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) else { // 213 form: op1 = (op2 * op1) + [op3] + assert(overwrittenOpNum == 2 || overwrittenOpNum == 0); tgtPrefUse = BuildUse(op1); srcCount += 1; From ffcff76140de01e0fefe844288aa3cc200ae2240 Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Tue, 7 Sep 2021 11:45:41 -0700 Subject: [PATCH 16/38] Get use of FMA with TryGetUse --- src/coreclr/jit/gentree.cpp | 14 ++++++++------ src/coreclr/jit/gentree.h | 2 +- src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 10 ++++++++-- src/coreclr/jit/lowerxarch.cpp | 8 +++++++- src/coreclr/jit/lsraxarch.cpp | 10 ++++++++-- 5 files changed, 32 insertions(+), 12 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 389204dea00d43..6701b37dfb4729 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -21825,22 +21825,24 @@ uint16_t GenTreeLclVarCommon::GetLclOffs() const // Return Value: // The operand number or 0 if not overwritten. // -unsigned GenTreeHWIntrinsic::GetOverwrittenOpNumForFMA(GenTree* op1, GenTree* op2, GenTree* op3) +unsigned GenTreeHWIntrinsic::GetOverwrittenOpNumForFMA(GenTree* use, GenTree* op1, GenTree* op2, GenTree* op3) { // only FMA intrinsic node should call into this function assert(HWIntrinsicInfo::lookupIsa(gtHWIntrinsicId) == InstructionSet_FMA); - GenTreeLclVarCommon* overwritten = gtNext->AsLclVarCommon(); - assert(overwritten->OperIs(GT_STORE_LCL_VAR, GT_LCL_VAR)); + if (!use->OperIs(GT_STORE_LCL_VAR)) + return 0; + GenTreeLclVarCommon* overwritten = use->AsLclVarCommon(); + unsigned overwrittenLclNum = overwritten->GetLclNum(); unsigned overwrittenOpNum = 0; // 1->op1, 2->op2, 3->op3 - if (op1->IsLocal() && op1->AsLclVarCommon()->GetLclNum() == overwritten->GetLclNum()) + if (op1->IsLocal() && op1->AsLclVarCommon()->GetLclNum() == overwrittenLclNum) { overwrittenOpNum = 1; } - else if (op2->IsLocal() && op2->AsLclVarCommon()->GetLclNum() == overwritten->GetLclNum()) + else if (op2->IsLocal() && op2->AsLclVarCommon()->GetLclNum() == overwrittenLclNum) { overwrittenOpNum = 2; } - else if (op3->IsLocal() && op3->AsLclVarCommon()->GetLclNum() == overwritten->GetLclNum()) + else if (op3->IsLocal() && op3->AsLclVarCommon()->GetLclNum() == overwrittenLclNum) { overwrittenOpNum = 3; } diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index cb6437be696332..bd8e767953279c 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -5278,7 +5278,7 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic { return (gtFlags & GTF_SIMDASHW_OP) != 0; } - unsigned GetOverwrittenOpNumForFMA(GenTree* op1, GenTree* op2, GenTree* op3); + unsigned GetOverwrittenOpNumForFMA(GenTree* use, GenTree* op1, GenTree* op2, GenTree* op3); #if DEBUGGABLE_GENTREE GenTreeHWIntrinsic() : GenTreeJitIntrinsic() diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 27865bfbd84eee..6d61972223a0a2 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -2131,7 +2131,12 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) // Intrinsics with CopyUpperBits semantics cannot have op1 be contained assert(!copiesUpperBits || !op1->isContained()); - unsigned overwrittenOpNum = node->GetOverwrittenOpNumForFMA(op1, op2, op3); + unsigned overwrittenOpNum = 0; + LIR::Use use; + if (LIR::AsRange(compiler->compCurBB).TryGetUse(node, &use)) + { + overwrittenOpNum = node->GetOverwrittenOpNumForFMA(use.User(), op1, op2, op3); + } if (overwrittenOpNum == 1) { @@ -2153,7 +2158,8 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) } else { - // 213 form: op1 = (op2 * op1) + [op3] + // op2 = op1 * op2 + op3 + // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] assert(overwrittenOpNum == 2 || overwrittenOpNum == 0); op1Reg = op1->GetRegNum(); diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index adf35a06c13776..97bdf80b6b711a 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -6336,8 +6336,14 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) if ((intrinsicId >= NI_FMA_MultiplyAdd) && (intrinsicId <= NI_FMA_MultiplySubtractNegatedScalar)) { bool supportsRegOptional = false; + unsigned overwrittenOpNum = 0; + LIR::Use use; + + if (BlockRange().TryGetUse(node, &use)) + { + overwrittenOpNum = node->GetOverwrittenOpNumForFMA(use.User(), op1, op2, op3); + } - unsigned overwrittenOpNum = node->GetOverwrittenOpNumForFMA(op1, op2, op3); switch (overwrittenOpNum) { diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 9694ab360577fd..1617222af9203a 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2328,7 +2328,12 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) const bool copiesUpperBits = HWIntrinsicInfo::CopiesUpperBits(intrinsicId); - unsigned overwrittenOpNum = intrinsicTree->GetOverwrittenOpNumForFMA(op1, op2, op3); + unsigned overwrittenOpNum = 0; + LIR::Use use; + if (LIR::AsRange(blockSequence[curBBSeqNum]).TryGetUse(intrinsicTree, &use)) + { + overwrittenOpNum = intrinsicTree->GetOverwrittenOpNumForFMA(use.User(), op1, op2, op3); + } // Intrinsics with CopyUpperBits semantics cannot have op1 be contained assert(!copiesUpperBits || !op1->isContained()); @@ -2355,7 +2360,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) } else { - // 213 form: op1 = (op2 * op1) + [op3] + // op2 = op1 * op2 + op3 + // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] assert(overwrittenOpNum == 2 || overwrittenOpNum == 0); tgtPrefUse = BuildUse(op1); From 5641f8f992fc0510c9bae05f65aabde82cbc33ae Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Wed, 8 Sep 2021 11:13:53 -0700 Subject: [PATCH 17/38] Decide FMA form with two conditions, OverwrittenOpNum and isContained --- src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 75 +++++++++++++-------- src/coreclr/jit/lowerxarch.cpp | 53 ++++++++++----- src/coreclr/jit/lsraxarch.cpp | 69 +++++++++++++------ 3 files changed, 132 insertions(+), 65 deletions(-) diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 6d61972223a0a2..d03300900efb3a 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -2140,47 +2140,66 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) if (overwrittenOpNum == 1) { - // 132 form: op1 = (op1 * op3) + [op2] - - ins = (instruction)(ins - 1); - op1Reg = op1->GetRegNum(); - op2Reg = op3->GetRegNum(); - op3 = op2; + if (op2->isContained()) + { + // op1 = (op1 * [op2]) + op3 + // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 + ins = (instruction)(ins - 1); + op1Reg = op1->GetRegNum(); + op2Reg = op3->GetRegNum(); + op3 = op2; + } + else + { + // op1 = (op1 * op2) + [op3] + // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] + op1Reg = op1->GetRegNum(); + op2Reg = op2->GetRegNum(); + } } else if (overwrittenOpNum == 3) { - // 231 form: op1 = (op2 * op3) + [op1] - + // 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1 + // One of the following: + // op3 = ([op1] * op2) + op3 + // op3 = (op1 * [op2]) + op3 ins = (instruction)(ins + 1); op1Reg = op3->GetRegNum(); - op2Reg = op2->GetRegNum(); - op3 = op1; + if (op1->isContained()) + { + // op3 = ([op1] * op2) + op3 + op2Reg = op2->GetRegNum(); + op3 = op1; + } + else + { + // op3 = (op1 * [op2]) + op3 + op2Reg = op1->GetRegNum(); + op3 = op2; + } } else { // op2 = op1 * op2 + op3 // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] assert(overwrittenOpNum == 2 || overwrittenOpNum == 0); + if (op1->isContained()) + { + // op2 = ([op1] * op2) + op3 + // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 + op1Reg = op2->GetRegNum(); + op2Reg = op3->GetRegNum(); + op3 = op2; - op1Reg = op1->GetRegNum(); - op2Reg = op2->GetRegNum(); - - isCommutative = !copiesUpperBits; - } - - if (isCommutative && (op1Reg != targetReg) && (op2Reg == targetReg)) - { - assert(node->isRMWHWIntrinsic(compiler)); - - // We have "reg2 = (reg1 * reg2) +/- op3" where "reg1 != reg2" on a RMW intrinsic. - // - // For non-commutative intrinsics, we should have ensured that op2 was marked - // delay free in order to prevent it from getting assigned the same register - // as target. However, for commutative intrinsics, we can just swap the operands - // in order to have "reg2 = reg2 op reg1" which will end up producing the right code. + } + else + { + // op2 = (op1 * op2) + [op3] + // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] + op1Reg = op2->GetRegNum(); + op2Reg = op1->GetRegNum(); + } - op2Reg = op1Reg; - op1Reg = targetReg; } genHWIntrinsic_R_R_R_RM(ins, attr, targetReg, op1Reg, op2Reg, op3); diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 97bdf80b6b711a..62ff59f326738c 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -6344,50 +6344,68 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) overwrittenOpNum = node->GetOverwrittenOpNumForFMA(use.User(), op1, op2, op3); } - switch (overwrittenOpNum) { case 1: { if (IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional)) { - // 132 form: op1 = (op1 * op3) + [op2] + // op1 = (op1 * [op2]) + op3 + // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 MakeSrcContained(node, op2); } + else if (IsContainableHWIntrinsicOp(node, op3, &supportsRegOptional)) + { + // op1 = (op1 * op2) + [op3] + // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] + MakeSrcContained(node, op3); + + } else { assert(supportsRegOptional); - - // 132 form: op1 = (op1 * op3) + [op2] - op2->SetRegOptional(); + // op1 = (op1 * op2) + [op3] + // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] + op3->SetRegOptional(); } break; } case 3: { - if (IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional)) + // 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1 + // One of the following: + // op3 = ([op1] * op2) + op3 + // op3 = (op1 * [op2]) + op3 + if (IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional) && !HWIntrinsicInfo::CopiesUpperBits(intrinsicId)) { - // Intrinsics with CopyUpperBits semantics cannot have op1 be contained + // Intrinsics with CopyUpperBits semantics cannot have op1 be contained - if (!HWIntrinsicInfo::CopiesUpperBits(intrinsicId)) - { - // 231 form: op1 = (op2 * op3) + [op1] - MakeSrcContained(node, op1); - } + MakeSrcContained(node, op1); + } + else if (IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional)) + { + MakeSrcContained(node, op2); } else { assert(supportsRegOptional); - // 231 form: op1 = (op2 * op3) + [op1] - op1->SetRegOptional(); + op2->SetRegOptional(); } break; } default: { - if (IsContainableHWIntrinsicOp(node, op3, &supportsRegOptional)) + assert(overwrittenOpNum == 2 || overwrittenOpNum == 0); + if (IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional) && !HWIntrinsicInfo::CopiesUpperBits(intrinsicId)) + { + // op2 = ([op1] * op2) + op3 + // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 + MakeSrcContained(node, op1); + } + else if (IsContainableHWIntrinsicOp(node, op3, &supportsRegOptional)) { - // 213 form: op1 = (op2 * op1) + [op3] + // op2 = (op1 * op2) + [op3] + // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] MakeSrcContained(node, op3); } else @@ -6399,7 +6417,8 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) // it can only be so if CopyUpperBits is off. // https://github.com/dotnet/runtime/issues/6358 - // 213 form: op1 = (op2 * op1) + [op3] + // op2 = (op1 * op2) + [op3] + // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] op3->SetRegOptional(); } } diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 1617222af9203a..903ca4e4352a5f 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2340,44 +2340,73 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) if (overwrittenOpNum == 1) { - // 132 form: op1 = (op1 * op3) + [op2] + if (op2->isContained()) + { + // op1 = (op1 * [op2]) + op3 + // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 + tgtPrefUse = BuildUse(op1); - tgtPrefUse = BuildUse(op1); + srcCount += 1; + srcCount += BuildOperandUses(op2); + srcCount += BuildDelayFreeUses(op3, op1); + } + else + { + //assert(op3->isContained()); + // op1 = (op1 * op2) + [op3] + // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] + tgtPrefUse = BuildUse(op1); - srcCount += 1; - srcCount += BuildOperandUses(op2); - srcCount += BuildDelayFreeUses(op3, op1); + srcCount += 1; + srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1); + srcCount += BuildDelayFreeUses(op2, op1); + + } } else if (overwrittenOpNum == 3) { - // 231 form: op1 = (op2 * op3) + [op1] - + // 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1 + // One of the following: + // op3 = ([op1] * op2) + op3 + // op3 = (op1 * [op2]) + op3 tgtPrefUse = BuildUse(op3); - - srcCount += BuildOperandUses(op1); - srcCount += BuildDelayFreeUses(op2, op1); srcCount += 1; + if (op1->isContained()) + { + srcCount += BuildOperandUses(op1); + srcCount += BuildDelayFreeUses(op2, op3); + } + else + { + //assert(op2->isContained()); + srcCount += op2->isContained() ? BuildOperandUses(op2) : BuildDelayFreeUses(op2, op3); + srcCount += BuildDelayFreeUses(op1, op3); + } + } else { - // op2 = op1 * op2 + op3 - // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] assert(overwrittenOpNum == 2 || overwrittenOpNum == 0); - tgtPrefUse = BuildUse(op1); - srcCount += 1; - - if (copiesUpperBits) + if (op1->isContained()) { - srcCount += BuildDelayFreeUses(op2, op1); + // op2 = ([op1] * op2) + op3 + // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 + tgtPrefUse = BuildUse(op2); + srcCount += 1; + srcCount += BuildOperandUses(op1); + srcCount += BuildDelayFreeUses(op3, op2); } else { - tgtPrefUse2 = BuildUse(op2); + // op2 = (op1 * op2) + [op3] + // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] + + tgtPrefUse = BuildUse(op2); srcCount += 1; + srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1); + srcCount += BuildDelayFreeUses(op1, op2); } - - srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1); } buildUses = false; From b7312ac463b27932ef3d7816fb004e5ff0b6e2c5 Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Thu, 9 Sep 2021 21:37:29 -0700 Subject: [PATCH 18/38] Fix op reg error in codegen --- src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index d03300900efb3a..81e1f8c662b53f 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -2180,16 +2180,15 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) } else { - // op2 = op1 * op2 + op3 - // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] assert(overwrittenOpNum == 2 || overwrittenOpNum == 0); if (op1->isContained()) { // op2 = ([op1] * op2) + op3 // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 + ins = (instruction)(ins - 1); op1Reg = op2->GetRegNum(); op2Reg = op3->GetRegNum(); - op3 = op2; + op3 = op1; } else From a325fe32299112a40744367fe61173b346c699fd Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Wed, 15 Sep 2021 13:30:05 -0700 Subject: [PATCH 19/38] Decide form using lastUse and isContained in no overwritten case --- src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 91 ++++++++++++++++++++- src/coreclr/jit/lowerxarch.cpp | 43 +++++++++- src/coreclr/jit/lsraxarch.cpp | 78 +++++++++++++++++- 3 files changed, 206 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 81e1f8c662b53f..59b49dfa68fa6d 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -2178,9 +2178,8 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) op3 = op2; } } - else + else if (overwrittenOpNum == 2) { - assert(overwrittenOpNum == 2 || overwrittenOpNum == 0); if (op1->isContained()) { // op2 = ([op1] * op2) + op3 @@ -2200,6 +2199,94 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) } } + else + { + // No overwritten: + // result = (op1 * op2) + op3 + // decide form with contained op and lastUse + assert(overwrittenOpNum == 0); + if (op2->isContained() || op2->isUsedFromSpillTemp()) + { + if (op1->OperIs(GT_LCL_VAR) && op1->IsLastUse(0)) + { + // result = (op1 * [op2]) + op3 + // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 + + ins = (instruction)(ins - 1); + op1Reg = op1->GetRegNum(); + op2Reg = op3->GetRegNum(); + op3 = op2; + } + else + { + // result = (op1 * [op2]) + op3 + // 231 form XMM1 = XMM2 * [XMM3] + XMM1 + + ins = (instruction)(ins + 1); + op1Reg = op3->GetRegNum(); + op2Reg = op1->GetRegNum(); + op3 = op2; + } + } + else if (op1->isContained() || op1->isUsedFromSpillTemp()) + { + if (op2->OperIs(GT_LCL_VAR) && op2->IsLastUse(0)) + { + // result = ([op1] * op2) + op3 + // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 + + ins = (instruction)(ins - 1); + op1Reg = op2->GetRegNum(); + op2Reg = op3->GetRegNum(); + op3 = op1; + + + } + else + { + // result = ([op1] * op2) + op3 + // 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1 + + ins = (instruction)(ins + 1); + op1Reg = op3->GetRegNum(); + op2Reg = op2->GetRegNum(); + op3 = op1; + } + } + else + { + // op3 isContained or regOptional + // result = (op1 * op2) + [op3] + // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] + //if (op1->OperIs(GT_LCL_VAR) && op1->IsLastUse(0)) + //{ + op1Reg = op1->GetRegNum(); + op2Reg = op2->GetRegNum(); + //} + //else + //{ + // op1Reg = op2->GetRegNum(); + // op2Reg = op1->GetRegNum(); + //} + //isCommutative = !copiesUpperBits; + isCommutative = true; + } + + if (isCommutative && (op1Reg != targetReg) && (op2Reg == targetReg)) + { + assert(node->isRMWHWIntrinsic(compiler)); + + // We have "reg2 = (reg1 * reg2) +/- op3" where "reg1 != reg2" on a RMW intrinsic. + // + // For non-commutative intrinsics, we should have ensured that op2 was marked + // delay free in order to prevent it from getting assigned the same register + // as target. However, for commutative intrinsics, we can just swap the operands + // in order to have "reg2 = reg2 op reg1" which will end up producing the right code. + + op2Reg = op1Reg; + op1Reg = targetReg; + } + } genHWIntrinsic_R_R_R_RM(ins, attr, targetReg, op1Reg, op2Reg, op3); genProduceReg(node); diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 62ff59f326738c..eaefba51c7ba89 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -6393,9 +6393,8 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) } break; } - default: + case 2: { - assert(overwrittenOpNum == 2 || overwrittenOpNum == 0); if (IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional) && !HWIntrinsicInfo::CopiesUpperBits(intrinsicId)) { // op2 = ([op1] * op2) + op3 @@ -6421,6 +6420,46 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] op3->SetRegOptional(); } + break; + } + default: + { + // No overwritten: + // result = (op1 * op2) + op3 + // decide form with contained op and lastUse + assert(overwrittenOpNum == 0); + if (IsContainableHWIntrinsicOp(node, op3, &supportsRegOptional)) + { + // result = (op1 * op2) + [op3] + MakeSrcContained(node, op3); + } + else if (IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional)) + { + // result = (op1 * [op2]) + op3 + MakeSrcContained(node, op2); + } + else if (IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional)) + { + // Intrinsics with CopyUpperBits semantics cannot have op1 be contained + + if (!HWIntrinsicInfo::CopiesUpperBits(intrinsicId)) + { + // result = ([op1] * op2) + op3 + MakeSrcContained(node, op1); + } + } + else + { + assert(supportsRegOptional); + + // TODO-XArch-CQ: Technically any one of the three operands can + // be reg-optional. With a limitation on op1 where + // it can only be so if CopyUpperBits is off. + // https://github.com/dotnet/runtime/issues/6358 + + // result = (op1 * op2) + op3 + op3->SetRegOptional(); + } } } } diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 903ca4e4352a5f..fab45080212f75 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2384,9 +2384,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) } } - else + else if (overwrittenOpNum == 2) { - assert(overwrittenOpNum == 2 || overwrittenOpNum == 0); if (op1->isContained()) { @@ -2408,6 +2407,81 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) srcCount += BuildDelayFreeUses(op1, op2); } } + else + { + // No overwritten: + // result = (op1 * op2) + op3 + // decide form with contained op and lastUse + assert(overwrittenOpNum == 0); + if (op2->isContained()) + { + if (op1->OperIs(GT_LCL_VAR) && op1->IsLastUse(0)) + { + // result = (op1 * [op2]) + op3 + // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 + + tgtPrefUse = BuildUse(op1); + srcCount += 1; + srcCount += BuildOperandUses(op2); + srcCount += BuildDelayFreeUses(op3, op1); + } + else + { + // result = (op1 * [op2]) + op3 + // 231 form XMM1 = XMM2 * [XMM3] + XMM1 + + tgtPrefUse = BuildUse(op3); + srcCount += 1; + srcCount += BuildOperandUses(op2); + srcCount += BuildDelayFreeUses(op1, op3); + } + } + else if (op1->isContained()) + { + if (op2->OperIs(GT_LCL_VAR) && op2->IsLastUse(0)) + { + // result = ([op1] * op2) + op3 + // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 + + tgtPrefUse = BuildUse(op2); + srcCount += 1; + srcCount += BuildOperandUses(op1); + srcCount += BuildDelayFreeUses(op3, op2); + } + else + { + // result = ([op1] * op2) + op3 + // 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1 + + tgtPrefUse = BuildUse(op3); + + srcCount += BuildOperandUses(op1); + srcCount += BuildDelayFreeUses(op2, op1); + srcCount += 1; + } + } + else + { + // op3 isContained or regOptional + // result = (op1 * op2) + [op3] + // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] + + //if (op1->OperIs(GT_LCL_VAR) && op1->IsLastUse(0)) + //{ + tgtPrefUse = BuildUse(op1); + srcCount += 1; + srcCount += BuildDelayFreeUses(op2, op1); + //} + //else + //{ + // tgtPrefUse = BuildUse(op2); + // srcCount += 1; + // srcCount += BuildDelayFreeUses(op1, op2); + //} + + srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1); + } + } buildUses = false; break; From 0f950dd3619ff9122babbb8e0641d48d53d93755 Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Sat, 18 Sep 2021 12:42:18 -0700 Subject: [PATCH 20/38] Clean up code --- src/coreclr/jit/gentree.cpp | 39 ++++-- src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 89 +------------ src/coreclr/jit/lowerxarch.cpp | 140 ++++---------------- src/coreclr/jit/lsraxarch.cpp | 114 ++-------------- 4 files changed, 70 insertions(+), 312 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 6701b37dfb4729..0a8fe9ec385610 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -21820,31 +21820,42 @@ uint16_t GenTreeLclVarCommon::GetLclOffs() const #if defined(TARGET_XARCH) && defined(FEATURE_HW_INTRINSICS) //------------------------------------------------------------------------ -// GetOverwrittenOpNumForFMA: check if the result is written into one of the operands +// GetOverwrittenOpNumForFMA: check if the result is written into one of the operands. +// In the case that none of the operand is overwritten, check if any of them is lastUse. // // Return Value: -// The operand number or 0 if not overwritten. +// The operand number overwritten or lastUse. 2 is the default value, where no overwritten and no lastUse. // unsigned GenTreeHWIntrinsic::GetOverwrittenOpNumForFMA(GenTree* use, GenTree* op1, GenTree* op2, GenTree* op3) { // only FMA intrinsic node should call into this function assert(HWIntrinsicInfo::lookupIsa(gtHWIntrinsicId) == InstructionSet_FMA); + unsigned overwrittenOpNum = 2; // 1->op1, 2->op2, 3->op3 if (!use->OperIs(GT_STORE_LCL_VAR)) - return 0; - GenTreeLclVarCommon* overwritten = use->AsLclVarCommon(); - unsigned overwrittenLclNum = overwritten->GetLclNum(); - unsigned overwrittenOpNum = 0; // 1->op1, 2->op2, 3->op3 - if (op1->IsLocal() && op1->AsLclVarCommon()->GetLclNum() == overwrittenLclNum) - { - overwrittenOpNum = 1; - } - else if (op2->IsLocal() && op2->AsLclVarCommon()->GetLclNum() == overwrittenLclNum) { - overwrittenOpNum = 2; + if (op1->OperIs(GT_LCL_VAR) && op1->IsLastUse(0)) + overwrittenOpNum = 1; + else if (op3->OperIs(GT_LCL_VAR) && op3->IsLastUse(0)) + overwrittenOpNum = 3; + else + overwrittenOpNum = 2; } - else if (op3->IsLocal() && op3->AsLclVarCommon()->GetLclNum() == overwrittenLclNum) + else { - overwrittenOpNum = 3; + GenTreeLclVarCommon* overwritten = use->AsLclVarCommon(); + unsigned overwrittenLclNum = overwritten->GetLclNum(); + if (op1->IsLocal() && op1->AsLclVarCommon()->GetLclNum() == overwrittenLclNum) + { + overwrittenOpNum = 1; + } + else if (op2->IsLocal() && op2->AsLclVarCommon()->GetLclNum() == overwrittenLclNum) + { + overwrittenOpNum = 2; + } + else if (op3->IsLocal() && op3->AsLclVarCommon()->GetLclNum() == overwrittenLclNum) + { + overwrittenOpNum = 3; + } } return overwrittenOpNum; diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 59b49dfa68fa6d..7e65d494bc8b62 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -2138,9 +2138,10 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) overwrittenOpNum = node->GetOverwrittenOpNumForFMA(use.User(), op1, op2, op3); } - if (overwrittenOpNum == 1) + // Intrinsics with CopyUpperBits semantics must have op1 as target + if (overwrittenOpNum == 1 || copiesUpperBits) { - if (op2->isContained()) + if (op2->isContained() || op2->IsRegOptional()) { // op1 = (op1 * [op2]) + op3 // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 @@ -2156,13 +2157,11 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) op1Reg = op1->GetRegNum(); op2Reg = op2->GetRegNum(); } + isCommutative = copiesUpperBits; } else if (overwrittenOpNum == 3) { // 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1 - // One of the following: - // op3 = ([op1] * op2) + op3 - // op3 = (op1 * [op2]) + op3 ins = (instruction)(ins + 1); op1Reg = op3->GetRegNum(); if (op1->isContained()) @@ -2178,8 +2177,9 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) op3 = op2; } } - else if (overwrittenOpNum == 2) + else { + assert(overwrittenOpNum == 2); if (op1->isContained()) { // op2 = ([op1] * op2) + op3 @@ -2197,82 +2197,9 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) op1Reg = op2->GetRegNum(); op2Reg = op1->GetRegNum(); } - } - else - { - // No overwritten: - // result = (op1 * op2) + op3 - // decide form with contained op and lastUse - assert(overwrittenOpNum == 0); - if (op2->isContained() || op2->isUsedFromSpillTemp()) - { - if (op1->OperIs(GT_LCL_VAR) && op1->IsLastUse(0)) - { - // result = (op1 * [op2]) + op3 - // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 - - ins = (instruction)(ins - 1); - op1Reg = op1->GetRegNum(); - op2Reg = op3->GetRegNum(); - op3 = op2; - } - else - { - // result = (op1 * [op2]) + op3 - // 231 form XMM1 = XMM2 * [XMM3] + XMM1 - - ins = (instruction)(ins + 1); - op1Reg = op3->GetRegNum(); - op2Reg = op1->GetRegNum(); - op3 = op2; - } - } - else if (op1->isContained() || op1->isUsedFromSpillTemp()) - { - if (op2->OperIs(GT_LCL_VAR) && op2->IsLastUse(0)) - { - // result = ([op1] * op2) + op3 - // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 - - ins = (instruction)(ins - 1); - op1Reg = op2->GetRegNum(); - op2Reg = op3->GetRegNum(); - op3 = op1; - - } - else - { - // result = ([op1] * op2) + op3 - // 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1 - - ins = (instruction)(ins + 1); - op1Reg = op3->GetRegNum(); - op2Reg = op2->GetRegNum(); - op3 = op1; - } - } - else - { - // op3 isContained or regOptional - // result = (op1 * op2) + [op3] - // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] - //if (op1->OperIs(GT_LCL_VAR) && op1->IsLastUse(0)) - //{ - op1Reg = op1->GetRegNum(); - op2Reg = op2->GetRegNum(); - //} - //else - //{ - // op1Reg = op2->GetRegNum(); - // op2Reg = op1->GetRegNum(); - //} - //isCommutative = !copiesUpperBits; - isCommutative = true; - } - - if (isCommutative && (op1Reg != targetReg) && (op2Reg == targetReg)) + if (isCommutative && (op1Reg != targetReg) && (op2Reg == targetReg)) { assert(node->isRMWHWIntrinsic(compiler)); @@ -2286,8 +2213,6 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) op2Reg = op1Reg; op1Reg = targetReg; } - } - genHWIntrinsic_R_R_R_RM(ins, attr, targetReg, op1Reg, op2Reg, op3); genProduceReg(node); } diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index eaefba51c7ba89..82c08f22e78c22 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -6336,6 +6336,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) if ((intrinsicId >= NI_FMA_MultiplyAdd) && (intrinsicId <= NI_FMA_MultiplySubtractNegatedScalar)) { bool supportsRegOptional = false; + bool supportsRegOptional2 = false; unsigned overwrittenOpNum = 0; LIR::Use use; @@ -6344,123 +6345,30 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) overwrittenOpNum = node->GetOverwrittenOpNumForFMA(use.User(), op1, op2, op3); } - switch (overwrittenOpNum) + if (overwrittenOpNum != 1 && IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional) && !HWIntrinsicInfo::CopiesUpperBits(intrinsicId)) { - case 1: - { - if (IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional)) - { - // op1 = (op1 * [op2]) + op3 - // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 - MakeSrcContained(node, op2); - } - else if (IsContainableHWIntrinsicOp(node, op3, &supportsRegOptional)) - { - // op1 = (op1 * op2) + [op3] - // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] - MakeSrcContained(node, op3); - - } - else - { - assert(supportsRegOptional); - // op1 = (op1 * op2) + [op3] - // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] - op3->SetRegOptional(); - } - break; - } - case 3: - { - // 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1 - // One of the following: - // op3 = ([op1] * op2) + op3 - // op3 = (op1 * [op2]) + op3 - if (IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional) && !HWIntrinsicInfo::CopiesUpperBits(intrinsicId)) - { - // Intrinsics with CopyUpperBits semantics cannot have op1 be contained - - MakeSrcContained(node, op1); - } - else if (IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional)) - { - MakeSrcContained(node, op2); - } - else - { - assert(supportsRegOptional); - op2->SetRegOptional(); - } - break; - } - case 2: - { - if (IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional) && !HWIntrinsicInfo::CopiesUpperBits(intrinsicId)) - { - // op2 = ([op1] * op2) + op3 - // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 - MakeSrcContained(node, op1); - } - else if (IsContainableHWIntrinsicOp(node, op3, &supportsRegOptional)) - { - // op2 = (op1 * op2) + [op3] - // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] - MakeSrcContained(node, op3); - } - else - { - assert(supportsRegOptional); - - // TODO-XArch-CQ: Technically any one of the three operands can - // be reg-optional. With a limitation on op1 where - // it can only be so if CopyUpperBits is off. - // https://github.com/dotnet/runtime/issues/6358 - - // op2 = (op1 * op2) + [op3] - // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] - op3->SetRegOptional(); - } - break; - } - default: - { - // No overwritten: - // result = (op1 * op2) + op3 - // decide form with contained op and lastUse - assert(overwrittenOpNum == 0); - if (IsContainableHWIntrinsicOp(node, op3, &supportsRegOptional)) - { - // result = (op1 * op2) + [op3] - MakeSrcContained(node, op3); - } - else if (IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional)) - { - // result = (op1 * [op2]) + op3 - MakeSrcContained(node, op2); - } - else if (IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional)) - { - // Intrinsics with CopyUpperBits semantics cannot have op1 be contained - - if (!HWIntrinsicInfo::CopiesUpperBits(intrinsicId)) - { - // result = ([op1] * op2) + op3 - MakeSrcContained(node, op1); - } - } - else - { - assert(supportsRegOptional); - - // TODO-XArch-CQ: Technically any one of the three operands can - // be reg-optional. With a limitation on op1 where - // it can only be so if CopyUpperBits is off. - // https://github.com/dotnet/runtime/issues/6358 - - // result = (op1 * op2) + op3 - op3->SetRegOptional(); - } - } + // result = ([op1] * op2) + op3 + MakeSrcContained(node, op1); + } + else if (overwrittenOpNum != 2 && IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional2)) + { + // result = (op1 * [op2]) + op3 + MakeSrcContained(node, op2); + } + else if (overwrittenOpNum != 3 && IsContainableHWIntrinsicOp(node, op3, &supportsRegOptional)) + { + // result = (op1 * op2) + [op3] + MakeSrcContained(node, op3); + } + else if (overwrittenOpNum == 3) + { + assert(supportsRegOptional2); + op2->SetRegOptional(); + } + else + { + assert(supportsRegOptional); + op3->SetRegOptional(); } } else diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index fab45080212f75..a60237366e1211 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2338,26 +2338,21 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) // Intrinsics with CopyUpperBits semantics cannot have op1 be contained assert(!copiesUpperBits || !op1->isContained()); - if (overwrittenOpNum == 1) + // Intrinsics with CopyUpperBits semantics must have op1 as target + if (overwrittenOpNum == 1 || copiesUpperBits) { - if (op2->isContained()) + tgtPrefUse = BuildUse(op1); + srcCount += 1; + + if (op2->isContained() || op2->IsRegOptional()) { // op1 = (op1 * [op2]) + op3 - // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 - tgtPrefUse = BuildUse(op1); - - srcCount += 1; srcCount += BuildOperandUses(op2); srcCount += BuildDelayFreeUses(op3, op1); } else { - //assert(op3->isContained()); // op1 = (op1 * op2) + [op3] - // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] - tgtPrefUse = BuildUse(op1); - - srcCount += 1; srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1); srcCount += BuildDelayFreeUses(op2, op1); @@ -2365,124 +2360,43 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) } else if (overwrittenOpNum == 3) { - // 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1 - // One of the following: - // op3 = ([op1] * op2) + op3 - // op3 = (op1 * [op2]) + op3 tgtPrefUse = BuildUse(op3); srcCount += 1; + if (op1->isContained()) { + // op3 = ([op1] * op2) + op3 srcCount += BuildOperandUses(op1); srcCount += BuildDelayFreeUses(op2, op3); } else { - //assert(op2->isContained()); + // op3 = (op1 * [op2]) + op3 srcCount += op2->isContained() ? BuildOperandUses(op2) : BuildDelayFreeUses(op2, op3); srcCount += BuildDelayFreeUses(op1, op3); } } - else if (overwrittenOpNum == 2) + else { + assert(overwrittenOpNum == 2); + tgtPrefUse = BuildUse(op2); + srcCount += 1; if (op1->isContained()) { // op2 = ([op1] * op2) + op3 - // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 - tgtPrefUse = BuildUse(op2); - srcCount += 1; srcCount += BuildOperandUses(op1); srcCount += BuildDelayFreeUses(op3, op2); } else { // op2 = (op1 * op2) + [op3] - // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] - - tgtPrefUse = BuildUse(op2); - srcCount += 1; srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1); srcCount += BuildDelayFreeUses(op1, op2); } } - else - { - // No overwritten: - // result = (op1 * op2) + op3 - // decide form with contained op and lastUse - assert(overwrittenOpNum == 0); - if (op2->isContained()) - { - if (op1->OperIs(GT_LCL_VAR) && op1->IsLastUse(0)) - { - // result = (op1 * [op2]) + op3 - // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 - - tgtPrefUse = BuildUse(op1); - srcCount += 1; - srcCount += BuildOperandUses(op2); - srcCount += BuildDelayFreeUses(op3, op1); - } - else - { - // result = (op1 * [op2]) + op3 - // 231 form XMM1 = XMM2 * [XMM3] + XMM1 - - tgtPrefUse = BuildUse(op3); - srcCount += 1; - srcCount += BuildOperandUses(op2); - srcCount += BuildDelayFreeUses(op1, op3); - } - } - else if (op1->isContained()) - { - if (op2->OperIs(GT_LCL_VAR) && op2->IsLastUse(0)) - { - // result = ([op1] * op2) + op3 - // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 - - tgtPrefUse = BuildUse(op2); - srcCount += 1; - srcCount += BuildOperandUses(op1); - srcCount += BuildDelayFreeUses(op3, op2); - } - else - { - // result = ([op1] * op2) + op3 - // 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1 - - tgtPrefUse = BuildUse(op3); - - srcCount += BuildOperandUses(op1); - srcCount += BuildDelayFreeUses(op2, op1); - srcCount += 1; - } - } - else - { - // op3 isContained or regOptional - // result = (op1 * op2) + [op3] - // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] - - //if (op1->OperIs(GT_LCL_VAR) && op1->IsLastUse(0)) - //{ - tgtPrefUse = BuildUse(op1); - srcCount += 1; - srcCount += BuildDelayFreeUses(op2, op1); - //} - //else - //{ - // tgtPrefUse = BuildUse(op2); - // srcCount += 1; - // srcCount += BuildDelayFreeUses(op1, op2); - //} - - srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1); - } - } - + buildUses = false; break; } From 33a596d1aaa4d679a3dd35258c0826ed620d587a Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Mon, 20 Sep 2021 08:37:56 -0700 Subject: [PATCH 21/38] Separate default case overwrittenOpNum==0 --- src/coreclr/jit/gentree.cpp | 4 +- src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 59 ++++++++++++++++----- src/coreclr/jit/lsraxarch.cpp | 48 +++++++++++++---- 3 files changed, 87 insertions(+), 24 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 0a8fe9ec385610..314332305fae39 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -21824,13 +21824,13 @@ uint16_t GenTreeLclVarCommon::GetLclOffs() const // In the case that none of the operand is overwritten, check if any of them is lastUse. // // Return Value: -// The operand number overwritten or lastUse. 2 is the default value, where no overwritten and no lastUse. +// The operand number overwritten or lastUse. 0 is the default value, where no overwritten and no lastUse. // unsigned GenTreeHWIntrinsic::GetOverwrittenOpNumForFMA(GenTree* use, GenTree* op1, GenTree* op2, GenTree* op3) { // only FMA intrinsic node should call into this function assert(HWIntrinsicInfo::lookupIsa(gtHWIntrinsicId) == InstructionSet_FMA); - unsigned overwrittenOpNum = 2; // 1->op1, 2->op2, 3->op3 + unsigned overwrittenOpNum = 0; // 1->op1, 2->op2, 3->op3 if (!use->OperIs(GT_STORE_LCL_VAR)) { if (op1->OperIs(GT_LCL_VAR) && op1->IsLastUse(0)) diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 7e65d494bc8b62..3e56351d0d4b0e 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -2177,9 +2177,8 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) op3 = op2; } } - else + else if (overwrittenOpNum == 2) { - assert(overwrittenOpNum == 2); if (op1->isContained()) { // op2 = ([op1] * op2) + op3 @@ -2198,21 +2197,55 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) op2Reg = op1->GetRegNum(); } } + else + { + assert(overwrittenOpNum == 0); + if (op1->isContained()) + { + // op2 = ([op1] * op2) + op3 + // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 + ins = (instruction)(ins - 1); + op1Reg = op2->GetRegNum(); + op2Reg = op3->GetRegNum(); + op3 = op1; + } + else + { + if (op2->isContained() || op2->IsRegOptional()) + { + // op1 = (op1 * [op2]) + op3 + // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 + ins = (instruction)(ins - 1); + op1Reg = op1->GetRegNum(); + op2Reg = op3->GetRegNum(); + op3 = op2; + } + else + { + // op1 = (op1 * op2) + [op3] + // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] + op1Reg = op1->GetRegNum(); + op2Reg = op2->GetRegNum(); + } + isCommutative = copiesUpperBits; + } + + } if (isCommutative && (op1Reg != targetReg) && (op2Reg == targetReg)) - { - assert(node->isRMWHWIntrinsic(compiler)); + { + assert(node->isRMWHWIntrinsic(compiler)); - // We have "reg2 = (reg1 * reg2) +/- op3" where "reg1 != reg2" on a RMW intrinsic. - // - // For non-commutative intrinsics, we should have ensured that op2 was marked - // delay free in order to prevent it from getting assigned the same register - // as target. However, for commutative intrinsics, we can just swap the operands - // in order to have "reg2 = reg2 op reg1" which will end up producing the right code. + // We have "reg2 = (reg1 * reg2) +/- op3" where "reg1 != reg2" on a RMW intrinsic. + // + // For non-commutative intrinsics, we should have ensured that op2 was marked + // delay free in order to prevent it from getting assigned the same register + // as target. However, for commutative intrinsics, we can just swap the operands + // in order to have "reg2 = reg2 op reg1" which will end up producing the right code. - op2Reg = op1Reg; - op1Reg = targetReg; - } + op2Reg = op1Reg; + op1Reg = targetReg; + } genHWIntrinsic_R_R_R_RM(ins, attr, targetReg, op1Reg, op2Reg, op3); genProduceReg(node); } diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index a60237366e1211..43c46dbbb3a3ac 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2347,7 +2347,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) if (op2->isContained() || op2->IsRegOptional()) { // op1 = (op1 * [op2]) + op3 - srcCount += BuildOperandUses(op2); + srcCount += op2->isContained() ? BuildOperandUses(op2) : BuildDelayFreeUses(op2, op1); srcCount += BuildDelayFreeUses(op3, op1); } else @@ -2358,6 +2358,24 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) } } + else if (overwrittenOpNum == 2) + { + tgtPrefUse = BuildUse(op2); + srcCount += 1; + + if (op1->isContained()) + { + // op2 = ([op1] * op2) + op3 + srcCount += BuildOperandUses(op1); + srcCount += BuildDelayFreeUses(op3, op2); + } + else + { + // op2 = (op1 * op2) + [op3] + srcCount += BuildDelayFreeUses(op1, op2); + srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1); + } + } else if (overwrittenOpNum == 3) { tgtPrefUse = BuildUse(op3); @@ -2379,22 +2397,34 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) } else { - assert(overwrittenOpNum == 2); - tgtPrefUse = BuildUse(op2); - srcCount += 1; - + assert(overwrittenOpNum == 0); if (op1->isContained()) { - // op2 = ([op1] * op2) + op3 + tgtPrefUse = BuildUse(op2); + srcCount += 1; + // result = ([op1] * op2) + op3 srcCount += BuildOperandUses(op1); srcCount += BuildDelayFreeUses(op3, op2); } else { - // op2 = (op1 * op2) + [op3] - srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1); - srcCount += BuildDelayFreeUses(op1, op2); + tgtPrefUse = BuildUse(op1); + srcCount += 1; + + if (op2->isContained() || op2->IsRegOptional()) + { + // result = (op1 * [op2]) + op3 + srcCount += op2->isContained() ? BuildOperandUses(op2) : BuildDelayFreeUses(op2, op1); + srcCount += BuildDelayFreeUses(op3, op1); + } + else + { + // result = (op1 * op2) + [op3] + srcCount += BuildDelayFreeUses(op2, op1); + srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1); + } } + } buildUses = false; From 5da93687ba569db9f4b3a0555df65ed931bd506a Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Wed, 29 Sep 2021 09:33:07 -0700 Subject: [PATCH 22/38] Apply format patch --- src/coreclr/jit/gentree.cpp | 6 ++-- src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 2 -- src/coreclr/jit/lowerxarch.cpp | 31 +++++++++++---------- src/coreclr/jit/lsraxarch.cpp | 5 +--- 4 files changed, 20 insertions(+), 24 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 314332305fae39..6c215039d68e90 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -21833,7 +21833,7 @@ unsigned GenTreeHWIntrinsic::GetOverwrittenOpNumForFMA(GenTree* use, GenTree* op unsigned overwrittenOpNum = 0; // 1->op1, 2->op2, 3->op3 if (!use->OperIs(GT_STORE_LCL_VAR)) { - if (op1->OperIs(GT_LCL_VAR) && op1->IsLastUse(0)) + if (op1->OperIs(GT_LCL_VAR) && op1->IsLastUse(0)) overwrittenOpNum = 1; else if (op3->OperIs(GT_LCL_VAR) && op3->IsLastUse(0)) overwrittenOpNum = 3; @@ -21842,8 +21842,8 @@ unsigned GenTreeHWIntrinsic::GetOverwrittenOpNumForFMA(GenTree* use, GenTree* op } else { - GenTreeLclVarCommon* overwritten = use->AsLclVarCommon(); - unsigned overwrittenLclNum = overwritten->GetLclNum(); + GenTreeLclVarCommon* overwritten = use->AsLclVarCommon(); + unsigned overwrittenLclNum = overwritten->GetLclNum(); if (op1->IsLocal() && op1->AsLclVarCommon()->GetLclNum() == overwrittenLclNum) { overwrittenOpNum = 1; diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 3e56351d0d4b0e..c758b03deed3ee 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -2187,7 +2187,6 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) op1Reg = op2->GetRegNum(); op2Reg = op3->GetRegNum(); op3 = op1; - } else { @@ -2229,7 +2228,6 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) } isCommutative = copiesUpperBits; } - } if (isCommutative && (op1Reg != targetReg) && (op2Reg == targetReg)) diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 82c08f22e78c22..962578711ba872 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -6335,9 +6335,9 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) { if ((intrinsicId >= NI_FMA_MultiplyAdd) && (intrinsicId <= NI_FMA_MultiplySubtractNegatedScalar)) { - bool supportsRegOptional = false; - bool supportsRegOptional2 = false; - unsigned overwrittenOpNum = 0; + bool supportsRegOptional = false; + bool supportsRegOptional2 = false; + unsigned overwrittenOpNum = 0; LIR::Use use; if (BlockRange().TryGetUse(node, &use)) @@ -6345,30 +6345,31 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) overwrittenOpNum = node->GetOverwrittenOpNumForFMA(use.User(), op1, op2, op3); } - if (overwrittenOpNum != 1 && IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional) && !HWIntrinsicInfo::CopiesUpperBits(intrinsicId)) + if (overwrittenOpNum != 1 && IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional) && + !HWIntrinsicInfo::CopiesUpperBits(intrinsicId)) { - // result = ([op1] * op2) + op3 - MakeSrcContained(node, op1); + // result = ([op1] * op2) + op3 + MakeSrcContained(node, op1); } - else if (overwrittenOpNum != 2 && IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional2)) + else if (overwrittenOpNum != 2 && IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional2)) { - // result = (op1 * [op2]) + op3 - MakeSrcContained(node, op2); + // result = (op1 * [op2]) + op3 + MakeSrcContained(node, op2); } else if (overwrittenOpNum != 3 && IsContainableHWIntrinsicOp(node, op3, &supportsRegOptional)) { - // result = (op1 * op2) + [op3] - MakeSrcContained(node, op3); + // result = (op1 * op2) + [op3] + MakeSrcContained(node, op3); } else if (overwrittenOpNum == 3) { - assert(supportsRegOptional2); - op2->SetRegOptional(); + assert(supportsRegOptional2); + op2->SetRegOptional(); } else { - assert(supportsRegOptional); - op3->SetRegOptional(); + assert(supportsRegOptional); + op3->SetRegOptional(); } } else diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 43c46dbbb3a3ac..a4a1a888c309e0 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2355,7 +2355,6 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) // op1 = (op1 * op2) + [op3] srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1); srcCount += BuildDelayFreeUses(op2, op1); - } } else if (overwrittenOpNum == 2) @@ -2393,7 +2392,6 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) srcCount += op2->isContained() ? BuildOperandUses(op2) : BuildDelayFreeUses(op2, op3); srcCount += BuildDelayFreeUses(op1, op3); } - } else { @@ -2424,9 +2422,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1); } } - } - + buildUses = false; break; } From c3a9f07e9dcbda4fc8e769efef6d5fa39582182f Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Fri, 1 Oct 2021 10:23:05 -0700 Subject: [PATCH 23/38] Change variable and function names --- src/coreclr/jit/gentree.cpp | 19 +++++++-------- src/coreclr/jit/gentree.h | 2 +- src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 26 +++++++++++---------- src/coreclr/jit/lowerxarch.cpp | 12 +++++----- src/coreclr/jit/lsraxarch.cpp | 17 +++++++++----- 5 files changed, 41 insertions(+), 35 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 6c215039d68e90..4eea7abacf9bba 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -21824,21 +21824,20 @@ uint16_t GenTreeLclVarCommon::GetLclOffs() const // In the case that none of the operand is overwritten, check if any of them is lastUse. // // Return Value: -// The operand number overwritten or lastUse. 0 is the default value, where no overwritten and no lastUse. +// The operand number overwritten or lastUse. 0 is the default value, where the result is written into a destination that is not one of the source operands. // -unsigned GenTreeHWIntrinsic::GetOverwrittenOpNumForFMA(GenTree* use, GenTree* op1, GenTree* op2, GenTree* op3) +unsigned GenTreeHWIntrinsic::GetResultOpNumForFMA(GenTree* use, GenTree* op1, GenTree* op2, GenTree* op3) { // only FMA intrinsic node should call into this function assert(HWIntrinsicInfo::lookupIsa(gtHWIntrinsicId) == InstructionSet_FMA); - unsigned overwrittenOpNum = 0; // 1->op1, 2->op2, 3->op3 if (!use->OperIs(GT_STORE_LCL_VAR)) { if (op1->OperIs(GT_LCL_VAR) && op1->IsLastUse(0)) - overwrittenOpNum = 1; + return 1; else if (op3->OperIs(GT_LCL_VAR) && op3->IsLastUse(0)) - overwrittenOpNum = 3; + return 3; else - overwrittenOpNum = 2; + return 2; } else { @@ -21846,19 +21845,19 @@ unsigned GenTreeHWIntrinsic::GetOverwrittenOpNumForFMA(GenTree* use, GenTree* op unsigned overwrittenLclNum = overwritten->GetLclNum(); if (op1->IsLocal() && op1->AsLclVarCommon()->GetLclNum() == overwrittenLclNum) { - overwrittenOpNum = 1; + return 1; } else if (op2->IsLocal() && op2->AsLclVarCommon()->GetLclNum() == overwrittenLclNum) { - overwrittenOpNum = 2; + return 2; } else if (op3->IsLocal() && op3->AsLclVarCommon()->GetLclNum() == overwrittenLclNum) { - overwrittenOpNum = 3; + return 3; } } - return overwrittenOpNum; + return 0; } #endif // TARGET_XARCH && FEATURE_HW_INTRINSICS diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index bd8e767953279c..16910147345dce 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -5278,7 +5278,7 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic { return (gtFlags & GTF_SIMDASHW_OP) != 0; } - unsigned GetOverwrittenOpNumForFMA(GenTree* use, GenTree* op1, GenTree* op2, GenTree* op3); + unsigned GetResultOpNumForFMA(GenTree* use, GenTree* op1, GenTree* op2, GenTree* op3); #if DEBUGGABLE_GENTREE GenTreeHWIntrinsic() : GenTreeJitIntrinsic() diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index c758b03deed3ee..97f84f4fda9c24 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -2106,7 +2106,9 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; var_types baseType = node->GetSimdBaseType(); emitAttr attr = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->GetSimdSize())); - instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); + instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); // 213 form + instruction _132form = (instruction)(ins - 1); + instruction _232form = (instruction)(ins - 1); GenTree* op1 = node->gtGetOp1(); regNumber targetReg = node->GetRegNum(); @@ -2131,21 +2133,21 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) // Intrinsics with CopyUpperBits semantics cannot have op1 be contained assert(!copiesUpperBits || !op1->isContained()); - unsigned overwrittenOpNum = 0; + unsigned resultOpNum = 0; LIR::Use use; if (LIR::AsRange(compiler->compCurBB).TryGetUse(node, &use)) { - overwrittenOpNum = node->GetOverwrittenOpNumForFMA(use.User(), op1, op2, op3); + resultOpNum = node->GetResultOpNumForFMA(use.User(), op1, op2, op3); } // Intrinsics with CopyUpperBits semantics must have op1 as target - if (overwrittenOpNum == 1 || copiesUpperBits) + if (resultOpNum == 1 || copiesUpperBits) { if (op2->isContained() || op2->IsRegOptional()) { // op1 = (op1 * [op2]) + op3 // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 - ins = (instruction)(ins - 1); + ins = _132form; op1Reg = op1->GetRegNum(); op2Reg = op3->GetRegNum(); op3 = op2; @@ -2159,10 +2161,10 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) } isCommutative = copiesUpperBits; } - else if (overwrittenOpNum == 3) + else if (resultOpNum == 3) { // 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1 - ins = (instruction)(ins + 1); + ins = _232form; op1Reg = op3->GetRegNum(); if (op1->isContained()) { @@ -2177,13 +2179,13 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) op3 = op2; } } - else if (overwrittenOpNum == 2) + else if (resultOpNum == 2) { if (op1->isContained()) { // op2 = ([op1] * op2) + op3 // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 - ins = (instruction)(ins - 1); + ins = _132form; op1Reg = op2->GetRegNum(); op2Reg = op3->GetRegNum(); op3 = op1; @@ -2198,12 +2200,12 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) } else { - assert(overwrittenOpNum == 0); + assert(resultOpNum == 0); if (op1->isContained()) { // op2 = ([op1] * op2) + op3 // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 - ins = (instruction)(ins - 1); + ins = _132form; op1Reg = op2->GetRegNum(); op2Reg = op3->GetRegNum(); op3 = op1; @@ -2214,7 +2216,7 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) { // op1 = (op1 * [op2]) + op3 // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 - ins = (instruction)(ins - 1); + ins = _132form; op1Reg = op1->GetRegNum(); op2Reg = op3->GetRegNum(); op3 = op2; diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 962578711ba872..9ba162dbe225c5 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -6337,31 +6337,31 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) { bool supportsRegOptional = false; bool supportsRegOptional2 = false; - unsigned overwrittenOpNum = 0; + unsigned resultOpNum = 0; LIR::Use use; if (BlockRange().TryGetUse(node, &use)) { - overwrittenOpNum = node->GetOverwrittenOpNumForFMA(use.User(), op1, op2, op3); + resultOpNum = node->GetResultOpNumForFMA(use.User(), op1, op2, op3); } - if (overwrittenOpNum != 1 && IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional) && + if (resultOpNum != 1 && IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional) && !HWIntrinsicInfo::CopiesUpperBits(intrinsicId)) { // result = ([op1] * op2) + op3 MakeSrcContained(node, op1); } - else if (overwrittenOpNum != 2 && IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional2)) + else if (resultOpNum != 2 && IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional2)) { // result = (op1 * [op2]) + op3 MakeSrcContained(node, op2); } - else if (overwrittenOpNum != 3 && IsContainableHWIntrinsicOp(node, op3, &supportsRegOptional)) + else if (resultOpNum != 3 && IsContainableHWIntrinsicOp(node, op3, &supportsRegOptional)) { // result = (op1 * op2) + [op3] MakeSrcContained(node, op3); } - else if (overwrittenOpNum == 3) + else if (resultOpNum == 3) { assert(supportsRegOptional2); op2->SetRegOptional(); diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index a4a1a888c309e0..71690bbc01b2ad 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2328,18 +2328,18 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) const bool copiesUpperBits = HWIntrinsicInfo::CopiesUpperBits(intrinsicId); - unsigned overwrittenOpNum = 0; + unsigned resultOpNum = 0; LIR::Use use; if (LIR::AsRange(blockSequence[curBBSeqNum]).TryGetUse(intrinsicTree, &use)) { - overwrittenOpNum = intrinsicTree->GetOverwrittenOpNumForFMA(use.User(), op1, op2, op3); + resultOpNum = intrinsicTree->GetResultOpNumForFMA(use.User(), op1, op2, op3); } // Intrinsics with CopyUpperBits semantics cannot have op1 be contained assert(!copiesUpperBits || !op1->isContained()); // Intrinsics with CopyUpperBits semantics must have op1 as target - if (overwrittenOpNum == 1 || copiesUpperBits) + if (resultOpNum == 1 || copiesUpperBits) { tgtPrefUse = BuildUse(op1); srcCount += 1; @@ -2357,7 +2357,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) srcCount += BuildDelayFreeUses(op2, op1); } } - else if (overwrittenOpNum == 2) + else if (resultOpNum == 2) { tgtPrefUse = BuildUse(op2); srcCount += 1; @@ -2375,7 +2375,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1); } } - else if (overwrittenOpNum == 3) + else if (resultOpNum == 3) { tgtPrefUse = BuildUse(op3); srcCount += 1; @@ -2395,9 +2395,12 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) } else { - assert(overwrittenOpNum == 0); + assert(resultOpNum == 0); if (op1->isContained()) { + // In the case that result is writtent into destination that is different from any of the + // source operands, we set op2 to be tgtPrefUse when op1 is contained. + tgtPrefUse = BuildUse(op2); srcCount += 1; // result = ([op1] * op2) + op3 @@ -2406,6 +2409,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) } else { + // When op1 is not contained, we set op1 to be tgtPrefUse. + tgtPrefUse = BuildUse(op1); srcCount += 1; From 9e356aa0a57fd46819e97d39352a773b6a112759 Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Tue, 5 Oct 2021 08:57:55 -0700 Subject: [PATCH 24/38] Update regOptional for op1 and resolve some other comments --- src/coreclr/jit/gentree.cpp | 26 +++++++++++--------- src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 27 ++++++++++++--------- src/coreclr/jit/lowerxarch.cpp | 22 ++++++++++------- src/coreclr/jit/lsraxarch.cpp | 8 +++--- 4 files changed, 48 insertions(+), 35 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 4eea7abacf9bba..772316e2b4d399 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -21824,23 +21824,17 @@ uint16_t GenTreeLclVarCommon::GetLclOffs() const // In the case that none of the operand is overwritten, check if any of them is lastUse. // // Return Value: -// The operand number overwritten or lastUse. 0 is the default value, where the result is written into a destination that is not one of the source operands. +// The operand number overwritten or lastUse. 0 is the default value, where the result is written into +// a destination that is not one of the source operands. // unsigned GenTreeHWIntrinsic::GetResultOpNumForFMA(GenTree* use, GenTree* op1, GenTree* op2, GenTree* op3) { // only FMA intrinsic node should call into this function assert(HWIntrinsicInfo::lookupIsa(gtHWIntrinsicId) == InstructionSet_FMA); - if (!use->OperIs(GT_STORE_LCL_VAR)) - { - if (op1->OperIs(GT_LCL_VAR) && op1->IsLastUse(0)) - return 1; - else if (op3->OperIs(GT_LCL_VAR) && op3->IsLastUse(0)) - return 3; - else - return 2; - } - else + if (use->OperIs(GT_STORE_LCL_VAR)) { + // For store_lcl_var, check if any op is overwritten + GenTreeLclVarCommon* overwritten = use->AsLclVarCommon(); unsigned overwrittenLclNum = overwritten->GetLclNum(); if (op1->IsLocal() && op1->AsLclVarCommon()->GetLclNum() == overwrittenLclNum) @@ -21856,7 +21850,17 @@ unsigned GenTreeHWIntrinsic::GetResultOpNumForFMA(GenTree* use, GenTree* op1, Ge return 3; } } + else + { + // For LclVar, check if any op is lastUse + if (op1->OperIs(GT_LCL_VAR) && op1->IsLastUse(0)) + return 1; + else if (op3->OperIs(GT_LCL_VAR) && op3->IsLastUse(0)) + return 3; + else + return 2; + } return 0; } #endif // TARGET_XARCH && FEATURE_HW_INTRINSICS diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 97f84f4fda9c24..c627447409ab3b 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -2154,19 +2154,20 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) } else { + assert(op3->isContained() || op3->IsRegOptional()); // op1 = (op1 * op2) + [op3] // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] - op1Reg = op1->GetRegNum(); - op2Reg = op2->GetRegNum(); + op1Reg = op1->GetRegNum(); + op2Reg = op2->GetRegNum(); + isCommutative = copiesUpperBits; } - isCommutative = copiesUpperBits; } else if (resultOpNum == 3) { // 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1 ins = _232form; op1Reg = op3->GetRegNum(); - if (op1->isContained()) + if (op1->isContained() || op1->IsRegOptional()) { // op3 = ([op1] * op2) + op3 op2Reg = op2->GetRegNum(); @@ -2174,6 +2175,7 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) } else { + assert(op2->isContained() || op2->IsRegOptional()); // op3 = (op1 * [op2]) + op3 op2Reg = op1->GetRegNum(); op3 = op2; @@ -2181,7 +2183,7 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) } else if (resultOpNum == 2) { - if (op1->isContained()) + if (op1->isContained() || op1->IsRegOptional()) { // op2 = ([op1] * op2) + op3 // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 @@ -2192,16 +2194,18 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) } else { + assert(op3->isContained() || op3->IsRegOptional()); // op2 = (op1 * op2) + [op3] // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] - op1Reg = op2->GetRegNum(); - op2Reg = op1->GetRegNum(); + op1Reg = op2->GetRegNum(); + op2Reg = op1->GetRegNum(); + isCommutative = copiesUpperBits; } } else { assert(resultOpNum == 0); - if (op1->isContained()) + if (op1->isContained() || op1->IsRegOptional()) { // op2 = ([op1] * op2) + op3 // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 @@ -2223,12 +2227,13 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) } else { + assert(op3->isContained() || op3->IsRegOptional()); // op1 = (op1 * op2) + [op3] // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] - op1Reg = op1->GetRegNum(); - op2Reg = op2->GetRegNum(); + op1Reg = op1->GetRegNum(); + op2Reg = op2->GetRegNum(); + isCommutative = copiesUpperBits; } - isCommutative = copiesUpperBits; } } diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 9ba162dbe225c5..2cbae245f6719d 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -6335,35 +6335,39 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) { if ((intrinsicId >= NI_FMA_MultiplyAdd) && (intrinsicId <= NI_FMA_MultiplySubtractNegatedScalar)) { - bool supportsRegOptional = false; - bool supportsRegOptional2 = false; - unsigned resultOpNum = 0; + bool supportsRegOptional = false; + unsigned overwrittenOpNum = 0; LIR::Use use; if (BlockRange().TryGetUse(node, &use)) { - resultOpNum = node->GetResultOpNumForFMA(use.User(), op1, op2, op3); + overwrittenOpNum = node->GetResultOpNumForFMA(use.User(), op1, op2, op3); } - if (resultOpNum != 1 && IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional) && + if (overwrittenOpNum != 1 && IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional) && !HWIntrinsicInfo::CopiesUpperBits(intrinsicId)) { // result = ([op1] * op2) + op3 MakeSrcContained(node, op1); } - else if (resultOpNum != 2 && IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional2)) + else if (overwrittenOpNum != 2 && IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional)) { // result = (op1 * [op2]) + op3 MakeSrcContained(node, op2); } - else if (resultOpNum != 3 && IsContainableHWIntrinsicOp(node, op3, &supportsRegOptional)) + else if (overwrittenOpNum != 3 && IsContainableHWIntrinsicOp(node, op3, &supportsRegOptional)) { // result = (op1 * op2) + [op3] MakeSrcContained(node, op3); } - else if (resultOpNum == 3) + else if (overwrittenOpNum != 1 && !HWIntrinsicInfo::CopiesUpperBits(intrinsicId)) { - assert(supportsRegOptional2); + assert(supportsRegOptional); + op1->SetRegOptional(); + } + else if (overwrittenOpNum != 2) + { + assert(supportsRegOptional); op2->SetRegOptional(); } else diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 71690bbc01b2ad..5db3e179675308 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2362,7 +2362,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) tgtPrefUse = BuildUse(op2); srcCount += 1; - if (op1->isContained()) + if (op1->isContained() || op1->IsRegOptional()) { // op2 = ([op1] * op2) + op3 srcCount += BuildOperandUses(op1); @@ -2380,7 +2380,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) tgtPrefUse = BuildUse(op3); srcCount += 1; - if (op1->isContained()) + if (op1->isContained() || op1->IsRegOptional()) { // op3 = ([op1] * op2) + op3 srcCount += BuildOperandUses(op1); @@ -2396,10 +2396,10 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) else { assert(resultOpNum == 0); - if (op1->isContained()) + if (op1->isContained() || op1->IsRegOptional()) { // In the case that result is writtent into destination that is different from any of the - // source operands, we set op2 to be tgtPrefUse when op1 is contained. + // source operands, we set op2 to be tgtPrefUse when op1 is contained. tgtPrefUse = BuildUse(op2); srcCount += 1; From f8159bc13db01a287f5399f32deabd52daa4795b Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Wed, 13 Oct 2021 09:06:07 -0700 Subject: [PATCH 25/38] Change var names --- src/coreclr/jit/gentree.cpp | 2 +- src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 4 +-- src/coreclr/jit/lowerxarch.cpp | 27 ++++++++++++--------- src/coreclr/jit/lsraxarch.cpp | 2 +- 4 files changed, 20 insertions(+), 15 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 772316e2b4d399..25386c820c1fe0 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -21820,7 +21820,7 @@ uint16_t GenTreeLclVarCommon::GetLclOffs() const #if defined(TARGET_XARCH) && defined(FEATURE_HW_INTRINSICS) //------------------------------------------------------------------------ -// GetOverwrittenOpNumForFMA: check if the result is written into one of the operands. +// GetResultOpNumForFMA: check if the result is written into one of the operands. // In the case that none of the operand is overwritten, check if any of them is lastUse. // // Return Value: diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index c627447409ab3b..04fbde028cf256 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -2108,7 +2108,7 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) emitAttr attr = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->GetSimdSize())); instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); // 213 form instruction _132form = (instruction)(ins - 1); - instruction _232form = (instruction)(ins - 1); + instruction _231form = (instruction)(ins + 1); GenTree* op1 = node->gtGetOp1(); regNumber targetReg = node->GetRegNum(); @@ -2165,7 +2165,7 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) else if (resultOpNum == 3) { // 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1 - ins = _232form; + ins = _231form; op1Reg = op3->GetRegNum(); if (op1->isContained() || op1->IsRegOptional()) { diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 2cbae245f6719d..28fb947c2b3e69 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -6335,44 +6335,49 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) { if ((intrinsicId >= NI_FMA_MultiplyAdd) && (intrinsicId <= NI_FMA_MultiplySubtractNegatedScalar)) { - bool supportsRegOptional = false; - unsigned overwrittenOpNum = 0; + bool supportsOp1RegOptional = false; + bool supportsOp2RegOptional = false; + bool supportsOp3RegOptional = false; + unsigned resultOpNum = 0; LIR::Use use; if (BlockRange().TryGetUse(node, &use)) { - overwrittenOpNum = node->GetResultOpNumForFMA(use.User(), op1, op2, op3); + resultOpNum = node->GetResultOpNumForFMA(use.User(), op1, op2, op3); } - if (overwrittenOpNum != 1 && IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional) && + // Prioritize Containable op. Check if any one of the op is containable first. + // Set op regOptional only if none of them is containable. + + if (resultOpNum != 1 && IsContainableHWIntrinsicOp(node, op1, &supportsOp1RegOptional) && !HWIntrinsicInfo::CopiesUpperBits(intrinsicId)) { // result = ([op1] * op2) + op3 MakeSrcContained(node, op1); } - else if (overwrittenOpNum != 2 && IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional)) + else if (resultOpNum != 2 && IsContainableHWIntrinsicOp(node, op2, &supportsOp2RegOptional)) { // result = (op1 * [op2]) + op3 MakeSrcContained(node, op2); } - else if (overwrittenOpNum != 3 && IsContainableHWIntrinsicOp(node, op3, &supportsRegOptional)) + else if (resultOpNum != 3 && IsContainableHWIntrinsicOp(node, op3, &supportsOp3RegOptional)) { // result = (op1 * op2) + [op3] MakeSrcContained(node, op3); } - else if (overwrittenOpNum != 1 && !HWIntrinsicInfo::CopiesUpperBits(intrinsicId)) + else if (resultOpNum != 1 && !HWIntrinsicInfo::CopiesUpperBits(intrinsicId)) { - assert(supportsRegOptional); + assert(supportsOp1RegOptional); op1->SetRegOptional(); } - else if (overwrittenOpNum != 2) + else if (resultOpNum != 2) { - assert(supportsRegOptional); + assert(supportsOp2RegOptional); op2->SetRegOptional(); } else { - assert(supportsRegOptional); + assert(supportsOp3RegOptional); op3->SetRegOptional(); } } diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 5db3e179675308..c02d425fe2b526 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2398,7 +2398,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) assert(resultOpNum == 0); if (op1->isContained() || op1->IsRegOptional()) { - // In the case that result is writtent into destination that is different from any of the + // In the case that result is written into destination that is different from any of the // source operands, we set op2 to be tgtPrefUse when op1 is contained. tgtPrefUse = BuildUse(op2); From 2ca252482184b1bd0c7d1ac00f5ef00f8e9055ae Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Wed, 13 Oct 2021 11:04:02 -0700 Subject: [PATCH 26/38] Fix jit format --- src/coreclr/jit/lowerxarch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 28fb947c2b3e69..1f49963ee2aac0 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -6338,7 +6338,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) bool supportsOp1RegOptional = false; bool supportsOp2RegOptional = false; bool supportsOp3RegOptional = false; - unsigned resultOpNum = 0; + unsigned resultOpNum = 0; LIR::Use use; if (BlockRange().TryGetUse(node, &use)) From 17bd967fe083606c22bacdb958e36f0fb4eb26a8 Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Thu, 14 Oct 2021 08:12:51 -0700 Subject: [PATCH 27/38] Fix build node error for op1 is regOptional --- src/coreclr/jit/lsraxarch.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index c02d425fe2b526..09d903fa72636a 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2365,7 +2365,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) if (op1->isContained() || op1->IsRegOptional()) { // op2 = ([op1] * op2) + op3 - srcCount += BuildOperandUses(op1); + srcCount += op1->isContained() ? BuildOperandUses(op1) : BuildDelayFreeUses(op1, op2); srcCount += BuildDelayFreeUses(op3, op2); } else @@ -2383,7 +2383,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) if (op1->isContained() || op1->IsRegOptional()) { // op3 = ([op1] * op2) + op3 - srcCount += BuildOperandUses(op1); + srcCount += op1->isContained() ? BuildOperandUses(op1) : BuildDelayFreeUses(op1, op3); srcCount += BuildDelayFreeUses(op2, op3); } else @@ -2404,7 +2404,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) tgtPrefUse = BuildUse(op2); srcCount += 1; // result = ([op1] * op2) + op3 - srcCount += BuildOperandUses(op1); + srcCount += op1->isContained() ? BuildOperandUses(op1) : BuildDelayFreeUses(op1, op2); srcCount += BuildDelayFreeUses(op3, op2); } else From eed591286e809b105b727b2a6dcc12cf56b7a146 Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Wed, 27 Oct 2021 20:40:18 -0700 Subject: [PATCH 28/38] Use targetReg instead of GetResultOpNumForFMA in codegen --- src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 61 +++++++++++---------- 1 file changed, 33 insertions(+), 28 deletions(-) diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 04fbde028cf256..73d83ac5979b37 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -2127,29 +2127,32 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) regNumber op1Reg; regNumber op2Reg; + regNumber op1RegOld = op1->GetRegNum(); + regNumber op2RegOld = op2->GetRegNum(); + regNumber op3RegOld = op3->GetRegNum(); + + // For special case that targetReg equals to more than one op Regs + bool sameReg = false; + if ((op1RegOld == op2RegOld && targetReg == op1RegOld) || (op1RegOld == op3RegOld && targetReg == op1RegOld) || + (op2RegOld == op3RegOld && targetReg == op1RegOld)) + sameReg = true; + bool isCommutative = false; const bool copiesUpperBits = HWIntrinsicInfo::CopiesUpperBits(intrinsicId); // Intrinsics with CopyUpperBits semantics cannot have op1 be contained assert(!copiesUpperBits || !op1->isContained()); - unsigned resultOpNum = 0; - LIR::Use use; - if (LIR::AsRange(compiler->compCurBB).TryGetUse(node, &use)) - { - resultOpNum = node->GetResultOpNumForFMA(use.User(), op1, op2, op3); - } - // Intrinsics with CopyUpperBits semantics must have op1 as target - if (resultOpNum == 1 || copiesUpperBits) + if ((targetReg == op1RegOld || copiesUpperBits) && !sameReg) { if (op2->isContained() || op2->IsRegOptional()) { // op1 = (op1 * [op2]) + op3 // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 ins = _132form; - op1Reg = op1->GetRegNum(); - op2Reg = op3->GetRegNum(); + op1Reg = op1RegOld; + op2Reg = op3RegOld; op3 = op2; } else @@ -2157,39 +2160,39 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) assert(op3->isContained() || op3->IsRegOptional()); // op1 = (op1 * op2) + [op3] // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] - op1Reg = op1->GetRegNum(); - op2Reg = op2->GetRegNum(); + op1Reg = op1RegOld; + op2Reg = op2RegOld; isCommutative = copiesUpperBits; } } - else if (resultOpNum == 3) + else if (targetReg == op3RegOld && !sameReg) { // 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1 ins = _231form; - op1Reg = op3->GetRegNum(); + op1Reg = op3RegOld; if (op1->isContained() || op1->IsRegOptional()) { // op3 = ([op1] * op2) + op3 - op2Reg = op2->GetRegNum(); + op2Reg = op2RegOld; op3 = op1; } else { assert(op2->isContained() || op2->IsRegOptional()); // op3 = (op1 * [op2]) + op3 - op2Reg = op1->GetRegNum(); + op2Reg = op1RegOld; op3 = op2; } } - else if (resultOpNum == 2) + else if (targetReg == op2RegOld && !sameReg) { if (op1->isContained() || op1->IsRegOptional()) { // op2 = ([op1] * op2) + op3 // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 ins = _132form; - op1Reg = op2->GetRegNum(); - op2Reg = op3->GetRegNum(); + op1Reg = op2RegOld; + op2Reg = op3RegOld; op3 = op1; } else @@ -2197,21 +2200,23 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) assert(op3->isContained() || op3->IsRegOptional()); // op2 = (op1 * op2) + [op3] // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] - op1Reg = op2->GetRegNum(); - op2Reg = op1->GetRegNum(); + op1Reg = op2RegOld; + op2Reg = op1RegOld; isCommutative = copiesUpperBits; } } else { - assert(resultOpNum == 0); + // For special case that targetReg not euqal to any of the op*Reg + // or targetReg equals to more than one op Regs + if (op1->isContained() || op1->IsRegOptional()) { // op2 = ([op1] * op2) + op3 // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 ins = _132form; - op1Reg = op2->GetRegNum(); - op2Reg = op3->GetRegNum(); + op1Reg = op2RegOld; + op2Reg = op3RegOld; op3 = op1; } else @@ -2221,8 +2226,8 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) // op1 = (op1 * [op2]) + op3 // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 ins = _132form; - op1Reg = op1->GetRegNum(); - op2Reg = op3->GetRegNum(); + op1Reg = op1RegOld; + op2Reg = op3RegOld; op3 = op2; } else @@ -2230,8 +2235,8 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) assert(op3->isContained() || op3->IsRegOptional()); // op1 = (op1 * op2) + [op3] // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] - op1Reg = op1->GetRegNum(); - op2Reg = op2->GetRegNum(); + op1Reg = op1RegOld; + op2Reg = op2RegOld; isCommutative = copiesUpperBits; } } From 43c5034a5eca614505646938548d1560b40b29b0 Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Tue, 2 Nov 2021 15:34:29 -0700 Subject: [PATCH 29/38] Update variable names --- src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 53 ++++++++++----------- 1 file changed, 25 insertions(+), 28 deletions(-) diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 73d83ac5979b37..bd62a4d83357d6 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -2127,15 +2127,15 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) regNumber op1Reg; regNumber op2Reg; - regNumber op1RegOld = op1->GetRegNum(); - regNumber op2RegOld = op2->GetRegNum(); - regNumber op3RegOld = op3->GetRegNum(); + regNumber op1NodeReg = op1->GetRegNum(); + regNumber op2NodeReg = op2->GetRegNum(); + regNumber op3NodeReg = op3->GetRegNum(); // For special case that targetReg equals to more than one op Regs - bool sameReg = false; - if ((op1RegOld == op2RegOld && targetReg == op1RegOld) || (op1RegOld == op3RegOld && targetReg == op1RegOld) || - (op2RegOld == op3RegOld && targetReg == op1RegOld)) - sameReg = true; + bool uniqueOverwrittenReg = true; + if ((op1NodeReg == op2NodeReg && targetReg == op1NodeReg) || + (op1NodeReg == op3NodeReg && targetReg == op1NodeReg) || (op2NodeReg == op3NodeReg && targetReg == op2NodeReg)) + uniqueOverwrittenReg = false; bool isCommutative = false; const bool copiesUpperBits = HWIntrinsicInfo::CopiesUpperBits(intrinsicId); @@ -2144,15 +2144,15 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) assert(!copiesUpperBits || !op1->isContained()); // Intrinsics with CopyUpperBits semantics must have op1 as target - if ((targetReg == op1RegOld || copiesUpperBits) && !sameReg) + if ((targetReg == op1NodeReg || copiesUpperBits) && uniqueOverwrittenReg) { + op1Reg = op1NodeReg; if (op2->isContained() || op2->IsRegOptional()) { // op1 = (op1 * [op2]) + op3 // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 ins = _132form; - op1Reg = op1RegOld; - op2Reg = op3RegOld; + op2Reg = op3NodeReg; op3 = op2; } else @@ -2160,39 +2160,38 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) assert(op3->isContained() || op3->IsRegOptional()); // op1 = (op1 * op2) + [op3] // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] - op1Reg = op1RegOld; - op2Reg = op2RegOld; + op2Reg = op2NodeReg; isCommutative = copiesUpperBits; } } - else if (targetReg == op3RegOld && !sameReg) + else if (targetReg == op3NodeReg && uniqueOverwrittenReg) { // 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1 ins = _231form; - op1Reg = op3RegOld; + op1Reg = op3NodeReg; if (op1->isContained() || op1->IsRegOptional()) { // op3 = ([op1] * op2) + op3 - op2Reg = op2RegOld; + op2Reg = op2NodeReg; op3 = op1; } else { assert(op2->isContained() || op2->IsRegOptional()); // op3 = (op1 * [op2]) + op3 - op2Reg = op1RegOld; + op2Reg = op1NodeReg; op3 = op2; } } - else if (targetReg == op2RegOld && !sameReg) + else if (targetReg == op2NodeReg && uniqueOverwrittenReg) { + op1Reg = op2NodeReg; if (op1->isContained() || op1->IsRegOptional()) { // op2 = ([op1] * op2) + op3 // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 ins = _132form; - op1Reg = op2RegOld; - op2Reg = op3RegOld; + op2Reg = op3NodeReg; op3 = op1; } else @@ -2200,14 +2199,13 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) assert(op3->isContained() || op3->IsRegOptional()); // op2 = (op1 * op2) + [op3] // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] - op1Reg = op2RegOld; - op2Reg = op1RegOld; + op2Reg = op1NodeReg; isCommutative = copiesUpperBits; } } else { - // For special case that targetReg not euqal to any of the op*Reg + // For special case where targetReg is not euqal to any of the op*Reg // or targetReg equals to more than one op Regs if (op1->isContained() || op1->IsRegOptional()) @@ -2215,19 +2213,19 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) // op2 = ([op1] * op2) + op3 // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 ins = _132form; - op1Reg = op2RegOld; - op2Reg = op3RegOld; + op1Reg = op2NodeReg; + op2Reg = op3NodeReg; op3 = op1; } else { + op1Reg = op1NodeReg; if (op2->isContained() || op2->IsRegOptional()) { // op1 = (op1 * [op2]) + op3 // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 ins = _132form; - op1Reg = op1RegOld; - op2Reg = op3RegOld; + op2Reg = op3NodeReg; op3 = op2; } else @@ -2235,8 +2233,7 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) assert(op3->isContained() || op3->IsRegOptional()); // op1 = (op1 * op2) + [op3] // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] - op1Reg = op1RegOld; - op2Reg = op2RegOld; + op2Reg = op2NodeReg; isCommutative = copiesUpperBits; } } From 5ef70a5260db737f10011512f3b6677efd7b1e49 Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Sun, 7 Nov 2021 11:47:25 -0800 Subject: [PATCH 30/38] Refactor lsra to solve lastUse status changed caused assertion failure --- src/coreclr/jit/gentree.cpp | 21 ++++---- src/coreclr/jit/lsraxarch.cpp | 90 +++++++++++------------------------ 2 files changed, 38 insertions(+), 73 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 25386c820c1fe0..1820cea394a339 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -21825,7 +21825,7 @@ uint16_t GenTreeLclVarCommon::GetLclOffs() const // // Return Value: // The operand number overwritten or lastUse. 0 is the default value, where the result is written into -// a destination that is not one of the source operands. +// a destination that is not one of the source operands and there is no last use op. // unsigned GenTreeHWIntrinsic::GetResultOpNumForFMA(GenTree* use, GenTree* op1, GenTree* op2, GenTree* op3) { @@ -21850,17 +21850,16 @@ unsigned GenTreeHWIntrinsic::GetResultOpNumForFMA(GenTree* use, GenTree* op1, Ge return 3; } } - else - { - // For LclVar, check if any op is lastUse - if (op1->OperIs(GT_LCL_VAR) && op1->IsLastUse(0)) - return 1; - else if (op3->OperIs(GT_LCL_VAR) && op3->IsLastUse(0)) - return 3; - else - return 2; - } + // If no overwritten op, check if there is any last use op + + if (op1->OperIs(GT_LCL_VAR) && op1->IsLastUse(0)) + return 1; + else if (op3->OperIs(GT_LCL_VAR) && op3->IsLastUse(0)) + return 3; + else if (op2->OperIs(GT_LCL_VAR) && op2->IsLastUse(0)) + return 2; + return 0; } #endif // TARGET_XARCH && FEATURE_HW_INTRINSICS diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 09d903fa72636a..97161e8db95473 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2339,94 +2339,60 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) assert(!copiesUpperBits || !op1->isContained()); // Intrinsics with CopyUpperBits semantics must have op1 as target - if (resultOpNum == 1 || copiesUpperBits) + if ((op1->isContained() || op1->IsRegOptional()) && !copiesUpperBits) { - tgtPrefUse = BuildUse(op1); - srcCount += 1; - - if (op2->isContained() || op2->IsRegOptional()) + if (resultOpNum == 3) { - // op1 = (op1 * [op2]) + op3 - srcCount += op2->isContained() ? BuildOperandUses(op2) : BuildDelayFreeUses(op2, op1); - srcCount += BuildDelayFreeUses(op3, op1); + tgtPrefUse = BuildUse(op3); + // op3 = ([op1] * op2) + op3 + srcCount += op1->isContained() ? BuildOperandUses(op1) : BuildDelayFreeUses(op1, op3); + srcCount += BuildDelayFreeUses(op2, op3); } else { - // op1 = (op1 * op2) + [op3] - srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1); - srcCount += BuildDelayFreeUses(op2, op1); - } - } - else if (resultOpNum == 2) - { - tgtPrefUse = BuildUse(op2); - srcCount += 1; - - if (op1->isContained() || op1->IsRegOptional()) - { + tgtPrefUse = BuildUse(op2); // op2 = ([op1] * op2) + op3 srcCount += op1->isContained() ? BuildOperandUses(op1) : BuildDelayFreeUses(op1, op2); srcCount += BuildDelayFreeUses(op3, op2); } - else + srcCount += 1; + } + else if (op3->isContained() || op3->IsRegOptional()) + { + if (resultOpNum == 2 && !copiesUpperBits) { + tgtPrefUse = BuildUse(op2); // op2 = (op1 * op2) + [op3] srcCount += BuildDelayFreeUses(op1, op2); srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1); } - } - else if (resultOpNum == 3) - { - tgtPrefUse = BuildUse(op3); - srcCount += 1; - - if (op1->isContained() || op1->IsRegOptional()) - { - // op3 = ([op1] * op2) + op3 - srcCount += op1->isContained() ? BuildOperandUses(op1) : BuildDelayFreeUses(op1, op3); - srcCount += BuildDelayFreeUses(op2, op3); - } else { - // op3 = (op1 * [op2]) + op3 - srcCount += op2->isContained() ? BuildOperandUses(op2) : BuildDelayFreeUses(op2, op3); - srcCount += BuildDelayFreeUses(op1, op3); + tgtPrefUse = BuildUse(op1); + // op1 = (op1 * op2) + [op3] + srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1); + srcCount += BuildDelayFreeUses(op2, op1); } + srcCount += 1; } else { - assert(resultOpNum == 0); - if (op1->isContained() || op1->IsRegOptional()) + assert(op2->isContained() || op2->IsRegOptional()); + if (resultOpNum == 3 && !copiesUpperBits) { - // In the case that result is written into destination that is different from any of the - // source operands, we set op2 to be tgtPrefUse when op1 is contained. - - tgtPrefUse = BuildUse(op2); - srcCount += 1; - // result = ([op1] * op2) + op3 - srcCount += op1->isContained() ? BuildOperandUses(op1) : BuildDelayFreeUses(op1, op2); - srcCount += BuildDelayFreeUses(op3, op2); + tgtPrefUse = BuildUse(op3); + // op3 = (op1 * [op2]) + op3 + srcCount += op2->isContained() ? BuildOperandUses(op2) : BuildDelayFreeUses(op2, op3); + srcCount += BuildDelayFreeUses(op1, op3); } else { - // When op1 is not contained, we set op1 to be tgtPrefUse. - tgtPrefUse = BuildUse(op1); - srcCount += 1; - - if (op2->isContained() || op2->IsRegOptional()) - { - // result = (op1 * [op2]) + op3 - srcCount += op2->isContained() ? BuildOperandUses(op2) : BuildDelayFreeUses(op2, op1); - srcCount += BuildDelayFreeUses(op3, op1); - } - else - { - // result = (op1 * op2) + [op3] - srcCount += BuildDelayFreeUses(op2, op1); - srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1); - } + // op1 = (op1 * [op2]) + op3 + srcCount += op2->isContained() ? BuildOperandUses(op2) : BuildDelayFreeUses(op2, op1); + srcCount += BuildDelayFreeUses(op3, op1); } + srcCount += 1; } buildUses = false; From bfa6924b1feb925354937e040d6d492bdf15d8c0 Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Sun, 7 Nov 2021 14:19:33 -0800 Subject: [PATCH 31/38] Add check to prioritize contained op in lsra --- src/coreclr/jit/lsraxarch.cpp | 48 ++++++++++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 97161e8db95473..b368d0efdf6f46 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2338,8 +2338,50 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) // Intrinsics with CopyUpperBits semantics cannot have op1 be contained assert(!copiesUpperBits || !op1->isContained()); + // Prioritize contained node by checking if any op is contained first. + // If none of them is contained, check if any op is regOptional. + bool hasContainedOp = false; + unsigned containedOpNum = 0; + + if (op1->isContained() || op2->isContained() || op3->isContained()) + { + hasContainedOp = true; + } + if (hasContainedOp) + { + if (op1->isContained()) + { + containedOpNum = 1; + } + else if (op2->isContained()) + { + containedOpNum = 2; + } + else + { + assert(op3->isContained()); + containedOpNum = 3; + } + } + else + { + if (op1->IsRegOptional()) + { + containedOpNum = 1; + } + else if (op2->IsRegOptional()) + { + containedOpNum = 2; + } + else + { + assert(op3->IsRegOptional()); + containedOpNum = 3; + } + } + // Intrinsics with CopyUpperBits semantics must have op1 as target - if ((op1->isContained() || op1->IsRegOptional()) && !copiesUpperBits) + if ( containedOpNum == 1 && !copiesUpperBits) { if (resultOpNum == 3) { @@ -2357,7 +2399,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) } srcCount += 1; } - else if (op3->isContained() || op3->IsRegOptional()) + else if (containedOpNum == 3) { if (resultOpNum == 2 && !copiesUpperBits) { @@ -2377,7 +2419,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) } else { - assert(op2->isContained() || op2->IsRegOptional()); + assert(containedOpNum == 2); if (resultOpNum == 3 && !copiesUpperBits) { tgtPrefUse = BuildUse(op3); From 12f260b47fe9e9e89579293b73f1957cc9e94ed0 Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Sun, 7 Nov 2021 15:47:27 -0800 Subject: [PATCH 32/38] Update for jit format --- src/coreclr/jit/lsraxarch.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index b368d0efdf6f46..8ac49a16780ff6 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2340,7 +2340,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) // Prioritize contained node by checking if any op is contained first. // If none of them is contained, check if any op is regOptional. - bool hasContainedOp = false; + bool hasContainedOp = false; unsigned containedOpNum = 0; if (op1->isContained() || op2->isContained() || op3->isContained()) @@ -2381,7 +2381,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) } // Intrinsics with CopyUpperBits semantics must have op1 as target - if ( containedOpNum == 1 && !copiesUpperBits) + if (containedOpNum == 1 && !copiesUpperBits) { if (resultOpNum == 3) { From 5ca658ebf53beee4d84446236f9391fba9e3e935 Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Tue, 16 Nov 2021 22:47:35 -0800 Subject: [PATCH 33/38] Simplify code --- src/coreclr/jit/gentree.cpp | 2 +- src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 118 ++++++-------------- src/coreclr/jit/lowerxarch.cpp | 4 +- src/coreclr/jit/lsraxarch.cpp | 97 +++++----------- 4 files changed, 70 insertions(+), 151 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 1820cea394a339..9fdceae44b5b24 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -21831,7 +21831,7 @@ unsigned GenTreeHWIntrinsic::GetResultOpNumForFMA(GenTree* use, GenTree* op1, Ge { // only FMA intrinsic node should call into this function assert(HWIntrinsicInfo::lookupIsa(gtHWIntrinsicId) == InstructionSet_FMA); - if (use->OperIs(GT_STORE_LCL_VAR)) + if (use != nullptr && use->OperIs(GT_STORE_LCL_VAR)) { // For store_lcl_var, check if any op is overwritten diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index bd62a4d83357d6..14351cf58eee8c 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -2124,18 +2124,13 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) argList = argList->Rest(); GenTree* op3 = argList->Current(); - regNumber op1Reg; - regNumber op2Reg; - regNumber op1NodeReg = op1->GetRegNum(); regNumber op2NodeReg = op2->GetRegNum(); regNumber op3NodeReg = op3->GetRegNum(); - // For special case that targetReg equals to more than one op Regs - bool uniqueOverwrittenReg = true; - if ((op1NodeReg == op2NodeReg && targetReg == op1NodeReg) || - (op1NodeReg == op3NodeReg && targetReg == op1NodeReg) || (op2NodeReg == op3NodeReg && targetReg == op2NodeReg)) - uniqueOverwrittenReg = false; + GenTree* emitOp1 = op1; + GenTree* emitOp2 = op2; + GenTree* emitOp3 = op3; bool isCommutative = false; const bool copiesUpperBits = HWIntrinsicInfo::CopiesUpperBits(intrinsicId); @@ -2143,101 +2138,60 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) // Intrinsics with CopyUpperBits semantics cannot have op1 be contained assert(!copiesUpperBits || !op1->isContained()); - // Intrinsics with CopyUpperBits semantics must have op1 as target - if ((targetReg == op1NodeReg || copiesUpperBits) && uniqueOverwrittenReg) + if (op1->isContained() || op1->isUsedFromSpillTemp()) { - op1Reg = op1NodeReg; - if (op2->isContained() || op2->IsRegOptional()) + if (targetReg == op2NodeReg) { - // op1 = (op1 * [op2]) + op3 + std::swap(emitOp1, emitOp2); + // op2 = ([op1] * op2) + op3 // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 - ins = _132form; - op2Reg = op3NodeReg; - op3 = op2; + ins = _132form; + std::swap(emitOp2, emitOp3); } else { - assert(op3->isContained() || op3->IsRegOptional()); - // op1 = (op1 * op2) + [op3] - // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] - op2Reg = op2NodeReg; - isCommutative = copiesUpperBits; + // targetReg == op3NodeReg or targetReg == ? + // op3 = ([op1] * op2) + op3 + // 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1 + ins = _231form; + std::swap(emitOp1, emitOp3); } } - else if (targetReg == op3NodeReg && uniqueOverwrittenReg) + else if (op2->isContained() || op2->isUsedFromSpillTemp()) { - // 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1 - ins = _231form; - op1Reg = op3NodeReg; - if (op1->isContained() || op1->IsRegOptional()) + if (targetReg == op3NodeReg) { - // op3 = ([op1] * op2) + op3 - op2Reg = op2NodeReg; - op3 = op1; + // op3 = (op1 * [op2]) + op3 + // 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1 + ins = _231form; + std::swap(emitOp1, emitOp3); } else { - assert(op2->isContained() || op2->IsRegOptional()); - // op3 = (op1 * [op2]) + op3 - op2Reg = op1NodeReg; - op3 = op2; + // targetReg == op1NodeReg or targetReg == ? + // op1 = (op1 * [op2]) + op3 + // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 + ins = _132form; } + std::swap(emitOp2, emitOp3); } - else if (targetReg == op2NodeReg && uniqueOverwrittenReg) + else { - op1Reg = op2NodeReg; - if (op1->isContained() || op1->IsRegOptional()) - { - // op2 = ([op1] * op2) + op3 - // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 - ins = _132form; - op2Reg = op3NodeReg; - op3 = op1; - } - else + // targetReg could be op1NodeReg, op2NodeReg, or not equal to any op + // op1 = (op1 * op2) + [op3] or op2 = (op1 * op2) + [op3] + // ? = (op1 * op2) + [op3] or ? = (op1 * op2) + op3 + // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] + isCommutative = copiesUpperBits; + if (targetReg == op2NodeReg) { - assert(op3->isContained() || op3->IsRegOptional()); // op2 = (op1 * op2) + [op3] // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] - op2Reg = op1NodeReg; - isCommutative = copiesUpperBits; + std::swap(emitOp1, emitOp2); } } - else - { - // For special case where targetReg is not euqal to any of the op*Reg - // or targetReg equals to more than one op Regs - if (op1->isContained() || op1->IsRegOptional()) - { - // op2 = ([op1] * op2) + op3 - // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 - ins = _132form; - op1Reg = op2NodeReg; - op2Reg = op3NodeReg; - op3 = op1; - } - else - { - op1Reg = op1NodeReg; - if (op2->isContained() || op2->IsRegOptional()) - { - // op1 = (op1 * [op2]) + op3 - // 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 - ins = _132form; - op2Reg = op3NodeReg; - op3 = op2; - } - else - { - assert(op3->isContained() || op3->IsRegOptional()); - // op1 = (op1 * op2) + [op3] - // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] - op2Reg = op2NodeReg; - isCommutative = copiesUpperBits; - } - } - } + regNumber op1Reg = emitOp1->GetRegNum(); + regNumber op2Reg = emitOp2->GetRegNum(); if (isCommutative && (op1Reg != targetReg) && (op2Reg == targetReg)) { @@ -2253,7 +2207,7 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) op2Reg = op1Reg; op1Reg = targetReg; } - genHWIntrinsic_R_R_R_RM(ins, attr, targetReg, op1Reg, op2Reg, op3); + genHWIntrinsic_R_R_R_RM(ins, attr, targetReg, op1Reg, op2Reg, emitOp3); genProduceReg(node); } diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 1f49963ee2aac0..107576f082793d 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -6340,11 +6340,13 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) bool supportsOp3RegOptional = false; unsigned resultOpNum = 0; LIR::Use use; + GenTree* user = nullptr; if (BlockRange().TryGetUse(node, &use)) { - resultOpNum = node->GetResultOpNumForFMA(use.User(), op1, op2, op3); + user = use.User(); } + resultOpNum = node->GetResultOpNumForFMA(user, op1, op2, op3); // Prioritize Containable op. Check if any one of the op is containable first. // Set op regOptional only if none of them is containable. diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 8ac49a16780ff6..ee6562af49ef80 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2330,112 +2330,75 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) unsigned resultOpNum = 0; LIR::Use use; + GenTree* user = nullptr; + if (LIR::AsRange(blockSequence[curBBSeqNum]).TryGetUse(intrinsicTree, &use)) { - resultOpNum = intrinsicTree->GetResultOpNumForFMA(use.User(), op1, op2, op3); + user = use.User(); } + resultOpNum = intrinsicTree->GetResultOpNumForFMA(user, op1, op2, op3); // Intrinsics with CopyUpperBits semantics cannot have op1 be contained assert(!copiesUpperBits || !op1->isContained()); - // Prioritize contained node by checking if any op is contained first. - // If none of them is contained, check if any op is regOptional. - bool hasContainedOp = false; unsigned containedOpNum = 0; - if (op1->isContained() || op2->isContained() || op3->isContained()) + if (op1->isContained() || op1->IsRegOptional()) { - hasContainedOp = true; + containedOpNum = 1; } - if (hasContainedOp) + else if (op2->isContained() || op2->IsRegOptional()) { - if (op1->isContained()) - { - containedOpNum = 1; - } - else if (op2->isContained()) - { - containedOpNum = 2; - } - else - { - assert(op3->isContained()); - containedOpNum = 3; - } + containedOpNum = 2; } else { - if (op1->IsRegOptional()) - { - containedOpNum = 1; - } - else if (op2->IsRegOptional()) - { - containedOpNum = 2; - } - else - { - assert(op3->IsRegOptional()); - containedOpNum = 3; - } + assert(op3->isContained() || op3->IsRegOptional()); + containedOpNum = 3; } + GenTree* emitOp1 = op1; + GenTree* emitOp2 = op2; + GenTree* emitOp3 = op3; + // Intrinsics with CopyUpperBits semantics must have op1 as target if (containedOpNum == 1 && !copiesUpperBits) { - if (resultOpNum == 3) - { - tgtPrefUse = BuildUse(op3); - // op3 = ([op1] * op2) + op3 - srcCount += op1->isContained() ? BuildOperandUses(op1) : BuildDelayFreeUses(op1, op3); - srcCount += BuildDelayFreeUses(op2, op3); - } - else + if (resultOpNum != 3) { - tgtPrefUse = BuildUse(op2); // op2 = ([op1] * op2) + op3 - srcCount += op1->isContained() ? BuildOperandUses(op1) : BuildDelayFreeUses(op1, op2); - srcCount += BuildDelayFreeUses(op3, op2); + std::swap(emitOp2, emitOp3); } - srcCount += 1; + + // else: op3 = ([op1] * op2) + op3 + std::swap(emitOp1, emitOp3); } else if (containedOpNum == 3) { if (resultOpNum == 2 && !copiesUpperBits) { - tgtPrefUse = BuildUse(op2); // op2 = (op1 * op2) + [op3] - srcCount += BuildDelayFreeUses(op1, op2); - srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1); - } - else - { - tgtPrefUse = BuildUse(op1); - // op1 = (op1 * op2) + [op3] - srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1); - srcCount += BuildDelayFreeUses(op2, op1); + std::swap(emitOp1, emitOp2); } - srcCount += 1; + // else: op1 = (op1 * op2) + [op3] } else { assert(containedOpNum == 2); + // op1 = (op1 * [op2]) + op3 + std::swap(emitOp2, emitOp3); + if (resultOpNum == 3 && !copiesUpperBits) { - tgtPrefUse = BuildUse(op3); // op3 = (op1 * [op2]) + op3 - srcCount += op2->isContained() ? BuildOperandUses(op2) : BuildDelayFreeUses(op2, op3); - srcCount += BuildDelayFreeUses(op1, op3); + std::swap(emitOp1, emitOp2); } - else - { - tgtPrefUse = BuildUse(op1); - // op1 = (op1 * [op2]) + op3 - srcCount += op2->isContained() ? BuildOperandUses(op2) : BuildDelayFreeUses(op2, op1); - srcCount += BuildDelayFreeUses(op3, op1); - } - srcCount += 1; } + tgtPrefUse = BuildUse(emitOp1); + + srcCount += 1; + srcCount += BuildDelayFreeUses(emitOp2, emitOp1); + srcCount += emitOp3->isContained() ? BuildOperandUses(emitOp3) : BuildDelayFreeUses(emitOp3, emitOp1); buildUses = false; break; From ec4ef664fa65f4be31eda2d47a8d58268e4f1395 Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Wed, 17 Nov 2021 13:34:55 -0800 Subject: [PATCH 34/38] Resolve comments --- src/coreclr/jit/gentree.cpp | 4 +- src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 12 ++---- src/coreclr/jit/lowerxarch.cpp | 30 +++++++------- src/coreclr/jit/lsraxarch.cpp | 44 ++++++++++++++------- 4 files changed, 49 insertions(+), 41 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 9fdceae44b5b24..59a5eafe09d246 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -21855,10 +21855,10 @@ unsigned GenTreeHWIntrinsic::GetResultOpNumForFMA(GenTree* use, GenTree* op1, Ge if (op1->OperIs(GT_LCL_VAR) && op1->IsLastUse(0)) return 1; - else if (op3->OperIs(GT_LCL_VAR) && op3->IsLastUse(0)) - return 3; else if (op2->OperIs(GT_LCL_VAR) && op2->IsLastUse(0)) return 2; + else if (op3->OperIs(GT_LCL_VAR) && op3->IsLastUse(0)) + return 3; return 0; } diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 14351cf58eee8c..630404c134dd8a 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -2132,7 +2132,6 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) GenTree* emitOp2 = op2; GenTree* emitOp3 = op3; - bool isCommutative = false; const bool copiesUpperBits = HWIntrinsicInfo::CopiesUpperBits(intrinsicId); // Intrinsics with CopyUpperBits semantics cannot have op1 be contained @@ -2181,7 +2180,6 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) // op1 = (op1 * op2) + [op3] or op2 = (op1 * op2) + [op3] // ? = (op1 * op2) + [op3] or ? = (op1 * op2) + op3 // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] - isCommutative = copiesUpperBits; if (targetReg == op2NodeReg) { // op2 = (op1 * op2) + [op3] @@ -2190,10 +2188,7 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) } } - regNumber op1Reg = emitOp1->GetRegNum(); - regNumber op2Reg = emitOp2->GetRegNum(); - - if (isCommutative && (op1Reg != targetReg) && (op2Reg == targetReg)) + if (!copiesUpperBits && (emitOp2->GetRegNum() == targetReg)) { assert(node->isRMWHWIntrinsic(compiler)); @@ -2204,10 +2199,9 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) // as target. However, for commutative intrinsics, we can just swap the operands // in order to have "reg2 = reg2 op reg1" which will end up producing the right code. - op2Reg = op1Reg; - op1Reg = targetReg; + std::swap(emitOp1, emitOp2); } - genHWIntrinsic_R_R_R_RM(ins, attr, targetReg, op1Reg, op2Reg, emitOp3); + genHWIntrinsic_R_R_R_RM(ins, attr, targetReg, emitOp1->GetRegNum(), emitOp2->GetRegNum(), emitOp3); genProduceReg(node); } diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 107576f082793d..395099da3cac6d 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -6351,36 +6351,36 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) // Prioritize Containable op. Check if any one of the op is containable first. // Set op regOptional only if none of them is containable. - if (resultOpNum != 1 && IsContainableHWIntrinsicOp(node, op1, &supportsOp1RegOptional) && - !HWIntrinsicInfo::CopiesUpperBits(intrinsicId)) + // Prefer to make op3 contained, + if (resultOpNum != 3 && IsContainableHWIntrinsicOp(node, op3, &supportsOp3RegOptional)) { - // result = ([op1] * op2) + op3 - MakeSrcContained(node, op1); + // result = (op1 * op2) + [op3] + MakeSrcContained(node, op3); } else if (resultOpNum != 2 && IsContainableHWIntrinsicOp(node, op2, &supportsOp2RegOptional)) { // result = (op1 * [op2]) + op3 MakeSrcContained(node, op2); } - else if (resultOpNum != 3 && IsContainableHWIntrinsicOp(node, op3, &supportsOp3RegOptional)) + else if (resultOpNum != 1 && !HWIntrinsicInfo::CopiesUpperBits(intrinsicId) && + IsContainableHWIntrinsicOp(node, op1, &supportsOp1RegOptional)) { - // result = (op1 * op2) + [op3] - MakeSrcContained(node, op3); + // result = ([op1] * op2) + op3 + MakeSrcContained(node, op1); } - else if (resultOpNum != 1 && !HWIntrinsicInfo::CopiesUpperBits(intrinsicId)) + else if (supportsOp3RegOptional) { - assert(supportsOp1RegOptional); - op1->SetRegOptional(); + assert(resultOpNum != 3); + op3->SetRegOptional(); } - else if (resultOpNum != 2) + else if (supportsOp2RegOptional) { - assert(supportsOp2RegOptional); + assert(resultOpNum != 2); op2->SetRegOptional(); } - else + else if (supportsOp1RegOptional) { - assert(supportsOp3RegOptional); - op3->SetRegOptional(); + op1->SetRegOptional(); } } else diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index ee6562af49ef80..ecf6023e93d537 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2338,11 +2338,9 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) } resultOpNum = intrinsicTree->GetResultOpNumForFMA(user, op1, op2, op3); - // Intrinsics with CopyUpperBits semantics cannot have op1 be contained - assert(!copiesUpperBits || !op1->isContained()); - unsigned containedOpNum = 0; + // containedOpNum remains 0 when no op is contianed or regOptional if (op1->isContained() || op1->IsRegOptional()) { containedOpNum = 1; @@ -2351,9 +2349,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) { containedOpNum = 2; } - else + else if (op3->isContained() || op3->IsRegOptional()) { - assert(op3->isContained() || op3->IsRegOptional()); containedOpNum = 3; } @@ -2362,38 +2359,55 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) GenTree* emitOp3 = op3; // Intrinsics with CopyUpperBits semantics must have op1 as target - if (containedOpNum == 1 && !copiesUpperBits) + assert(containedOpNum != 1 || !copiesUpperBits); + + if (containedOpNum == 1) { - if (resultOpNum != 3) + assert(containedOpNum != resultOpNum); + // resultOpNum is 3 or 0: op3/? = ([op1] * op2) + op3 + std::swap(emitOp1, emitOp3); + + if (resultOpNum == 2) { // op2 = ([op1] * op2) + op3 std::swap(emitOp2, emitOp3); } - - // else: op3 = ([op1] * op2) + op3 - std::swap(emitOp1, emitOp3); } else if (containedOpNum == 3) { + assert(containedOpNum != resultOpNum); if (resultOpNum == 2 && !copiesUpperBits) { // op2 = (op1 * op2) + [op3] std::swap(emitOp1, emitOp2); } - // else: op1 = (op1 * op2) + [op3] + // else: op1/? = (op1 * op2) + [op3] } - else + else if (containedOpNum == 2) { - assert(containedOpNum == 2); - // op1 = (op1 * [op2]) + op3 - std::swap(emitOp2, emitOp3); + assert(containedOpNum != resultOpNum); + // op1/? = (op1 * [op2]) + op3 + std::swap(emitOp2, emitOp3); if (resultOpNum == 3 && !copiesUpperBits) { // op3 = (op1 * [op2]) + op3 std::swap(emitOp1, emitOp2); } } + else + { + // containedOpNum == 0 + // no extra work when resultOpNum is 0 or 1 + if (resultOpNum == 2) + { + std::swap(emitOp1, emitOp2); + } + else if (resultOpNum == 3) + { + std::swap(emitOp1, emitOp3); + } + } tgtPrefUse = BuildUse(emitOp1); srcCount += 1; From aa93a853b354e47e63eddbf8cb46dc2049d8b0b1 Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Fri, 19 Nov 2021 13:27:58 -0800 Subject: [PATCH 35/38] Comment out assert because of lastUse change --- src/coreclr/jit/lsraxarch.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index ecf6023e93d537..cf00d494ad0d96 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2363,7 +2363,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) if (containedOpNum == 1) { - assert(containedOpNum != resultOpNum); + // resultOpNum might change between lowering and lsra, comment out assertion for now. + // assert(containedOpNum != resultOpNum); // resultOpNum is 3 or 0: op3/? = ([op1] * op2) + op3 std::swap(emitOp1, emitOp3); @@ -2375,7 +2376,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) } else if (containedOpNum == 3) { - assert(containedOpNum != resultOpNum); + // assert(containedOpNum != resultOpNum); if (resultOpNum == 2 && !copiesUpperBits) { // op2 = (op1 * op2) + [op3] @@ -2385,7 +2386,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) } else if (containedOpNum == 2) { - assert(containedOpNum != resultOpNum); + // assert(containedOpNum != resultOpNum); // op1/? = (op1 * [op2]) + op3 std::swap(emitOp2, emitOp3); From c66a018532257e0b540b2e8d0c1ad5a87a3e9223 Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Mon, 22 Nov 2021 11:35:06 -0800 Subject: [PATCH 36/38] Fix some copiesUpperBits related errors --- src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 630404c134dd8a..d2785499b3d2ea 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -2158,7 +2158,7 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) } else if (op2->isContained() || op2->isUsedFromSpillTemp()) { - if (targetReg == op3NodeReg) + if (!copiesUpperBits && (targetReg == op3NodeReg)) { // op3 = (op1 * [op2]) + op3 // 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1 @@ -2180,7 +2180,7 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) // op1 = (op1 * op2) + [op3] or op2 = (op1 * op2) + [op3] // ? = (op1 * op2) + [op3] or ? = (op1 * op2) + op3 // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] - if (targetReg == op2NodeReg) + if (!copiesUpperBits && (targetReg == op2NodeReg)) { // op2 = (op1 * op2) + [op3] // 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] @@ -2188,19 +2188,6 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) } } - if (!copiesUpperBits && (emitOp2->GetRegNum() == targetReg)) - { - assert(node->isRMWHWIntrinsic(compiler)); - - // We have "reg2 = (reg1 * reg2) +/- op3" where "reg1 != reg2" on a RMW intrinsic. - // - // For non-commutative intrinsics, we should have ensured that op2 was marked - // delay free in order to prevent it from getting assigned the same register - // as target. However, for commutative intrinsics, we can just swap the operands - // in order to have "reg2 = reg2 op reg1" which will end up producing the right code. - - std::swap(emitOp1, emitOp2); - } genHWIntrinsic_R_R_R_RM(ins, attr, targetReg, emitOp1->GetRegNum(), emitOp2->GetRegNum(), emitOp3); genProduceReg(node); } From a4657c76a764702e24e5daef8abada2f4c9bd3ce Mon Sep 17 00:00:00 2001 From: weilinwa Date: Tue, 30 Nov 2021 14:35:17 -0800 Subject: [PATCH 37/38] Update src/coreclr/jit/lsraxarch.cpp Co-authored-by: Kunal Pathak --- src/coreclr/jit/lsraxarch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 7100883c8cb372..dd9d004268f76d 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2284,7 +2284,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) unsigned containedOpNum = 0; - // containedOpNum remains 0 when no op is contianed or regOptional + // containedOpNum remains 0 when no operand is contained or regOptional if (op1->isContained() || op1->IsRegOptional()) { containedOpNum = 1; From 75d7a37ac7c02acec35117a2a58c47cdcd36dc69 Mon Sep 17 00:00:00 2001 From: Weilin Wang Date: Tue, 30 Nov 2021 15:33:44 -0800 Subject: [PATCH 38/38] Add link to the new issue --- src/coreclr/jit/gentree.cpp | 1 + src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 2 +- src/coreclr/jit/lsraxarch.cpp | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 644783492a6d83..0ba7b5550feaf5 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -22475,6 +22475,7 @@ unsigned GenTreeHWIntrinsic::GetResultOpNumForFMA(GenTree* use, GenTree* op1, Ge } // If no overwritten op, check if there is any last use op + // https://github.com/dotnet/runtime/issues/62215 if (op1->OperIs(GT_LCL_VAR) && op1->IsLastUse(0)) return 1; diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 7759da3b1e241e..bb6a6daa815d8b 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -2041,7 +2041,7 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) GenTree* op2 = node->Op(2); GenTree* op3 = node->Op(3); - regNumber targetReg = node->GetRegNum(); + regNumber targetReg = node->GetRegNum(); genConsumeMultiOpOperands(node); diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index dd9d004268f76d..ebd80b5c134d81 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2307,6 +2307,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) if (containedOpNum == 1) { + // https://github.com/dotnet/runtime/issues/62215 // resultOpNum might change between lowering and lsra, comment out assertion for now. // assert(containedOpNum != resultOpNum); // resultOpNum is 3 or 0: op3/? = ([op1] * op2) + op3