Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
ee2c0b6
Optimize FMA codegen base on the overwritten
weilinwa Jul 20, 2021
46d0011
Improve function/var names
weilinwa Aug 27, 2021
cce4bda
Add assertions
weilinwa Aug 27, 2021
b825291
Get use of FMA with TryGetUse
weilinwa Sep 7, 2021
f615e39
Decide FMA form with two conditions, OverwrittenOpNum and isContained
weilinwa Sep 8, 2021
b698036
Fix op reg error in codegen
weilinwa Sep 10, 2021
7d9c0d6
Decide form using lastUse and isContained in no overwritten case
weilinwa Sep 15, 2021
1344d92
Clean up code
weilinwa Sep 18, 2021
029a9b5
Separate default case overwrittenOpNum==0
weilinwa Sep 20, 2021
f2a371f
Apply format patch
weilinwa Sep 29, 2021
9955389
Change variable and function names
weilinwa Oct 1, 2021
7c56653
Update regOptional for op1 and resolve some other comments
weilinwa Oct 5, 2021
1d51caa
Optimize FMA codegen base on the overwritten
weilinwa Jul 20, 2021
091133e
Improve function/var names
weilinwa Aug 27, 2021
9a6ae44
Add assertions
weilinwa Aug 27, 2021
ffcff76
Get use of FMA with TryGetUse
weilinwa Sep 7, 2021
5641f8f
Decide FMA form with two conditions, OverwrittenOpNum and isContained
weilinwa Sep 8, 2021
b7312ac
Fix op reg error in codegen
weilinwa Sep 10, 2021
a325fe3
Decide form using lastUse and isContained in no overwritten case
weilinwa Sep 15, 2021
0f950dd
Clean up code
weilinwa Sep 18, 2021
33a596d
Separate default case overwrittenOpNum==0
weilinwa Sep 20, 2021
5da9368
Apply format patch
weilinwa Sep 29, 2021
c3a9f07
Change variable and function names
weilinwa Oct 1, 2021
9e356aa
Update regOptional for op1 and resolve some other comments
weilinwa Oct 5, 2021
f8159bc
Change var names
weilinwa Oct 13, 2021
18bbe4d
Resolve merge conflicts.
weilinwa Oct 13, 2021
2ca2524
Fix jit format
weilinwa Oct 13, 2021
17bd967
Fix build node error for op1 is regOptional
weilinwa Oct 14, 2021
eed5912
Use targetReg instead of GetResultOpNumForFMA in codegen
weilinwa Oct 28, 2021
43c5034
Update variable names
weilinwa Nov 2, 2021
5ef70a5
Refactor lsra to solve lastUse status changed caused assertion failure
weilinwa Nov 7, 2021
bfa6924
Add check to prioritize contained op in lsra
weilinwa Nov 7, 2021
12f260b
Update for jit format
weilinwa Nov 7, 2021
5ca658e
Simplify code
weilinwa Nov 17, 2021
ec4ef66
Resolve comments
weilinwa Nov 17, 2021
aa93a85
Comment out assert because of lastUse change
weilinwa Nov 19, 2021
c66a018
Fix some copiesUpperBits related errors
weilinwa Nov 22, 2021
ff5a433
Merge branch 'main' into fma_opt
weilinwa Nov 22, 2021
a4657c7
Update src/coreclr/jit/lsraxarch.cpp
weilinwa Nov 30, 2021
75d7a37
Add link to the new issue
weilinwa Nov 30, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Update regOptional for op1 and resolve some other comments
  • Loading branch information
weilinwa committed Oct 5, 2021
commit 7c566534bfcf5a709925ce8022ac452f9f513f95
26 changes: 15 additions & 11 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19797,23 +19797,17 @@ uint16_t GenTreeLclVarCommon::GetLclOffs() const
// In the case that none of the operand is overwritten, check if any of them is lastUse.
//
// Return Value:
// The operand number overwritten or lastUse. 0 is the default value, where the result is written into a destination that is not one of the source operands.
// The operand number overwritten or lastUse. 0 is the default value, where the result is written into
// a destination that is not one of the source operands.
//
unsigned GenTreeHWIntrinsic::GetResultOpNumForFMA(GenTree* use, GenTree* op1, GenTree* op2, GenTree* op3)
{
// only FMA intrinsic node should call into this function
assert(HWIntrinsicInfo::lookupIsa(gtHWIntrinsicId) == InstructionSet_FMA);
if (!use->OperIs(GT_STORE_LCL_VAR))
{
if (op1->OperIs(GT_LCL_VAR) && op1->IsLastUse(0))
return 1;
else if (op3->OperIs(GT_LCL_VAR) && op3->IsLastUse(0))
return 3;
else
return 2;
}
else
if (use->OperIs(GT_STORE_LCL_VAR))
{
// For store_lcl_var, check if any op is overwritten

GenTreeLclVarCommon* overwritten = use->AsLclVarCommon();
unsigned overwrittenLclNum = overwritten->GetLclNum();
if (op1->IsLocal() && op1->AsLclVarCommon()->GetLclNum() == overwrittenLclNum)
Expand All @@ -19829,7 +19823,17 @@ unsigned GenTreeHWIntrinsic::GetResultOpNumForFMA(GenTree* use, GenTree* op1, Ge
return 3;
}
}
else
{
// For LclVar, check if any op is lastUse

if (op1->OperIs(GT_LCL_VAR) && op1->IsLastUse(0))
return 1;
else if (op3->OperIs(GT_LCL_VAR) && op3->IsLastUse(0))
return 3;
else
return 2;
}
return 0;
}
#endif // TARGET_XARCH && FEATURE_HW_INTRINSICS
Expand Down
27 changes: 16 additions & 11 deletions src/coreclr/jit/hwintrinsiccodegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2156,34 +2156,36 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node)
}
else
{
assert(op3->isContained() || op3->IsRegOptional());
// op1 = (op1 * op2) + [op3]
// 213 form: XMM1 = (XMM2 * XMM1) + [XMM3]
op1Reg = op1->GetRegNum();
op2Reg = op2->GetRegNum();
op1Reg = op1->GetRegNum();
op2Reg = op2->GetRegNum();
isCommutative = copiesUpperBits;
}
isCommutative = copiesUpperBits;
}
else if (resultOpNum == 3)
{
// 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1
ins = _232form;
op1Reg = op3->GetRegNum();
if (op1->isContained())
if (op1->isContained() || op1->IsRegOptional())
{
// op3 = ([op1] * op2) + op3
op2Reg = op2->GetRegNum();
op3 = op1;
}
else
{
assert(op2->isContained() || op2->IsRegOptional());
// op3 = (op1 * [op2]) + op3
op2Reg = op1->GetRegNum();
op3 = op2;
}
}
else if (resultOpNum == 2)
{
if (op1->isContained())
if (op1->isContained() || op1->IsRegOptional())
{
// op2 = ([op1] * op2) + op3
// 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2
Expand All @@ -2194,16 +2196,18 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node)
}
else
{
assert(op3->isContained() || op3->IsRegOptional());
// op2 = (op1 * op2) + [op3]
// 213 form: XMM1 = (XMM2 * XMM1) + [XMM3]
op1Reg = op2->GetRegNum();
op2Reg = op1->GetRegNum();
op1Reg = op2->GetRegNum();
op2Reg = op1->GetRegNum();
isCommutative = copiesUpperBits;
}
}
else
{
assert(resultOpNum == 0);
if (op1->isContained())
if (op1->isContained() || op1->IsRegOptional())
{
// op2 = ([op1] * op2) + op3
// 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2
Expand All @@ -2225,12 +2229,13 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node)
}
else
{
assert(op3->isContained() || op3->IsRegOptional());
// op1 = (op1 * op2) + [op3]
// 213 form: XMM1 = (XMM2 * XMM1) + [XMM3]
op1Reg = op1->GetRegNum();
op2Reg = op2->GetRegNum();
op1Reg = op1->GetRegNum();
op2Reg = op2->GetRegNum();
isCommutative = copiesUpperBits;
}
isCommutative = copiesUpperBits;
}
}

Expand Down
22 changes: 13 additions & 9 deletions src/coreclr/jit/lowerxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6313,35 +6313,39 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
{
if ((intrinsicId >= NI_FMA_MultiplyAdd) && (intrinsicId <= NI_FMA_MultiplySubtractNegatedScalar))
{
bool supportsRegOptional = false;
bool supportsRegOptional2 = false;
unsigned resultOpNum = 0;
bool supportsRegOptional = false;
unsigned overwrittenOpNum = 0;
LIR::Use use;

if (BlockRange().TryGetUse(node, &use))
{
resultOpNum = node->GetResultOpNumForFMA(use.User(), op1, op2, op3);
overwrittenOpNum = node->GetResultOpNumForFMA(use.User(), op1, op2, op3);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: This variable name should probably be changed to resultOpNum corresponding to the GetResultOpNumForFMA()?

}

if (resultOpNum != 1 && IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional) &&
if (overwrittenOpNum != 1 && IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional) &&
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see what you are doing here - If a operand is containable and if it is not the overwritten operand, then we mark it as contained, but the logic will be clear if you flip the conditions inside if() i.e. do something like:

// This will say that if op2 is containable and it is not overwritten, then make it contained. Easy readiblity.
if (IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional2) && overwrittenOpNum != 2)
{ .. }

I also didn't understand the last else if and else for marking op2 vs. op3 as regoptional. Can we separate it out somehow to make it more clear?

Also couple of comments will be helpful here.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When I saw GetOverwrittenOpNumForFMA() I thought that there will be checking with regards to 0...Can you highlight clearly in one of the branch or add a comment as what happens when none of the operands are overwritten and none of them are lastUse (i.e. the case when GetOverwrittenOpNumForFMA() returns 0)?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see that this is not yet addressed....Also, since you have extra conditions for overwrittenOpNum != 1, etc. I would do something like this for all operands:

else if (overwrittenOpNum != 2)
{
   if (IsContainableHWIntrinsicOp(node, op2, &supportsOp2RegOptional))
  { 
    // result = (op1 * [op2]) + op3
    MakeSrcContained(node, op2);
  } else {
     assert(supportsOp2RegOptional);
     op2->SetRegOptional();
  }
}

Not only it will make code cleaner, by having separate supportsOp2RegOptional, supportsOp1RegOptional, etc. we would not accidently check for supportsRegOptional flag of some different operand.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kunalspathak, do you mean code like the following? This modified code will only enter the latter branch else if (resultOpNum != 3) in the case the resultOpNum == 2 && HWIntrinsicInfo::CopiesUpperBits(intrinsicId).

The current code is trying to prioritize containable op by checking all the possibilities and set regOptional only when none of the op is containable.

if (resultOpNum != 1 &&!HWIntrinsicInfo::CopiesUpperBits(intrinsicId))
{ 
  if (IsContainableHWIntrinsicOp(node, op1, &supportsOp1RegOptional))
  { 
    // result = ([op1] * op2) + op3
    MakeSrcContained(node, op1);
  } 
  else {
     assert(supportsOp1RegOptional);
     op1->SetRegOptional();
  }
}
else if (resultOpNum != 2)
{
  if (IsContainableHWIntrinsicOp(node, op2, &supportsOp2RegOptional))
  { 
    // result = (op1 * [op2]) + op3
    MakeSrcContained(node, op2);
  } 
  else {
     assert(supportsOp2RegOptional);
     op2->SetRegOptional();
  }
else if (resultOpNum != 3)
{ 
...
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like the supportsRegOptional value in function IsContainableHWIntrinsicOp(node, op1, &supportsOp1RegOptional) is only related to the node. We get same result with all the 3 ops. If this is correct, we don't need separate flags.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The current code is trying to prioritize containable op by checking all the possibilities and set regOptional only when none of the op is containable.

Ah, ok, then it makes sense. Can you add a comment describing this requirement?

Looks like the supportsRegOptional value in function IsContainableHWIntrinsicOp(node, op1, &supportsOp1RegOptional) is only related to the node.

I don't think so. If you see IsContainableHWIntrinsicOp, it relies on node to set supportsRegOptional.

!HWIntrinsicInfo::CopiesUpperBits(intrinsicId))
{
// result = ([op1] * op2) + op3
MakeSrcContained(node, op1);
}
else if (resultOpNum != 2 && IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional2))
else if (overwrittenOpNum != 2 && IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional))
{
// result = (op1 * [op2]) + op3
MakeSrcContained(node, op2);
}
else if (resultOpNum != 3 && IsContainableHWIntrinsicOp(node, op3, &supportsRegOptional))
else if (overwrittenOpNum != 3 && IsContainableHWIntrinsicOp(node, op3, &supportsRegOptional))
{
// result = (op1 * op2) + [op3]
MakeSrcContained(node, op3);
}
else if (resultOpNum == 3)
else if (overwrittenOpNum != 1 && !HWIntrinsicInfo::CopiesUpperBits(intrinsicId))
{
assert(supportsRegOptional2);
assert(supportsRegOptional);
op1->SetRegOptional();
}
else if (overwrittenOpNum != 2)
{
assert(supportsRegOptional);
op2->SetRegOptional();
}
else
Expand Down
8 changes: 4 additions & 4 deletions src/coreclr/jit/lsraxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2368,7 +2368,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
tgtPrefUse = BuildUse(op2);
srcCount += 1;

if (op1->isContained())
if (op1->isContained() || op1->IsRegOptional())
{
// op2 = ([op1] * op2) + op3
srcCount += BuildOperandUses(op1);
Expand All @@ -2386,7 +2386,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
tgtPrefUse = BuildUse(op3);
srcCount += 1;

if (op1->isContained())
if (op1->isContained() || op1->IsRegOptional())
{
// op3 = ([op1] * op2) + op3
srcCount += BuildOperandUses(op1);
Expand All @@ -2402,10 +2402,10 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
else
{
assert(resultOpNum == 0);
if (op1->isContained())
if (op1->isContained() || op1->IsRegOptional())
{
// In the case that result is writtent into destination that is different from any of the
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: written

// source operands, we set op2 to be tgtPrefUse when op1 is contained.
// source operands, we set op2 to be tgtPrefUse when op1 is contained.

tgtPrefUse = BuildUse(op2);
srcCount += 1;
Expand Down