Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
ee2c0b6
Optimize FMA codegen base on the overwritten
weilinwa Jul 20, 2021
46d0011
Improve function/var names
weilinwa Aug 27, 2021
cce4bda
Add assertions
weilinwa Aug 27, 2021
b825291
Get use of FMA with TryGetUse
weilinwa Sep 7, 2021
f615e39
Decide FMA form with two conditions, OverwrittenOpNum and isContained
weilinwa Sep 8, 2021
b698036
Fix op reg error in codegen
weilinwa Sep 10, 2021
7d9c0d6
Decide form using lastUse and isContained in no overwritten case
weilinwa Sep 15, 2021
1344d92
Clean up code
weilinwa Sep 18, 2021
029a9b5
Separate default case overwrittenOpNum==0
weilinwa Sep 20, 2021
f2a371f
Apply format patch
weilinwa Sep 29, 2021
9955389
Change variable and function names
weilinwa Oct 1, 2021
7c56653
Update regOptional for op1 and resolve some other comments
weilinwa Oct 5, 2021
1d51caa
Optimize FMA codegen base on the overwritten
weilinwa Jul 20, 2021
091133e
Improve function/var names
weilinwa Aug 27, 2021
9a6ae44
Add assertions
weilinwa Aug 27, 2021
ffcff76
Get use of FMA with TryGetUse
weilinwa Sep 7, 2021
5641f8f
Decide FMA form with two conditions, OverwrittenOpNum and isContained
weilinwa Sep 8, 2021
b7312ac
Fix op reg error in codegen
weilinwa Sep 10, 2021
a325fe3
Decide form using lastUse and isContained in no overwritten case
weilinwa Sep 15, 2021
0f950dd
Clean up code
weilinwa Sep 18, 2021
33a596d
Separate default case overwrittenOpNum==0
weilinwa Sep 20, 2021
5da9368
Apply format patch
weilinwa Sep 29, 2021
c3a9f07
Change variable and function names
weilinwa Oct 1, 2021
9e356aa
Update regOptional for op1 and resolve some other comments
weilinwa Oct 5, 2021
f8159bc
Change var names
weilinwa Oct 13, 2021
18bbe4d
Resolve merge conflicts.
weilinwa Oct 13, 2021
2ca2524
Fix jit format
weilinwa Oct 13, 2021
17bd967
Fix build node error for op1 is regOptional
weilinwa Oct 14, 2021
eed5912
Use targetReg instead of GetResultOpNumForFMA in codegen
weilinwa Oct 28, 2021
43c5034
Update variable names
weilinwa Nov 2, 2021
5ef70a5
Refactor lsra to solve lastUse status changed caused assertion failure
weilinwa Nov 7, 2021
bfa6924
Add check to prioritize contained op in lsra
weilinwa Nov 7, 2021
12f260b
Update for jit format
weilinwa Nov 7, 2021
5ca658e
Simplify code
weilinwa Nov 17, 2021
ec4ef66
Resolve comments
weilinwa Nov 17, 2021
aa93a85
Comment out assert because of lastUse change
weilinwa Nov 19, 2021
c66a018
Fix some copiesUpperBits related errors
weilinwa Nov 22, 2021
ff5a433
Merge branch 'main' into fma_opt
weilinwa Nov 22, 2021
a4657c7
Update src/coreclr/jit/lsraxarch.cpp
weilinwa Nov 30, 2021
75d7a37
Add link to the new issue
weilinwa Nov 30, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Refactor lsra to solve lastUse status changed caused assertion failure
  • Loading branch information
weilinwa committed Nov 7, 2021
commit 5ef70a5260db737f10011512f3b6677efd7b1e49
21 changes: 10 additions & 11 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21825,7 +21825,7 @@ uint16_t GenTreeLclVarCommon::GetLclOffs() const
//
// Return Value:
// The operand number overwritten or lastUse. 0 is the default value, where the result is written into
// a destination that is not one of the source operands.
// a destination that is not one of the source operands and there is no last use op.
//
unsigned GenTreeHWIntrinsic::GetResultOpNumForFMA(GenTree* use, GenTree* op1, GenTree* op2, GenTree* op3)
{
Expand All @@ -21850,17 +21850,16 @@ unsigned GenTreeHWIntrinsic::GetResultOpNumForFMA(GenTree* use, GenTree* op1, Ge
return 3;
}
}
else
{
// For LclVar, check if any op is lastUse

if (op1->OperIs(GT_LCL_VAR) && op1->IsLastUse(0))
return 1;
else if (op3->OperIs(GT_LCL_VAR) && op3->IsLastUse(0))
return 3;
else
return 2;
}
// If no overwritten op, check if there is any last use op

if (op1->OperIs(GT_LCL_VAR) && op1->IsLastUse(0))
return 1;
else if (op3->OperIs(GT_LCL_VAR) && op3->IsLastUse(0))
return 3;
else if (op2->OperIs(GT_LCL_VAR) && op2->IsLastUse(0))
return 2;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd suggest swapping the ordering of op2/op3 here

Suggested change
else if (op3->OperIs(GT_LCL_VAR) && op3->IsLastUse(0))
return 3;
else if (op2->OperIs(GT_LCL_VAR) && op2->IsLastUse(0))
return 2;
else if (op2->OperIs(GT_LCL_VAR) && op2->IsLastUse(0))
return 2;
else if (op3->OperIs(GT_LCL_VAR) && op3->IsLastUse(0))
return 3;

The reasoning is that this method is picking a preference for "overwritten op".

Preferring op1 as the first check here makes sense because scalar ops "copy upper bits"; in that scenario, op1 is the only operand that can "be the target", as it were, and the others will have to be contained or delay free.

Preferring op2 after that (as the secondary preference) simply keeps it consistent with the op1->IsLocal checks above and results in the least operand swapping, provided we order the containment checks accordingly.


return 0;
}
#endif // TARGET_XARCH && FEATURE_HW_INTRINSICS
Expand Down
90 changes: 28 additions & 62 deletions src/coreclr/jit/lsraxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2339,94 +2339,60 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
assert(!copiesUpperBits || !op1->isContained());

// Intrinsics with CopyUpperBits semantics must have op1 as target
if (resultOpNum == 1 || copiesUpperBits)
if ((op1->isContained() || op1->IsRegOptional()) && !copiesUpperBits)
{
tgtPrefUse = BuildUse(op1);
srcCount += 1;

if (op2->isContained() || op2->IsRegOptional())
if (resultOpNum == 3)
{
// op1 = (op1 * [op2]) + op3
srcCount += op2->isContained() ? BuildOperandUses(op2) : BuildDelayFreeUses(op2, op1);
srcCount += BuildDelayFreeUses(op3, op1);
tgtPrefUse = BuildUse(op3);
// op3 = ([op1] * op2) + op3
srcCount += op1->isContained() ? BuildOperandUses(op1) : BuildDelayFreeUses(op1, op3);
srcCount += BuildDelayFreeUses(op2, op3);
}
else
{
// op1 = (op1 * op2) + [op3]
srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1);
srcCount += BuildDelayFreeUses(op2, op1);
}
}
else if (resultOpNum == 2)
{
tgtPrefUse = BuildUse(op2);
srcCount += 1;

if (op1->isContained() || op1->IsRegOptional())
{
tgtPrefUse = BuildUse(op2);
// op2 = ([op1] * op2) + op3
srcCount += op1->isContained() ? BuildOperandUses(op1) : BuildDelayFreeUses(op1, op2);
srcCount += BuildDelayFreeUses(op3, op2);
}
else
srcCount += 1;
}
else if (op3->isContained() || op3->IsRegOptional())
{
if (resultOpNum == 2 && !copiesUpperBits)
{
tgtPrefUse = BuildUse(op2);
// op2 = (op1 * op2) + [op3]
srcCount += BuildDelayFreeUses(op1, op2);
srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1);
}
}
else if (resultOpNum == 3)
{
tgtPrefUse = BuildUse(op3);
srcCount += 1;

if (op1->isContained() || op1->IsRegOptional())
{
// op3 = ([op1] * op2) + op3
srcCount += op1->isContained() ? BuildOperandUses(op1) : BuildDelayFreeUses(op1, op3);
srcCount += BuildDelayFreeUses(op2, op3);
}
else
{
// op3 = (op1 * [op2]) + op3
srcCount += op2->isContained() ? BuildOperandUses(op2) : BuildDelayFreeUses(op2, op3);
srcCount += BuildDelayFreeUses(op1, op3);
tgtPrefUse = BuildUse(op1);
// op1 = (op1 * op2) + [op3]
srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1);
srcCount += BuildDelayFreeUses(op2, op1);
}
srcCount += 1;
}
else
{
assert(resultOpNum == 0);
if (op1->isContained() || op1->IsRegOptional())
assert(op2->isContained() || op2->IsRegOptional());
if (resultOpNum == 3 && !copiesUpperBits)
Copy link
Member

@tannergooding tannergooding Nov 17, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just capturing a comment; I don't think we need to do anything in this PR.

I think the logic around copiesUpperBits could be simplified a bit so we don't need these extra checks everywhere. That is, if copiesUpperBits is true, then resultOpNum doesn't matter if it's not 1, so maybe we should be forcing resultOpNum to be 0 in that case. In other words, if copiesUpperBits == true and resultOpNum != 1, then treat it as 0, because no matter what we do, op1 cannot be swapped or moved about and op2/op3 will be delay free or contained.

{
// In the case that result is written into destination that is different from any of the
// source operands, we set op2 to be tgtPrefUse when op1 is contained.

tgtPrefUse = BuildUse(op2);
srcCount += 1;
// result = ([op1] * op2) + op3
srcCount += op1->isContained() ? BuildOperandUses(op1) : BuildDelayFreeUses(op1, op2);
srcCount += BuildDelayFreeUses(op3, op2);
tgtPrefUse = BuildUse(op3);
// op3 = (op1 * [op2]) + op3
srcCount += op2->isContained() ? BuildOperandUses(op2) : BuildDelayFreeUses(op2, op3);
srcCount += BuildDelayFreeUses(op1, op3);
}
else
{
// When op1 is not contained, we set op1 to be tgtPrefUse.

tgtPrefUse = BuildUse(op1);
srcCount += 1;

if (op2->isContained() || op2->IsRegOptional())
{
// result = (op1 * [op2]) + op3
srcCount += op2->isContained() ? BuildOperandUses(op2) : BuildDelayFreeUses(op2, op1);
srcCount += BuildDelayFreeUses(op3, op1);
}
else
{
// result = (op1 * op2) + [op3]
srcCount += BuildDelayFreeUses(op2, op1);
srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1);
}
// op1 = (op1 * [op2]) + op3
srcCount += op2->isContained() ? BuildOperandUses(op2) : BuildDelayFreeUses(op2, op1);
srcCount += BuildDelayFreeUses(op3, op1);
}
srcCount += 1;
}

buildUses = false;
Expand Down