-
Notifications
You must be signed in to change notification settings - Fork 5.3k
Optimize FMA codegen based on the overwritten #58196
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
ee2c0b6
46d0011
cce4bda
b825291
f615e39
b698036
7d9c0d6
1344d92
029a9b5
f2a371f
9955389
7c56653
1d51caa
091133e
9a6ae44
ffcff76
5641f8f
b7312ac
a325fe3
0f950dd
33a596d
5da9368
c3a9f07
9e356aa
f8159bc
18bbe4d
2ca2524
17bd967
eed5912
43c5034
5ef70a5
bfa6924
12f260b
5ca658e
ec4ef66
aa93a85
c66a018
ff5a433
a4657c7
75d7a37
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2339,94 +2339,60 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) | |
| assert(!copiesUpperBits || !op1->isContained()); | ||
|
|
||
| // Intrinsics with CopyUpperBits semantics must have op1 as target | ||
| if (resultOpNum == 1 || copiesUpperBits) | ||
| if ((op1->isContained() || op1->IsRegOptional()) && !copiesUpperBits) | ||
| { | ||
| tgtPrefUse = BuildUse(op1); | ||
| srcCount += 1; | ||
|
|
||
| if (op2->isContained() || op2->IsRegOptional()) | ||
| if (resultOpNum == 3) | ||
| { | ||
| // op1 = (op1 * [op2]) + op3 | ||
| srcCount += op2->isContained() ? BuildOperandUses(op2) : BuildDelayFreeUses(op2, op1); | ||
| srcCount += BuildDelayFreeUses(op3, op1); | ||
| tgtPrefUse = BuildUse(op3); | ||
| // op3 = ([op1] * op2) + op3 | ||
| srcCount += op1->isContained() ? BuildOperandUses(op1) : BuildDelayFreeUses(op1, op3); | ||
| srcCount += BuildDelayFreeUses(op2, op3); | ||
| } | ||
| else | ||
| { | ||
| // op1 = (op1 * op2) + [op3] | ||
| srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1); | ||
| srcCount += BuildDelayFreeUses(op2, op1); | ||
| } | ||
| } | ||
| else if (resultOpNum == 2) | ||
| { | ||
| tgtPrefUse = BuildUse(op2); | ||
| srcCount += 1; | ||
|
|
||
| if (op1->isContained() || op1->IsRegOptional()) | ||
| { | ||
| tgtPrefUse = BuildUse(op2); | ||
| // op2 = ([op1] * op2) + op3 | ||
| srcCount += op1->isContained() ? BuildOperandUses(op1) : BuildDelayFreeUses(op1, op2); | ||
| srcCount += BuildDelayFreeUses(op3, op2); | ||
| } | ||
| else | ||
| srcCount += 1; | ||
| } | ||
| else if (op3->isContained() || op3->IsRegOptional()) | ||
| { | ||
| if (resultOpNum == 2 && !copiesUpperBits) | ||
| { | ||
| tgtPrefUse = BuildUse(op2); | ||
| // op2 = (op1 * op2) + [op3] | ||
| srcCount += BuildDelayFreeUses(op1, op2); | ||
| srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1); | ||
| } | ||
| } | ||
| else if (resultOpNum == 3) | ||
| { | ||
| tgtPrefUse = BuildUse(op3); | ||
| srcCount += 1; | ||
|
|
||
| if (op1->isContained() || op1->IsRegOptional()) | ||
| { | ||
| // op3 = ([op1] * op2) + op3 | ||
| srcCount += op1->isContained() ? BuildOperandUses(op1) : BuildDelayFreeUses(op1, op3); | ||
| srcCount += BuildDelayFreeUses(op2, op3); | ||
| } | ||
| else | ||
| { | ||
| // op3 = (op1 * [op2]) + op3 | ||
| srcCount += op2->isContained() ? BuildOperandUses(op2) : BuildDelayFreeUses(op2, op3); | ||
| srcCount += BuildDelayFreeUses(op1, op3); | ||
| tgtPrefUse = BuildUse(op1); | ||
| // op1 = (op1 * op2) + [op3] | ||
| srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1); | ||
| srcCount += BuildDelayFreeUses(op2, op1); | ||
| } | ||
| srcCount += 1; | ||
| } | ||
| else | ||
| { | ||
| assert(resultOpNum == 0); | ||
| if (op1->isContained() || op1->IsRegOptional()) | ||
| assert(op2->isContained() || op2->IsRegOptional()); | ||
| if (resultOpNum == 3 && !copiesUpperBits) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Just capturing a comment; I don't think we need to do anything in this PR. I think the logic around |
||
| { | ||
| // In the case that result is written into destination that is different from any of the | ||
| // source operands, we set op2 to be tgtPrefUse when op1 is contained. | ||
|
|
||
| tgtPrefUse = BuildUse(op2); | ||
| srcCount += 1; | ||
| // result = ([op1] * op2) + op3 | ||
| srcCount += op1->isContained() ? BuildOperandUses(op1) : BuildDelayFreeUses(op1, op2); | ||
| srcCount += BuildDelayFreeUses(op3, op2); | ||
| tgtPrefUse = BuildUse(op3); | ||
| // op3 = (op1 * [op2]) + op3 | ||
| srcCount += op2->isContained() ? BuildOperandUses(op2) : BuildDelayFreeUses(op2, op3); | ||
| srcCount += BuildDelayFreeUses(op1, op3); | ||
| } | ||
| else | ||
| { | ||
| // When op1 is not contained, we set op1 to be tgtPrefUse. | ||
|
|
||
| tgtPrefUse = BuildUse(op1); | ||
| srcCount += 1; | ||
|
|
||
| if (op2->isContained() || op2->IsRegOptional()) | ||
| { | ||
| // result = (op1 * [op2]) + op3 | ||
| srcCount += op2->isContained() ? BuildOperandUses(op2) : BuildDelayFreeUses(op2, op1); | ||
| srcCount += BuildDelayFreeUses(op3, op1); | ||
| } | ||
| else | ||
| { | ||
| // result = (op1 * op2) + [op3] | ||
| srcCount += BuildDelayFreeUses(op2, op1); | ||
| srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1); | ||
| } | ||
| // op1 = (op1 * [op2]) + op3 | ||
| srcCount += op2->isContained() ? BuildOperandUses(op2) : BuildDelayFreeUses(op2, op1); | ||
| srcCount += BuildDelayFreeUses(op3, op1); | ||
| } | ||
| srcCount += 1; | ||
| } | ||
|
|
||
| buildUses = false; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd suggest swapping the ordering of `op2`/`op3` here.
The reasoning is that this method is picking a preference for the "overwritten op".
Preferencing `op1` as the first check here makes sense because scalar ops "copy upper bits", and therefore, if we're in that scenario, `op1` is the only operand that can "be the target", as it were; the others will have to be contained or delay free.
Preferencing `op2` after that (as the secondary preference) simply keeps it consistent with the `op1->IsLocal` checks above and results in the "least" amount of operand swapping if we order the containment checks accordingly.