-
Notifications
You must be signed in to change notification settings - Fork 5.3k
Clean up fix for single-reg returned normalize-on-load vars #59645
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This is a better fix for dotnet#58373 that changes the handling of this to happen in morph for all cases. We sometimes missed the insertion of necessary casts because we forgot to remove a GTF_DONT_CSE flag when folding an indirection. Fixing this leads to some new GT_CNS_DBL cases in lowering that hit an assert, but those cases should be correctly handled by the default case so just remove the assert. To get rid of some of the regressions I have allowed generating assertions when assigning struct fields from casts. It was unclear why this was not allowed in the first place.
|
Tagging subscribers to this area: @JulieLeeMSFT Issue DetailsThis is a better fix for #58373 that changes the handling of this to To get rid of some of the regressions I have allowed generating
|
|
Diffs are very small and generally positive, we get rid of some unnecessary casts this way, e.g. ; Final local variable assignments
;
; V00 this [V00,T00] ( 4, 4 ) byref -> rsi this single-def
-; V01 loc0 [V01,T02] ( 2, 2 ) long -> rax ld-addr-op single-def
+;* V01 loc0 [V01 ] ( 0, 0 ) long -> zero-ref ld-addr-op single-def
; V02 OutArgs [V02 ] ( 1, 1 ) lclBlk (32) [rsp+00H] "OutgoingArgSpace"
; V03 tmp1 [V03,T01] ( 4, 4 ) long -> rdi "Inline stloc first use temp"
;
@@ -37,9 +37,8 @@ G_M19257_IG02: ; gcrefRegs=00000000 {}, byrefRegs=00000040 {rsi}, byref
mov eax, eax
shl rax, 32
or rdi, rax
- mov rax, rdi
- vmovd xmm0, rax
- ;; bbWeight=1 PerfScore 6.00
+ vmovd xmm0, rdi
+ ;; bbWeight=1 PerfScore 5.75
G_M19257_IG03: ; , epilog, nogc, extend
add rsp, 40
pop rsi
@@ -47,7 +46,7 @@ G_M19257_IG03: ; , epilog, nogc, extend
ret
;; bbWeight=1 PerfScore 2.25
-; Total bytes of code 54, prolog size 9, PerfScore 17.15, instruction count 19, allocated bytes for code 54 (MethodHash=3770b4c6) for method Internal.IL.ILDisassembler:ReadILDouble():double:this
+; Total bytes of code 51, prolog size 9, PerfScore 16.60, instruction count 18, allocated bytes for code 51 (MethodHash=3770b4c6) for method Internal.IL.ILDisassembler:ReadILDouble():double:this ;# V01 OutArgs [V01 ] ( 1, 1 ) lclBlk ( 0) [rsp+00H] "OutgoingArgSpace"
; V02 tmp1 [V02,T01] ( 2, 4 ) ubyte -> rax "Inlining Arg"
;* V03 tmp2 [V03 ] ( 0, 0 ) struct ( 8) zero-ref "NewObj constructor temp"
-; V04 tmp3 [V04,T02] ( 2, 2 ) ubyte -> rax V03.Item1(offs=0x00) P-INDEP "field V03.Item1 (fldOffset=0x0)"
+;* V04 tmp3 [V04 ] ( 0, 0 ) ubyte -> zero-ref V03.Item1(offs=0x00) P-INDEP "field V03.Item1 (fldOffset=0x0)"
;
; Lcl frame size = 0
@@ -20,13 +20,12 @@ G_M45153_IG01: ; gcrefRegs=00000000 {}, byrefRegs=00000000 {}, byref, nog
G_M45153_IG02: ; gcrefRegs=00000002 {rcx}, byrefRegs=00000000 {}, byref
; gcrRegs +[rcx]
movzx rax, byte ptr [rcx+8]
- movzx rax, al
- ;; bbWeight=1 PerfScore 2.25
+ ;; bbWeight=1 PerfScore 2.00
G_M45153_IG03: ; , epilog, nogc, extend
ret
;; bbWeight=1 PerfScore 1.00
-; Total bytes of code 8, prolog size 0, PerfScore 4.05, instruction count 3, allocated bytes for code 8 (MethodHash=0f984f9e) for method System.TupleExtensions:ToValueTuple(System.Tuple`1[Byte]):System.ValueTuple`1[Byte]
+; Total bytes of code 5, prolog size 0, PerfScore 3.50, instruction count 2, allocated bytes for code 5 (MethodHash=0f984f9e) for method System.TupleExtensions:ToValueTuple(System.Tuple`1[Byte]):System.ValueTuple`1[Byte]There are also cases where we now do normalization that we didn't do before and that we actually should have done before: ; Final local variable assignments
;
;# V00 OutArgs [V00 ] ( 1, 1 ) lclBlk ( 0) [rsp+00H] "OutgoingArgSpace"
-; V01 tmp1 [V01,T00] ( 2, 4 ) double -> mm0 ld-addr-op "Inlining Arg"
+;* V01 tmp1 [V01 ] ( 0, 0 ) double -> zero-ref ld-addr-op "Inlining Arg"
;
; Lcl frame size = 0
diff --git "a/C:\\dev\\dotnet\\spmi\\asm.fix_58373_2.coreclr_tests.pmi.windows.x64.checked\\base\\/210391.dasm" "b/C:\\dev\\dotnet\\spmi\\asm.fix_58373_2.coreclr_tests.pmi.windows.x64.checked\\diff\\/210391.dasm"
index 1cd586f..aff9ae0 100644
--- "a/C:\\dev\\dotnet\\spmi\\asm.fix_58373_2.coreclr_tests.pmi.windows.x64.checked\\base\\/210391.dasm"
+++ "b/C:\\dev\\dotnet\\spmi\\asm.fix_58373_2.coreclr_tests.pmi.windows.x64.checked\\diff\\/210391.dasm"
@@ -14,13 +14,13 @@
G_M53716_IG01: ; gcrefRegs=00000000 {}, byrefRegs=00000000 {}, byref, nogc <-- Prolog IG
;; bbWeight=1 PerfScore 0.00
G_M53716_IG02: ; gcrefRegs=00000000 {}, byrefRegs=00000000 {}, byref
- mov eax, ecx
+ movsx rax, cl
;; bbWeight=1 PerfScore 0.25
G_M53716_IG03: ; , epilog, nogc, extend
ret
;; bbWeight=1 PerfScore 1.00
-; Total bytes of code 3, prolog size 0, PerfScore 1.55, instruction count 2, allocated bytes for code 3 (MethodHash=49b22e2b) for method Test:noinline1(byte):byte
+; Total bytes of code 5, prolog size 0, PerfScore 1.75, instruction count 2, allocated bytes for code 5 (MethodHash=49b22e2b) for method Test:noinline1(byte):bytefor the code: runtime/src/tests/JIT/Directed/zeroinit/init_byte.il Lines 16 to 23 in 0c0bd5a
I doubt we would be able to hit a problem with this though since it only happens for primitive types in registers and those would have been normalized already in managed codegen (and for reverse-pinvoke the caller does not make assumptions about normalization). The summary is: benchmarks.run.windows.x64.checked.mch:Detail diffscoreclr_tests.pmi.windows.x64.checked.mch:Detail diffslibraries.crossgen2.windows.x64.checked.mch:Detail diffslibraries.pmi.windows.x64.checked.mch:Detail diffslibraries_tests.pmi.windows.x64.checked.mch:Detail diffs |
| // Try and see if we can make a subrange assertion. | ||
| if (((assertionKind == OAK_SUBRANGE) || (assertionKind == OAK_EQUAL))) | ||
| { | ||
| // Keep the casts on small struct fields. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can also delete this from the global propagation code too, I presume. Though we do not have practically any test coverage for that code.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done, it did not result in any diffs except for when it pushed out some other assertions.
|
PTAL @dotnet/jit-contrib @sandreenko |
sandreenko
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
|
@dotnet/jit-contrib Ping, need an MS org review in addition to @sandreenko's. |
This is a better fix for #58373 that changes the handling of this to
happen in morph for all cases. We sometimes missed the insertion of
necessary casts because we forgot to remove a GTF_DONT_CSE flag when
folding an indirection. Fixing this leads to some new GT_CNS_DBL cases
in lowering that hit an assert, but those cases should be correctly
handled by the default case (#58589 (comment)) so just remove the assert.
To get rid of some of the regressions I have allowed generating
assertions when assigning struct fields from casts. It was unclear why
this was not allowed in the first place.