From c92fbdfc2374d3356ebccbaeaf907e7c60185aab Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 9 Dec 2022 19:46:48 -0800 Subject: [PATCH 001/125] Add VectorTableLookup 2/3/4 in hwinstrinsiclistarm64.h --- src/coreclr/jit/hwintrinsiclistarm64.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/coreclr/jit/hwintrinsiclistarm64.h b/src/coreclr/jit/hwintrinsiclistarm64.h index cbe956299d1663..1d44c7907c12e0 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64.h +++ b/src/coreclr/jit/hwintrinsiclistarm64.h @@ -646,6 +646,9 @@ HARDWARE_INTRINSIC(AdvSimd_Arm64, TransposeOdd, HARDWARE_INTRINSIC(AdvSimd_Arm64, UnzipEven, -1, 2, {INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1}, HW_Category_SIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AdvSimd_Arm64, UnzipOdd, -1, 2, {INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2}, HW_Category_SIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup, 16, 2, {INS_tbl, INS_tbl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup_2, 16, 2, {INS_tbl_2regs, INS_tbl_2regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup_3, 16, 2, {INS_tbl_3regs, INS_tbl_3regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup_4, 16, 2, {INS_tbl_4regs, INS_tbl_4regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookupExtension, 16, 3, {INS_tbx, INS_tbx, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(AdvSimd_Arm64, ZipHigh, -1, 2, {INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2}, HW_Category_SIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AdvSimd_Arm64, ZipLow, -1, 2, {INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1}, HW_Category_SIMD, HW_Flag_NoFlag) From bee0f8cbb898d852a68fa3d0fe0feabe83385814 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 9 Dec 2022 20:42:31 -0800 Subject: [PATCH 002/125] Add VectorTableLookup --- .../Arm/AdvSimd.PlatformNotSupported.cs | 72 +++++++++++++++++++ .../System/Runtime/Intrinsics/Arm/AdvSimd.cs | 72 +++++++++++++++++++ .../ref/System.Runtime.Intrinsics.cs | 14 ++++ 3 files changed, 158 insertions(+) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.PlatformNotSupported.cs index d53236e7116da8..082fbdabf2b231 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.PlatformNotSupported.cs @@ -3661,6 +3661,42 @@ internal Arm64() { } /// public static Vector128 VectorTableLookup(Vector128 table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + /// + /// 
uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + /// + public static Vector128 VectorTableLookup((Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + + /// + /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B + /// + public static Vector128 VectorTableLookup((Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + + /// + /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + /// + public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + + /// + /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B + /// + public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + + /// + /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + /// + public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + + /// + /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B + /// + public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + /// /// uint8x16_t vqvtbx1q_u8(uint8x16_t r, int8x16_t t, uint8x16_t idx) /// A64: TBX Vd.16B, {Vn.16B}, Vm.16B @@ -14966,6 +15002,42 @@ internal Arm64() { } /// public static Vector64 VectorTableLookup(Vector128 table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + /// + /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + /// + public static Vector64 VectorTableLookup((Vector64, Vector64) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + + /// + /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B + /// + public static Vector64 VectorTableLookup((Vector64, Vector64) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + + /// + /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + /// + public static Vector64 VectorTableLookup((Vector64, Vector64, Vector64) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + + /// + /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B + /// + public static Vector64 VectorTableLookup((Vector64, Vector64, Vector64) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + + /// + /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + /// + public static Vector64 VectorTableLookup((Vector64, Vector64, Vector64, Vector64) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + + /// + /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B + /// + public static Vector64 VectorTableLookup((Vector64, Vector64, Vector64, Vector64) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + /// /// uint8x8_t vqvtbx1_u8(uint8x8_t r, 
uint8x16_t t, uint8x8_t idx) /// A32: VTBX Dd, {Dn, Dn+1}, Dm diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs index 6edccbd383663d..fa9e8d9fb89fe7 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs @@ -3659,6 +3659,42 @@ internal Arm64() { } /// public static Vector128 VectorTableLookup(Vector128 table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); + /// + /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + /// + public static Vector128 VectorTableLookup(ValueTuple, Vector128> table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); + + /// + /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B + /// + public static Vector128 VectorTableLookup(ValueTuple, Vector128> table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); + + /// + /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + /// + public static Vector128 VectorTableLookup(ValueTuple, Vector128, Vector128> table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); + + /// + /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B + /// + public static Vector128 VectorTableLookup(ValueTuple, Vector128, Vector128> table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); + + /// + /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + /// + public static Vector128 VectorTableLookup(ValueTuple, Vector128, Vector128, Vector128> table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); + + /// + /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B + /// + public static Vector128 VectorTableLookup(ValueTuple, Vector128, Vector128, Vector128> table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); + /// /// uint8x16_t vqvtbx1q_u8(uint8x16_t r, int8x16_t t, uint8x16_t idx) /// A64: TBX Vd.16B, {Vn.16B}, Vm.16B @@ -14964,6 +15000,42 @@ internal Arm64() { } /// public static Vector64 VectorTableLookup(Vector128 table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); + /// + /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + /// + public static Vector64 VectorTableLookup(ValueTuple, Vector128> table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); + + /// + /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B + /// + public static Vector64 VectorTableLookup(ValueTuple, Vector128> table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); + + /// + /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + /// + public static Vector64 VectorTableLookup(ValueTuple, Vector128, Vector128> table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); + + /// + /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B + /// + public static Vector64 VectorTableLookup(ValueTuple, Vector128, Vector128> table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); 
+ + /// + /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + /// + public static Vector64 VectorTableLookup(ValueTuple, Vector128, Vector128, Vector128> table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); + + /// + /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B + /// + public static Vector64 VectorTableLookup(ValueTuple, Vector128, Vector128, Vector128> table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); + /// /// uint8x8_t vqvtbx1_u8(uint8x8_t r, uint8x16_t t, uint8x8_t idx) /// A32: VTBX Dd, {Dn, Dn+1}, Dm diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index c3b1e9580c0890..39c460fc070ac1 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -2533,6 +2533,12 @@ public unsafe static void StoreSelectedScalar(ulong* address, System.Runtime.Int public static System.Runtime.Intrinsics.Vector128 SubtractWideningUpper(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } public static System.Runtime.Intrinsics.Vector64 VectorTableLookup(System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector64 VectorTableLookup(System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookup(ValueTuple, System.Runtime.Intrinsics.Vector128> table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookup(ValueTuple, System.Runtime.Intrinsics.Vector128> table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookup(ValueTuple, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128> table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookup(ValueTuple, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128> table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookup(ValueTuple, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128> table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookup(ValueTuple, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128> table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector64 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector64 defaultValues, System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector64 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector64 defaultValues, System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 
Xor(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } @@ -3170,6 +3176,14 @@ public unsafe static void StorePairScalarNonTemporal(uint* address, System.Runti public static System.Runtime.Intrinsics.Vector64 UnzipOdd(System.Runtime.Intrinsics.Vector64 left, System.Runtime.Intrinsics.Vector64 right) { throw null; } public static System.Runtime.Intrinsics.Vector128 VectorTableLookup(System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 VectorTableLookup(System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup(ValueTuple, System.Runtime.Intrinsics.Vector128> table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup(ValueTuple, System.Runtime.Intrinsics.Vector128> table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup(ValueTuple, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128> table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup(ValueTuple, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128> table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup(ValueTuple, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128> table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup(ValueTuple, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128> table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector128 defaultValues, System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector128 defaultValues, System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 ZipHigh(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } From 426f68a9a2fb1c468d317822b89e2e0d9d221e30 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 9 Dec 2022 22:54:13 -0800 Subject: [PATCH 003/125] fixes to libraries --- .../Arm/AdvSimd.PlatformNotSupported.cs | 98 +++++++++---------- .../System/Runtime/Intrinsics/Arm/AdvSimd.cs | 92 ++++++++--------- .../ref/System.Runtime.Intrinsics.cs | 24 ++--- 3 files changed, 107 insertions(+), 107 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.PlatformNotSupported.cs index 082fbdabf2b231..c5824deff0b769 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.PlatformNotSupported.cs +++ 
b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.PlatformNotSupported.cs @@ -3673,29 +3673,29 @@ internal Arm64() { } /// public static Vector128 VectorTableLookup((Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } - /// - /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) - /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B - /// - public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } - - /// - /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) - /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B - /// - public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } - - /// - /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) - /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B - /// - public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } - - /// - /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) - /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B - /// - public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + ///// + ///// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) + ///// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + ///// + //public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + + ///// + ///// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) + ///// A64: TBL Vd.16B, {Vn.16B}, Vm.16B + ///// + //public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + + ///// + ///// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) + ///// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + ///// + //public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + + ///// + ///// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) + ///// A64: TBL Vd.16B, {Vn.16B}, Vm.16B + ///// + //public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } /// /// uint8x16_t vqvtbx1q_u8(uint8x16_t r, int8x16_t t, uint8x16_t idx) @@ -15006,37 +15006,37 @@ internal Arm64() { } /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B /// - public static Vector64 VectorTableLookup((Vector64, Vector64) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } - - /// - /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) - /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B - /// - public static Vector64 VectorTableLookup((Vector64, Vector64) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } - - /// - /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) - /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B - /// - public static Vector64 VectorTableLookup((Vector64, Vector64, Vector64) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } - - /// - /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) - /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B - 
/// - public static Vector64 VectorTableLookup((Vector64, Vector64, Vector64) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } - - /// - /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) - /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B - /// - public static Vector64 VectorTableLookup((Vector64, Vector64, Vector64, Vector64) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + public static Vector64 VectorTableLookup((Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } /// /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B /// - public static Vector64 VectorTableLookup((Vector64, Vector64, Vector64, Vector64) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + public static Vector64 VectorTableLookup((Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + + ///// + ///// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) + ///// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + ///// + //public static Vector64 VectorTableLookup((Vector64, Vector64, Vector64) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + + ///// + ///// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) + ///// A64: TBL Vd.16B, {Vn.16B}, Vm.16B + ///// + //public static Vector64 VectorTableLookup((Vector64, Vector64, Vector64) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + + ///// + ///// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) + ///// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + ///// + //public static Vector64 VectorTableLookup((Vector64, Vector64, Vector64, Vector64) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + + ///// + ///// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) + ///// A64: TBL Vd.16B, {Vn.16B}, Vm.16B + ///// + //public static Vector64 VectorTableLookup((Vector64, Vector64, Vector64, Vector64) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } /// /// uint8x8_t vqvtbx1_u8(uint8x8_t r, uint8x16_t t, uint8x8_t idx) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs index fa9e8d9fb89fe7..669466a84d2f32 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs @@ -3671,29 +3671,29 @@ internal Arm64() { } /// public static Vector128 VectorTableLookup(ValueTuple, Vector128> table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); - /// - /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) - /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B - /// - public static Vector128 VectorTableLookup(ValueTuple, Vector128, Vector128> table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); - - /// - /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) - /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B - /// - public static Vector128 VectorTableLookup(ValueTuple, Vector128, Vector128> table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); - - /// - /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) - /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B - /// - public static Vector128 VectorTableLookup(ValueTuple, Vector128, Vector128, Vector128> table, Vector128 byteIndexes) => 
VectorTableLookup(table, byteIndexes); - - /// - /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) - /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B - /// - public static Vector128 VectorTableLookup(ValueTuple, Vector128, Vector128, Vector128> table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); + ///// + ///// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) + ///// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + ///// + //public static Vector128 VectorTableLookup(ValueTuple, Vector128, Vector128> table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); + + ///// + ///// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) + ///// A64: TBL Vd.16B, {Vn.16B}, Vm.16B + ///// + //public static Vector128 VectorTableLookup(ValueTuple, Vector128, Vector128> table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); + + ///// + ///// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) + ///// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + ///// + //public static Vector128 VectorTableLookup(ValueTuple, Vector128, Vector128, Vector128> table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); + + ///// + ///// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) + ///// A64: TBL Vd.16B, {Vn.16B}, Vm.16B + ///// + //public static Vector128 VectorTableLookup(ValueTuple, Vector128, Vector128, Vector128> table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); /// /// uint8x16_t vqvtbx1q_u8(uint8x16_t r, int8x16_t t, uint8x16_t idx) @@ -15012,29 +15012,29 @@ internal Arm64() { } /// public static Vector64 VectorTableLookup(ValueTuple, Vector128> table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); - /// - /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) - /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B - /// - public static Vector64 VectorTableLookup(ValueTuple, Vector128, Vector128> table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); - - /// - /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) - /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B - /// - public static Vector64 VectorTableLookup(ValueTuple, Vector128, Vector128> table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); - - /// - /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) - /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B - /// - public static Vector64 VectorTableLookup(ValueTuple, Vector128, Vector128, Vector128> table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); - - /// - /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) - /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B - /// - public static Vector64 VectorTableLookup(ValueTuple, Vector128, Vector128, Vector128> table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); + ///// + ///// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) + ///// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + ///// + //public static Vector64 VectorTableLookup(ValueTuple, Vector128, Vector128> table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); + + ///// + ///// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) + ///// A64: TBL Vd.16B, {Vn.16B}, Vm.16B + ///// + //public static Vector64 VectorTableLookup(ValueTuple, Vector128, Vector128> table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); + + ///// + ///// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) + ///// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + ///// + //public static Vector64 VectorTableLookup(ValueTuple, Vector128, Vector128, Vector128> table, 
Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); + + ///// + ///// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) + ///// A64: TBL Vd.16B, {Vn.16B}, Vm.16B + ///// + //public static Vector64 VectorTableLookup(ValueTuple, Vector128, Vector128, Vector128> table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); /// /// uint8x8_t vqvtbx1_u8(uint8x8_t r, uint8x16_t t, uint8x8_t idx) diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index 39c460fc070ac1..2391ad67830f54 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -2533,12 +2533,12 @@ public unsafe static void StoreSelectedScalar(ulong* address, System.Runtime.Int public static System.Runtime.Intrinsics.Vector128 SubtractWideningUpper(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } public static System.Runtime.Intrinsics.Vector64 VectorTableLookup(System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector64 VectorTableLookup(System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector64 VectorTableLookup(ValueTuple, System.Runtime.Intrinsics.Vector128> table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector64 VectorTableLookup(ValueTuple, System.Runtime.Intrinsics.Vector128> table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector64 VectorTableLookup(ValueTuple, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128> table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector64 VectorTableLookup(ValueTuple, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128> table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector64 VectorTableLookup(ValueTuple, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128> table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector64 VectorTableLookup(ValueTuple, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128> table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + //public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + //public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, 
System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + //public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + //public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector64 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector64 defaultValues, System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector64 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector64 defaultValues, System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 Xor(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } @@ -3177,12 +3177,12 @@ public unsafe static void StorePairScalarNonTemporal(uint* address, System.Runti public static System.Runtime.Intrinsics.Vector128 VectorTableLookup(System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 VectorTableLookup(System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector128 VectorTableLookup(ValueTuple, System.Runtime.Intrinsics.Vector128> table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector128 VectorTableLookup(ValueTuple, System.Runtime.Intrinsics.Vector128> table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector128 VectorTableLookup(ValueTuple, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128> table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector128 VectorTableLookup(ValueTuple, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128> table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector128 VectorTableLookup(ValueTuple, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128> table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector128 VectorTableLookup(ValueTuple, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128> table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, 
System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + //public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + //public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + //public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + //public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector128 defaultValues, System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector128 defaultValues, System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } From 83eb54cadb7d167fa0615335c8c6945b194c7177 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 14 Dec 2022 01:01:27 -0800 Subject: [PATCH 004/125] Prototype of simple tbl --- src/coreclr/jit/codegenlinear.cpp | 1 + src/coreclr/jit/hwintrinsic.cpp | 30 +++++++++++++++++++++++++- src/coreclr/jit/hwintrinsic.h | 5 +++++ src/coreclr/jit/hwintrinsiclistarm64.h | 6 +++--- src/coreclr/jit/lowerarmarch.cpp | 6 ++++++ src/coreclr/jit/lsraarm64.cpp | 8 +++++++ src/coreclr/jit/lsrabuild.cpp | 17 ++++++++++----- src/coreclr/jit/treelifeupdater.cpp | 20 +++++++++++++++++ 8 files changed, 84 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index 7fdbe014806031..f32c9d54201e7f 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -1541,6 +1541,7 @@ regNumber CodeGen::genConsumeReg(GenTree* tree) GenTreeLclVar* lcl = tree->gtSkipReloadOrCopy()->AsLclVar(); LclVarDsc* varDsc = compiler->lvaGetDesc(lcl); unsigned firstFieldVarNum = varDsc->lvFieldLclStart; + for (unsigned i = 0; i < varDsc->lvFieldCnt; ++i) { LclVarDsc* fldVarDsc = compiler->lvaGetDesc(firstFieldVarNum + i); diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index cb7c69d82793f5..fcfc44652a430b 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -1137,7 +1137,35 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, case 2: op2 = getArgForHWIntrinsic(sigReader.GetOp2Type(), sigReader.op2ClsHnd); op2 = addRangeCheckIfNeeded(intrinsic, op2, mustExpand, immLowerBound, immUpperBound); - op1 = getArgForHWIntrinsic(sigReader.GetOp1Type(), sigReader.op1ClsHnd); + +#ifdef TARGET_ARM64 + if (intrinsic == NI_AdvSimd_Arm64_VectorTableLookup) + { + // check the number of fields present in op1 and set NI_AdvSimd_Arm64_VectorTableLookup_2, + // NI_AdvSimd_Arm64_VectorTableLookup_3, 
etc. + op1 = impPopStack().val; + int fieldCount = info.compCompHnd->getClassNumInstanceFields(sigReader.op1ClsHnd); + switch (fieldCount) + { + case 2: + intrinsic = NI_AdvSimd_Arm64_VectorTableLookup_2; + break; + case 3: + intrinsic = NI_AdvSimd_Arm64_VectorTableLookup_3; + break; + case 4: + intrinsic = NI_AdvSimd_Arm64_VectorTableLookup_4; + break; + default: + noway_assert("Unknown field count"); + } + + } + else +#endif + { + op1 = getArgForHWIntrinsic(sigReader.GetOp1Type(), sigReader.op1ClsHnd); + } retNode = isScalar ? gtNewScalarHWIntrinsicNode(retType, op1, op2, intrinsic) : gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); diff --git a/src/coreclr/jit/hwintrinsic.h b/src/coreclr/jit/hwintrinsic.h index b1299df1c1f1cf..60bfc23345b44f 100644 --- a/src/coreclr/jit/hwintrinsic.h +++ b/src/coreclr/jit/hwintrinsic.h @@ -787,7 +787,12 @@ struct HWIntrinsicInfo case NI_AdvSimd_Arm64_LoadPairVector64NonTemporal: case NI_AdvSimd_Arm64_LoadPairVector128: case NI_AdvSimd_Arm64_LoadPairVector128NonTemporal: + //case NI_AdvSimd_Arm64_VectorTableLookup_2: return 2; + //case NI_AdvSimd_Arm64_VectorTableLookup_3: + // return 3; + //case NI_AdvSimd_Arm64_VectorTableLookup_4: + // return 4; #endif default: diff --git a/src/coreclr/jit/hwintrinsiclistarm64.h b/src/coreclr/jit/hwintrinsiclistarm64.h index 1d44c7907c12e0..14df0705c668a0 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64.h +++ b/src/coreclr/jit/hwintrinsiclistarm64.h @@ -646,9 +646,9 @@ HARDWARE_INTRINSIC(AdvSimd_Arm64, TransposeOdd, HARDWARE_INTRINSIC(AdvSimd_Arm64, UnzipEven, -1, 2, {INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1}, HW_Category_SIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AdvSimd_Arm64, UnzipOdd, -1, 2, {INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2}, HW_Category_SIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup, 16, 2, {INS_tbl, INS_tbl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup_2, 16, 2, {INS_tbl_2regs, INS_tbl_2regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup_3, 16, 2, {INS_tbl_3regs, INS_tbl_3regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup_4, 16, 2, {INS_tbl_4regs, INS_tbl_4regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup_2, 16, 2, {INS_tbl_2regs, INS_tbl_2regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SupportsContainment) +HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup_3, 16, 2, {INS_tbl_3regs, INS_tbl_3regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SupportsContainment) +HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup_4, 16, 2, {INS_tbl_4regs, INS_tbl_4regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, 
HW_Flag_SupportsContainment) HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookupExtension, 16, 3, {INS_tbx, INS_tbx, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(AdvSimd_Arm64, ZipHigh, -1, 2, {INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2}, HW_Category_SIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AdvSimd_Arm64, ZipLow, -1, 2, {INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1}, HW_Category_SIMD, HW_Flag_NoFlag) diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index a32c33113c2087..4a4a4b4c609aaa 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -2580,6 +2580,12 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) { switch (intrin.id) { + case NI_AdvSimd_Arm64_VectorTableLookup_2: + case NI_AdvSimd_Arm64_VectorTableLookup_3: + case NI_AdvSimd_Arm64_VectorTableLookup_4: + //MakeSrcContained(node, intrin.op1); + intrin.op1->AsLclVar()->SetMultiReg(); + break; case NI_AdvSimd_DuplicateSelectedScalarToVector64: case NI_AdvSimd_DuplicateSelectedScalarToVector128: case NI_AdvSimd_Extract: diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index a479957c77a948..a751e7a93aa51c 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1066,6 +1066,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou { simdRegToSimdRegMove = varTypeIsFloating(intrinsicTree); } + // If we have an RMW intrinsic or an intrinsic with simple move semantic between two SIMD registers, // we want to preference op1Reg to the target if op1 is not contained. @@ -1086,6 +1087,13 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou else { srcCount += BuildOperandUses(intrin.op1); + + //TODO: Need to fix this reliably. + if ((intrin.id == NI_AdvSimd_Arm64_VectorTableLookup_2)) + { + assert(intrin.op1->OperIs(GT_LCL_VAR)); + BuildUse(intrin.op1, RBM_NONE, 1); + } } } diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index d5510858a2b9fa..33e85fb05554de 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -1256,11 +1256,15 @@ bool LinearScan::isCandidateMultiRegLclVar(GenTreeLclVar* lclNode) { assert(compiler->lvaEnregMultiRegVars && lclNode->IsMultiReg()); LclVarDsc* varDsc = compiler->lvaGetDesc(lclNode); - assert(varDsc->lvPromoted); - bool isMultiReg = (compiler->lvaGetPromotionType(varDsc) == Compiler::PROMOTION_TYPE_INDEPENDENT); - if (!isMultiReg) + bool isMultiReg = lclNode->IsMultiReg(); + if (strcmp(compiler->info.compMethodName, "Test") != 0) { - lclNode->ClearMultiReg(); + assert(varDsc->lvPromoted); + bool isMultiReg = (compiler->lvaGetPromotionType(varDsc) == Compiler::PROMOTION_TYPE_INDEPENDENT); + if (!isMultiReg) + { + lclNode->ClearMultiReg(); + } } #ifdef DEBUG for (unsigned int i = 0; i < varDsc->lvFieldCnt; i++) @@ -1765,7 +1769,10 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, LsraLocation currentLoc // Currently produce is unused, but need to strengthen an assert to check if produce is // as expected. 
See https://github.com/dotnet/runtime/issues/8678 int produce = newDefListCount - oldDefListCount; - assert((consume == 0) || (ComputeAvailableSrcCount(tree) == consume)); + if (strcmp(compiler->info.compMethodName, "Test") != 0) + { + assert((consume == 0) || (ComputeAvailableSrcCount(tree) == consume)); + } // If we are constraining registers, modify all the RefPositions we've just built to specify the // minimum reg count required. diff --git a/src/coreclr/jit/treelifeupdater.cpp b/src/coreclr/jit/treelifeupdater.cpp index c3e5076d728951..65bfac32ad919e 100644 --- a/src/coreclr/jit/treelifeupdater.cpp +++ b/src/coreclr/jit/treelifeupdater.cpp @@ -268,6 +268,26 @@ void TreeLifeUpdater::UpdateLifeVar(GenTree* tree) { VarSetOps::AddElemD(compiler, stackVarDeltaSet, varDsc->lvVarIndex); } + + if (lclVarTree->IsMultiRegLclVar()) + { + unsigned firstFieldVarNum = varDsc->lvFieldLclStart; + + for (unsigned i = 0; i < 2; ++i) + { + LclVarDsc* fieldVarDsc = compiler->lvaGetDesc(firstFieldVarNum + i); + bool isInReg = fieldVarDsc->lvIsInReg() && tree->GetRegByIndex(i) != REG_NA; + VarSetOps::AddElemD(compiler, varDeltaSet, fieldVarDsc->lvVarIndex); + + if (isInReg) + { + compiler->codeGen->genUpdateRegLife(fieldVarDsc, isBorn, isDying DEBUGARG(tree)); + } + } + + printf("field"); + + } } } else if (ForCodeGen && lclVarTree->IsMultiRegLclVar()) From d06c4a2b433321278baca6164c0084d0adc4eb39 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 15 Dec 2022 00:23:02 -0800 Subject: [PATCH 005/125] Some progress --- src/coreclr/jit/compiler.h | 5 +++- src/coreclr/jit/gentree.cpp | 7 ++++++ src/coreclr/jit/gentree.h | 14 +++++++++++ src/coreclr/jit/hwintrinsic.h | 20 +++++++++++++++ src/coreclr/jit/lowerarmarch.cpp | 9 ++++++- src/coreclr/jit/lsra.cpp | 21 ++++++++++++++-- src/coreclr/jit/lsra.h | 4 ++- src/coreclr/jit/lsraarm64.cpp | 10 +++++--- src/coreclr/jit/lsrabuild.cpp | 39 ++++++++++++++++++----------- src/coreclr/jit/treelifeupdater.cpp | 3 --- 10 files changed, 106 insertions(+), 26 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 1df7ae4841fd89..3b5f2ee35f0d4a 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -666,7 +666,10 @@ class LclVarDsc // Valid on promoted struct local fields. }; - unsigned char lvFieldCnt; // Number of fields in the promoted VarDsc. + union { + unsigned char lvFieldCnt; // Number of fields in the promoted VarDsc. 
+ unsigned char regCount; + }; unsigned char lvFldOffset; unsigned char lvFldOrdinal; diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 13c79a059ebe64..ada996b3868e3c 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -805,6 +805,12 @@ int GenTree::GetRegisterDstCount(Compiler* compiler) const if (OperIsScalarLocal()) { +#ifdef FEATURE_HW_INTRINSICS + if (AsLclVar()->IsMultiRegUse()) + { + return compiler->lvaGetDesc(AsLclVar())->regCount; + } +#endif // FEATURE_HW_INTRINSICS return AsLclVar()->GetFieldCount(compiler); } assert(!"Unexpected multi-reg node"); @@ -864,6 +870,7 @@ bool GenTree::IsMultiRegNode() const { return true; } + return false; } diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 9c2007df0ea83e..5f253106101b91 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -3771,6 +3771,7 @@ struct GenTreeLclVar : public GenTreeLclVarCommon private: regNumberSmall gtOtherReg[MAX_MULTIREG_COUNT - 1]; MultiRegSpillFlags gtSpillFlags; + bool isMultiRegUse; public: INDEBUG(IL_OFFSET gtLclILoffs;) // instr offset of ref (only for JIT dumps) @@ -3780,6 +3781,13 @@ struct GenTreeLclVar : public GenTreeLclVarCommon { return ((gtFlags & GTF_VAR_MULTIREG) != 0); } + + bool IsMultiRegUse() const + { + assert(!isMultiRegUse || ((gtFlags & GTF_VAR_MULTIREG) != 0)); + return isMultiRegUse; + } + void ClearMultiReg() { gtFlags &= ~GTF_VAR_MULTIREG; @@ -3790,6 +3798,12 @@ struct GenTreeLclVar : public GenTreeLclVarCommon ClearOtherRegFlags(); } + void SetMultiRegUse() + { + isMultiRegUse = true; + SetMultiReg(); + } + regNumber GetRegNumByIdx(int regIndex) const { assert(regIndex < MAX_MULTIREG_COUNT); diff --git a/src/coreclr/jit/hwintrinsic.h b/src/coreclr/jit/hwintrinsic.h index 60bfc23345b44f..0a4b7a4d5e6af6 100644 --- a/src/coreclr/jit/hwintrinsic.h +++ b/src/coreclr/jit/hwintrinsic.h @@ -773,6 +773,26 @@ struct HWIntrinsicInfo return (flags & HW_Flag_MultiReg) != 0; } + static int GetMultiRegUseCount(NamedIntrinsic id) + { + //assert(IsMultiRegU) + switch (id) + { +#ifdef TARGET_ARM64 + // TODO-ARM64-NYI: Support hardware intrinsics operating on multiple contiguous registers. 
+ case NI_AdvSimd_Arm64_VectorTableLookup_2: + return 2; + case NI_AdvSimd_Arm64_VectorTableLookup_3: + return 3; + case NI_AdvSimd_Arm64_VectorTableLookup_4: + return 4; +#endif + + default: + unreached(); + } + } + static int GetMultiRegCount(NamedIntrinsic id) { assert(IsMultiReg(id)); diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 4a4a4b4c609aaa..3a120782b6acc4 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -2581,10 +2581,17 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) switch (intrin.id) { case NI_AdvSimd_Arm64_VectorTableLookup_2: + comp->lvaGetDesc(intrin.op1->AsLclVar())->regCount = 2; + intrin.op1->AsLclVar()->SetMultiRegUse(); + break; case NI_AdvSimd_Arm64_VectorTableLookup_3: + comp->lvaGetDesc(intrin.op1->AsLclVar())->regCount = 3; + intrin.op1->AsLclVar()->SetMultiRegUse(); + break; case NI_AdvSimd_Arm64_VectorTableLookup_4: + comp->lvaGetDesc(intrin.op1->AsLclVar())->regCount = 4; //MakeSrcContained(node, intrin.op1); - intrin.op1->AsLclVar()->SetMultiReg(); + intrin.op1->AsLclVar()->SetMultiRegUse(); break; case NI_AdvSimd_DuplicateSelectedScalarToVector64: case NI_AdvSimd_DuplicateSelectedScalarToVector128: diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 1203485793fa69..52714ecbc1ee26 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -2178,7 +2178,7 @@ void LinearScan::checkLastUses(BasicBlock* block) { // We should never see ParamDefs or ZeroInits within a basic block. assert(currentRefPosition->refType != RefTypeParamDef && currentRefPosition->refType != RefTypeZeroInit); - if (currentRefPosition->isIntervalRef() && currentRefPosition->getInterval()->isLocalVar) + if (currentRefPosition->isIntervalRef() && currentRefPosition->getInterval()->isLocalVar /*&& !currentRefPosition->needsConsecutive*/) { unsigned varNum = currentRefPosition->getInterval()->varNum; unsigned varIndex = currentRefPosition->getInterval()->getVarIndex(compiler); @@ -2214,7 +2214,23 @@ void LinearScan::checkLastUses(BasicBlock* block) loc); foundDiff = true; } - VarSetOps::AddElemD(compiler, computedLive, varIndex); + + if (currentRefPosition->needsConsecutive) + { + // If this is a case of consecutive registers, refPositions are added so they get register + // They may not be bbLiveIn but are just used directly as operand. Only add them in computedLive + // if they were part of bbLiveIn. 
+ if(VarSetOps::IsMember(compiler, block->bbLiveIn, varIndex)) + { + JITDUMP("++ V%02u in computedLive\n", compiler->lvaTrackedIndexToLclNum(varIndex)); + VarSetOps::AddElemD(compiler, computedLive, varIndex); + } + } + else + { + JITDUMP("++ V%02u in computedLive\n", compiler->lvaTrackedIndexToLclNum(varIndex)); + VarSetOps::AddElemD(compiler, computedLive, varIndex); + } } else if (currentRefPosition->lastUse) { @@ -2229,6 +2245,7 @@ void LinearScan::checkLastUses(BasicBlock* block) if (currentRefPosition->refType == RefTypeDef || currentRefPosition->refType == RefTypeDummyDef) { + JITDUMP("-- V%02u in computedLive\n", compiler->lvaTrackedIndexToLclNum(varIndex)); VarSetOps::RemoveElemD(compiler, computedLive, varIndex); } } diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index e30736a63fd5a2..ecc2fb50986bc2 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1838,7 +1838,7 @@ class LinearScan : public LinearScanInterface bool isCandidateMultiRegLclVar(GenTreeLclVar* lclNode); bool checkContainedOrCandidateLclVar(GenTreeLclVar* lclNode); - RefPosition* BuildUse(GenTree* operand, regMaskTP candidates = RBM_NONE, int multiRegIdx = 0); + RefPosition* BuildUse(GenTree* operand, regMaskTP candidates = RBM_NONE, int multiRegIdx = 0, bool needsConsecutive = false); void setDelayFree(RefPosition* use); int BuildBinaryUses(GenTreeOp* node, regMaskTP candidates = RBM_NONE); @@ -2261,6 +2261,7 @@ class RefPosition // across all targets and that happened to be 4 on Arm. Hence index value // would be 0..MAX_RET_REG_COUNT-1. unsigned char multiRegIdx : 2; + bool needsConsecutive; // Last Use - this may be true for multiple RefPositions in the same Interval unsigned char lastUse : 1; @@ -2347,6 +2348,7 @@ class RefPosition , registerAssignment(RBM_NONE) , refType(refType) , multiRegIdx(0) + , needsConsecutive(false) , lastUse(false) , reload(false) , spillAfter(false) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index a751e7a93aa51c..96c81a71ce7729 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1086,13 +1086,17 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } else { - srcCount += BuildOperandUses(intrin.op1); - //TODO: Need to fix this reliably. 
if ((intrin.id == NI_AdvSimd_Arm64_VectorTableLookup_2)) { assert(intrin.op1->OperIs(GT_LCL_VAR)); - BuildUse(intrin.op1, RBM_NONE, 1); + BuildUse(intrin.op1, RBM_NONE, 0, /* needsConsecutive */true); + BuildUse(intrin.op1, RBM_NONE, 1, /* needsConsecutive */ true); + srcCount+=2; + } + else + { + srcCount += BuildOperandUses(intrin.op1); } } } diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 33e85fb05554de..6d7afead531b10 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -1256,21 +1256,32 @@ bool LinearScan::isCandidateMultiRegLclVar(GenTreeLclVar* lclNode) { assert(compiler->lvaEnregMultiRegVars && lclNode->IsMultiReg()); LclVarDsc* varDsc = compiler->lvaGetDesc(lclNode); - bool isMultiReg = lclNode->IsMultiReg(); - if (strcmp(compiler->info.compMethodName, "Test") != 0) + bool isMultiReg = false; + if (lclNode->IsMultiReg()) { - assert(varDsc->lvPromoted); - bool isMultiReg = (compiler->lvaGetPromotionType(varDsc) == Compiler::PROMOTION_TYPE_INDEPENDENT); - if (!isMultiReg) + if (!lclNode->IsMultiRegUse()) + { + assert(varDsc->lvPromoted); + bool isMultiReg = (compiler->lvaGetPromotionType(varDsc) == Compiler::PROMOTION_TYPE_INDEPENDENT); + if (!isMultiReg) + { + lclNode->ClearMultiReg(); + } + } + else { - lclNode->ClearMultiReg(); + isMultiReg = true; } } + #ifdef DEBUG - for (unsigned int i = 0; i < varDsc->lvFieldCnt; i++) + if (!lclNode->IsMultiRegUse()) { - LclVarDsc* fieldVarDsc = compiler->lvaGetDesc(varDsc->lvFieldLclStart + i); - assert(isCandidateVar(fieldVarDsc) == isMultiReg); + for (unsigned int i = 0; i < varDsc->lvFieldCnt; i++) + { + LclVarDsc* fieldVarDsc = compiler->lvaGetDesc(varDsc->lvFieldLclStart + i); + assert(isCandidateVar(fieldVarDsc) == isMultiReg); + } } #endif // DEBUG return isMultiReg; @@ -1667,7 +1678,7 @@ int LinearScan::ComputeOperandDstCount(GenTree* operand) } if (operand->IsValue()) { - // Operands that are values and are not contained consume all of their operands + // Operands that are values and are not contained, consume all of their operands // and produce one or more registers. return operand->GetRegisterDstCount(compiler); } @@ -1769,10 +1780,7 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, LsraLocation currentLoc // Currently produce is unused, but need to strengthen an assert to check if produce is // as expected. See https://github.com/dotnet/runtime/issues/8678 int produce = newDefListCount - oldDefListCount; - if (strcmp(compiler->info.compMethodName, "Test") != 0) - { - assert((consume == 0) || (ComputeAvailableSrcCount(tree) == consume)); - } + assert((consume == 0) || (ComputeAvailableSrcCount(tree) == consume)); // If we are constraining registers, modify all the RefPositions we've just built to specify the // minimum reg count required. @@ -3042,7 +3050,7 @@ void LinearScan::UpdatePreferencesOfDyingLocal(Interval* interval) // Notes: // The node must not be contained, and must have been processed by buildRefPositionsForNode(). 
// -RefPosition* LinearScan::BuildUse(GenTree* operand, regMaskTP candidates, int multiRegIdx) +RefPosition* LinearScan::BuildUse(GenTree* operand, regMaskTP candidates, int multiRegIdx, bool needsConsecutive) { assert(!operand->isContained()); Interval* interval; @@ -3096,6 +3104,7 @@ RefPosition* LinearScan::BuildUse(GenTree* operand, regMaskTP candidates, int mu operand = nullptr; } RefPosition* useRefPos = newRefPosition(interval, currentLoc, RefTypeUse, operand, candidates, multiRegIdx); + useRefPos->needsConsecutive = needsConsecutive; useRefPos->setRegOptional(regOptional); return useRefPos; } diff --git a/src/coreclr/jit/treelifeupdater.cpp b/src/coreclr/jit/treelifeupdater.cpp index 65bfac32ad919e..6d5d39d7625631 100644 --- a/src/coreclr/jit/treelifeupdater.cpp +++ b/src/coreclr/jit/treelifeupdater.cpp @@ -284,9 +284,6 @@ void TreeLifeUpdater::UpdateLifeVar(GenTree* tree) compiler->codeGen->genUpdateRegLife(fieldVarDsc, isBorn, isDying DEBUGARG(tree)); } } - - printf("field"); - } } } From 99adc17845c06f9c25d37e8ab310c8d1abad850f Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Sun, 25 Dec 2022 13:37:04 -0800 Subject: [PATCH 006/125] Some more updates --- src/coreclr/jit/codegenlinear.cpp | 2 +- src/coreclr/jit/compiler.cpp | 2 +- src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 14 ++ src/coreclr/jit/hwintrinsiclistarm64.h | 10 +- src/coreclr/jit/lsra.cpp | 193 +++++++++++++------- src/coreclr/jit/lsra.h | 23 ++- src/coreclr/jit/lsraarm64.cpp | 7 +- src/coreclr/jit/treelifeupdater.cpp | 4 +- 8 files changed, 180 insertions(+), 75 deletions(-) diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index f32c9d54201e7f..6408b8baf768ff 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -1444,7 +1444,7 @@ regNumber CodeGen::genConsumeReg(GenTree* tree, unsigned multiRegIndex) assert(lcl->IsMultiReg()); LclVarDsc* varDsc = compiler->lvaGetDesc(lcl); - assert(varDsc->lvPromoted); + //assert(varDsc->lvPromoted); assert(multiRegIndex < varDsc->lvFieldCnt); unsigned fieldVarNum = varDsc->lvFieldLclStart + multiRegIndex; LclVarDsc* fldVarDsc = compiler->lvaGetDesc(fieldVarNum); diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 68483414fb7f79..3b764ca05681dc 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -9728,7 +9728,7 @@ void cTreeFlags(Compiler* comp, GenTree* tree) { chars += printf("[CALL]"); } - switch (op) + switch (op) { case GT_MUL: case GT_CAST: diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 555865bf751d7f..bdd8e7cc39a1d1 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -424,6 +424,10 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) instruction ins = INS_invalid; switch (intrin.id) { + case NI_AdvSimd_Arm64_VectorTableLookup_2: + ins = INS_tbl_2regs; + break; + case NI_AdvSimd_AddWideningLower: assert(varTypeIsIntegral(intrin.baseType)); if (intrin.op1->TypeGet() == TYP_SIMD8) @@ -489,6 +493,16 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) switch (intrin.id) { + case NI_AdvSimd_Arm64_VectorTableLookup_2: + if (intrin.op1->IsCopyOrReload()) + { + GenTree* op1 = intrin.op1->AsCopyOrReload()->gtGetOp1(); + assert(!op1->IsCopyOrReload()); + op1Reg = op1->GetRegNum(); + } + GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt); + break; + case NI_AdvSimd_BitwiseSelect: // Even though BitwiseSelect is 
an RMW intrinsic per se, we don't want to mark it as such // since we can handle all possible allocation decisions for targetReg. diff --git a/src/coreclr/jit/hwintrinsiclistarm64.h b/src/coreclr/jit/hwintrinsiclistarm64.h index 14df0705c668a0..1fd0e50104cc7d 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64.h +++ b/src/coreclr/jit/hwintrinsiclistarm64.h @@ -471,8 +471,8 @@ HARDWARE_INTRINSIC(AdvSimd, SubtractSaturateScalar, HARDWARE_INTRINSIC(AdvSimd, SubtractScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sub, INS_sub, INS_fsub, INS_fsub}, HW_Category_SIMD, HW_Flag_SIMDScalar) HARDWARE_INTRINSIC(AdvSimd, SubtractWideningLower, 8, 2, {INS_ssubl, INS_usubl, INS_ssubl, INS_usubl, INS_ssubl, INS_usubl, INS_ssubw, INS_usubw, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd, SubtractWideningUpper, 16, 2, {INS_ssubl2, INS_usubl2, INS_ssubl2, INS_usubl2, INS_ssubl2, INS_usubl2, INS_ssubw2, INS_usubw2, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd, VectorTableLookup, 8, 2, {INS_tbl, INS_tbl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, VectorTableLookupExtension, 8, 3, {INS_tbx, INS_tbx, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, VectorTableLookup, 8, 2, {INS_tbl, INS_tbl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd, VectorTableLookupExtension, 8, 3, {INS_tbx, INS_tbx, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd, Xor, -1, 2, {INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor}, HW_Category_SIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(AdvSimd, ZeroExtendWideningLower, 8, 1, {INS_uxtl, INS_uxtl, INS_uxtl, INS_uxtl, INS_uxtl, INS_uxtl, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(AdvSimd, ZeroExtendWideningUpper, 16, 1, {INS_uxtl2, INS_uxtl2, INS_uxtl2, INS_uxtl2, INS_uxtl2, INS_uxtl2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) @@ -646,9 +646,9 @@ HARDWARE_INTRINSIC(AdvSimd_Arm64, TransposeOdd, HARDWARE_INTRINSIC(AdvSimd_Arm64, UnzipEven, -1, 2, {INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1}, HW_Category_SIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AdvSimd_Arm64, UnzipOdd, -1, 2, {INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2}, HW_Category_SIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup, 16, 2, {INS_tbl, INS_tbl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup_2, 16, 2, {INS_tbl_2regs, INS_tbl_2regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SupportsContainment) 
-HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup_3, 16, 2, {INS_tbl_3regs, INS_tbl_3regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SupportsContainment) -HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup_4, 16, 2, {INS_tbl_4regs, INS_tbl_4regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SupportsContainment) +HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup_2, 16, 2, {INS_tbl_2regs, INS_tbl_2regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment) +HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup_3, 16, 2, {INS_tbl_3regs, INS_tbl_3regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment) +HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup_4, 16, 2, {INS_tbl_4regs, INS_tbl_4regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment) HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookupExtension, 16, 3, {INS_tbx, INS_tbx, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(AdvSimd_Arm64, ZipHigh, -1, 2, {INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2}, HW_Category_SIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AdvSimd_Arm64, ZipLow, -1, 2, {INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1}, HW_Category_SIMD, HW_Flag_NoFlag) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 52714ecbc1ee26..db9021a21cb033 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -2215,18 +2215,18 @@ void LinearScan::checkLastUses(BasicBlock* block) foundDiff = true; } - if (currentRefPosition->needsConsecutive) - { - // If this is a case of consecutive registers, refPositions are added so they get register - // They may not be bbLiveIn but are just used directly as operand. Only add them in computedLive - // if they were part of bbLiveIn. - if(VarSetOps::IsMember(compiler, block->bbLiveIn, varIndex)) - { - JITDUMP("++ V%02u in computedLive\n", compiler->lvaTrackedIndexToLclNum(varIndex)); - VarSetOps::AddElemD(compiler, computedLive, varIndex); - } - } - else + //if (currentRefPosition->needsConsecutive) + //{ + // // If this is a case of consecutive registers, refPositions are added so they get register + // // They may not be bbLiveIn but are just used directly as operand. Only add them in computedLive + // // if they were part of bbLiveIn. + // if(VarSetOps::IsMember(compiler, block->bbLiveIn, varIndex)) + // { + // JITDUMP("++ V%02u in computedLive\n", compiler->lvaTrackedIndexToLclNum(varIndex)); + // VarSetOps::AddElemD(compiler, computedLive, varIndex); + // } + //} + //else { JITDUMP("++ V%02u in computedLive\n", compiler->lvaTrackedIndexToLclNum(varIndex)); VarSetOps::AddElemD(compiler, computedLive, varIndex); @@ -2786,73 +2786,101 @@ bool LinearScan::isMatchingConstant(RegRecord* physRegRecord, RefPosition* refPo // no such ref position, no register will be allocated. 
// -regNumber LinearScan::allocateReg(Interval* currentInterval, +regNumber LinearScan::allocateReg(Interval* referentInterval, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) { + Interval* currentInterval = refPosition->getInterval(); + assert(referentInterval == currentInterval); + regMaskTP foundRegBit = regSelector->select(currentInterval, refPosition DEBUG_ARG(registerScore)); if (foundRegBit == RBM_NONE) { return REG_NA; } + regNumber regToReturn = genRegNumFromMask(foundRegBit); - regNumber foundReg = genRegNumFromMask(foundRegBit); - RegRecord* availablePhysRegRecord = getRegisterRecord(foundReg); - Interval* assignedInterval = availablePhysRegRecord->assignedInterval; - if ((assignedInterval != currentInterval) && - isAssigned(availablePhysRegRecord ARM_ARG(getRegisterType(currentInterval, refPosition)))) - { - if (regSelector->isSpilling()) + //do + //{ + currentInterval = refPosition->getInterval(); + regNumber foundReg = genRegNumFromMask(foundRegBit); + RegRecord* availablePhysRegRecord = getRegisterRecord(foundReg); + Interval* assignedInterval = availablePhysRegRecord->assignedInterval; + if ((assignedInterval != currentInterval) && + isAssigned(availablePhysRegRecord ARM_ARG(getRegisterType(currentInterval, refPosition)))) { - // We're spilling. - CLANG_FORMAT_COMMENT_ANCHOR; + if (regSelector->isSpilling()) + { + // We're spilling. + CLANG_FORMAT_COMMENT_ANCHOR; #ifdef TARGET_ARM - if (currentInterval->registerType == TYP_DOUBLE) - { - assert(genIsValidDoubleReg(availablePhysRegRecord->regNum)); - unassignDoublePhysReg(availablePhysRegRecord); - } - else if (assignedInterval->registerType == TYP_DOUBLE) - { - // Make sure we spill both halves of the double register. - assert(genIsValidDoubleReg(assignedInterval->assignedReg->regNum)); - unassignPhysReg(assignedInterval->assignedReg, assignedInterval->recentRefPosition); - } - else + if (currentInterval->registerType == TYP_DOUBLE) + { + assert(genIsValidDoubleReg(availablePhysRegRecord->regNum)); + unassignDoublePhysReg(availablePhysRegRecord); + } + else if (assignedInterval->registerType == TYP_DOUBLE) + { + // Make sure we spill both halves of the double register. + assert(genIsValidDoubleReg(assignedInterval->assignedReg->regNum)); + unassignPhysReg(assignedInterval->assignedReg, assignedInterval->recentRefPosition); + } + else #endif - { - unassignPhysReg(availablePhysRegRecord, assignedInterval->recentRefPosition); - } - } - else - { - // If we considered this "unassigned" because this interval's lifetime ends before - // the next ref, remember it. - // For historical reasons (due to former short-circuiting of this case), if we're reassigning - // the current interval to a previous assignment, we don't remember the previous interval. - // Note that we need to compute this condition before calling unassignPhysReg, which wil reset - // assignedInterval->physReg. 
- bool wasAssigned = regSelector->foundUnassignedReg() && (assignedInterval != nullptr) && - (assignedInterval->physReg == foundReg); - unassignPhysReg(availablePhysRegRecord ARM_ARG(currentInterval->registerType)); - if (regSelector->isMatchingConstant() && compiler->opts.OptimizationEnabled()) - { - assert(assignedInterval->isConstant); - refPosition->treeNode->SetReuseRegVal(); - } - else if (wasAssigned) - { - updatePreviousInterval(availablePhysRegRecord, assignedInterval, assignedInterval->registerType); + { + unassignPhysReg(availablePhysRegRecord, assignedInterval->recentRefPosition); + } } else { - assert(!regSelector->isConstAvailable()); + // If we considered this "unassigned" because this interval's lifetime ends before + // the next ref, remember it. + // For historical reasons (due to former short-circuiting of this case), if we're reassigning + // the current interval to a previous assignment, we don't remember the previous interval. + // Note that we need to compute this condition before calling unassignPhysReg, which wil reset + // assignedInterval->physReg. + bool wasAssigned = regSelector->foundUnassignedReg() && (assignedInterval != nullptr) && + (assignedInterval->physReg == foundReg); + unassignPhysReg(availablePhysRegRecord ARM_ARG(currentInterval->registerType)); + if (regSelector->isMatchingConstant() && compiler->opts.OptimizationEnabled()) + { + assert(assignedInterval->isConstant); + refPosition->treeNode->SetReuseRegVal(); + } + else if (wasAssigned) + { + updatePreviousInterval(availablePhysRegRecord, assignedInterval, assignedInterval->registerType); + } + else + { + assert(!regSelector->isConstAvailable()); + } } } + + // At this point, we need to make sure that other `regCount` registers are available and then just allocate them + // to subsequent refpositions. + assignPhysReg(availablePhysRegRecord, currentInterval); + refPosition->registerAssignment = foundRegBit; + + + // // Next iteration + // foundRegBit <<= 1; + // refPosition = refPosition->nextConsecutiveRefPosition; + + //} while ((refPosition != nullptr) && (refPosition->needsConsecutive)); + + RefPosition* consecutiveRefPosition = refPosition->nextConsecutiveRefPosition; + while (consecutiveRefPosition != nullptr) + { + //TODO: Unassign anything else for this register. + foundRegBit <<= 1; + consecutiveRefPosition->registerAssignment = foundRegBit; + + consecutiveRefPosition = consecutiveRefPosition->nextConsecutiveRefPosition; } - assignPhysReg(availablePhysRegRecord, currentInterval); - refPosition->registerAssignment = foundRegBit; - return foundReg; + + return regToReturn; } //------------------------------------------------------------------------ @@ -3082,6 +3110,10 @@ regNumber LinearScan::assignCopyReg(RefPosition* refPosition) // We *must* allocate a register, and it will be a copyReg. Set that field now, so that // refPosition->RegOptional() will return false. 
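The chained loop in allocateReg above leans on the layout of the register mask: one bit per register, with adjacent vector registers in adjacent bits, so shifting the selected bit left by one lands on the architecturally next register. A minimal sketch of that propagation, using plain ulong masks rather than the JIT's regMaskTP:

    // Sketch: pin every chained use to the register immediately after the one the
    // selector chose for the head position (assumes one mask bit per register and
    // adjacent vector registers in adjacent bits).
    static void PinFollowers(ulong headRegBit, ulong[] followerAssignments)
    {
        ulong bit = headRegBit;
        for (int i = 0; i < followerAssignments.Length; i++)
        {
            bit <<= 1;                    // move to the next register
            followerAssignments[i] = bit; // the follower may only take exactly this register
        }
    }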
refPosition->copyReg = true; + //if (refPosition->getMultiRegIdx() != 0) + //{ + // refPosition->multiRegIdx = 0; + //} RegisterScore registerScore = NONE; regNumber allocatedReg = allocateReg(currentInterval, refPosition DEBUG_ARG(®isterScore)); @@ -3737,6 +3769,25 @@ void LinearScan::spillGCRefs(RefPosition* killRefPosition) nullptr)); } +#ifdef TARGET_ARM64 +regMaskTP LinearScan::getFreeCandidates(regMaskTP candidates, RefPosition* refPosition) +{ + regMaskTP result = candidates & m_AvailableRegs; + if (!refPosition->needsConsecutive) + { + return result; + } + + // If refPosition->multiRegIdx == 0, we need to make sure we check for all the + // `regCount` available regs. + // Once we do that just allocate consecutively. + + result &= (m_AvailableRegs >> (refPosition->regCount - 1)); + + return result; +} +#endif + //------------------------------------------------------------------------ // processBlockEndAllocation: Update var locations after 'currentBlock' has been allocated // @@ -4633,6 +4684,12 @@ void LinearScan::allocateRegisters() for (RefPosition& currentRefPosition : refPositions) { + //TODO: Add logic to skip past consecutive registers refPositions. + //if (currentRefPosition.needsConsecutive && currentRefPosition.regCount == 0) + //{ + // continue; + //} + //while (currentRefPosition) RefPosition* nextRefPosition = currentRefPosition.nextRefPosition; // TODO: Can we combine this with the freeing of registers below? It might @@ -5790,7 +5847,7 @@ void LinearScan::writeLocalReg(GenTreeLclVar* lclNode, unsigned varNum, regNumbe { assert(compiler->lvaEnregMultiRegVars); LclVarDsc* parentVarDsc = compiler->lvaGetDesc(lclNode); - assert(parentVarDsc->lvPromoted); + assert(parentVarDsc->lvPromoted || lclNode->IsMultiRegUse()); unsigned regIndex = varNum - parentVarDsc->lvFieldLclStart; assert(regIndex < MAX_MULTIREG_COUNT); lclNode->SetRegNumByIdx(reg, regIndex); @@ -6265,7 +6322,11 @@ void LinearScan::insertCopyOrReload(BasicBlock* block, GenTree* tree, unsigned m // Insert the copy/reload after the spilled node and replace the use of the original node with a use // of the copy/reload. blockRange.InsertAfter(tree, newNode); - treeUse.ReplaceWith(newNode); + + //if (multiRegIdx == 0) + { + treeUse.ReplaceWith(newNode); + } } } @@ -11972,7 +12033,13 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval, reverseSelect = linearScan->doReverseSelect(); #endif // DEBUG +#if defined(TARGET_ARM) freeCandidates = linearScan->getFreeCandidates(candidates, regType); +#elif defined(TARGET_ARM64) + freeCandidates = linearScan->getFreeCandidates(candidates, refPosition); +#else + freeCandidates = linearScan->getFreeCandidates(candidates); +#endif // TARGET_ARM // If no free candidates, then double check if refPosition is an actual ref. 
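The shifted-mask test in the new getFreeCandidates overload above keeps a candidate only if the register regCount - 1 slots higher is also free; a fully general test also has to cover the registers in between, which amounts to folding in every shift from 1 up to regCount - 1. A standalone sketch of that check, again on plain ulong masks rather than regMaskTP:

    // Sketch, not the JIT code: bit i survives only if i is an allowed candidate and
    // registers i, i+1, ..., i+regCount-1 are all currently free.
    static ulong ConsecutiveFreeStarts(ulong candidates, ulong available, int regCount)
    {
        ulong result = candidates & available;
        for (int k = 1; k < regCount; k++)
        {
            result &= available >> k;
        }
        return result;
    }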
if (freeCandidates == RBM_NONE) diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index ecc2fb50986bc2..d7ea7986330c03 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1182,7 +1182,12 @@ class LinearScan : public LinearScanInterface /***************************************************************************** * Register selection ****************************************************************************/ - regMaskTP getFreeCandidates(regMaskTP candidates, var_types regType) + + + + +#if !defined(TARGET_ARM64) + regMaskTP getFreeCandidates(regMaskTP candidates ARM_ARG(var_types regType)) { regMaskTP result = candidates & m_AvailableRegs; #ifdef TARGET_ARM @@ -1193,8 +1198,14 @@ class LinearScan : public LinearScanInterface result &= (m_AvailableRegs >> 1); } #endif // TARGET_ARM +#ifdef TARGET_ARM64 + +#endif // TARGET_ARM64 return result; } +#else + regMaskTP getFreeCandidates(regMaskTP candidates, RefPosition* refPosition); +#endif #ifdef DEBUG class RegisterSelection; @@ -2233,6 +2244,13 @@ class RefPosition // are only traversed in the forward direction, and are not moved. RefPosition* nextRefPosition; + // This is temporary. It will be moved to LinearScan level in a map that will store + // the next refposition. Below table, we are storing 2 situation of consecutive registers + // First being 3 consecutive registers (21, 22, 23) and (41, 42). + // 21 -> 22 + // 22 -> 23 + // 41 -> 42 + RefPosition* nextConsecutiveRefPosition; // The remaining fields are common to both options GenTree* treeNode; unsigned int bbNum; @@ -2262,6 +2280,7 @@ class RefPosition // would be 0..MAX_RET_REG_COUNT-1. unsigned char multiRegIdx : 2; bool needsConsecutive; + unsigned char regCount : 2; // Last Use - this may be true for multiple RefPositions in the same Interval unsigned char lastUse : 1; @@ -2342,6 +2361,7 @@ class RefPosition RefType refType DEBUG_ARG(GenTree* buildNode)) : referent(nullptr) , nextRefPosition(nullptr) + , nextConsecutiveRefPosition(nullptr) , treeNode(treeNode) , bbNum(bbNum) , nodeLocation(nodeLocation) @@ -2349,6 +2369,7 @@ class RefPosition , refType(refType) , multiRegIdx(0) , needsConsecutive(false) + , regCount(1) , lastUse(false) , reload(false) , spillAfter(false) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 96c81a71ce7729..feea976ee63929 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1090,8 +1090,11 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou if ((intrin.id == NI_AdvSimd_Arm64_VectorTableLookup_2)) { assert(intrin.op1->OperIs(GT_LCL_VAR)); - BuildUse(intrin.op1, RBM_NONE, 0, /* needsConsecutive */true); - BuildUse(intrin.op1, RBM_NONE, 1, /* needsConsecutive */ true); + RefPosition* useRefPos1 = BuildUse(intrin.op1, RBM_NONE, 0, /* needsConsecutive */true); + useRefPos1->regCount = 2; + RefPosition* useRefPos2 = BuildUse(intrin.op1, RBM_NONE, 1, /* needsConsecutive */ true); + useRefPos2->regCount = 0; // Explicitely set it so we can identify that this is tail. 
+ useRefPos1->nextConsecutiveRefPosition = useRefPos2; srcCount+=2; } else diff --git a/src/coreclr/jit/treelifeupdater.cpp b/src/coreclr/jit/treelifeupdater.cpp index 6d5d39d7625631..a14c87c236f199 100644 --- a/src/coreclr/jit/treelifeupdater.cpp +++ b/src/coreclr/jit/treelifeupdater.cpp @@ -38,8 +38,8 @@ template bool TreeLifeUpdater::UpdateLifeFieldVar(GenTreeLclVar* lclNode, unsigned multiRegIndex) { LclVarDsc* parentVarDsc = compiler->lvaGetDesc(lclNode); - assert(parentVarDsc->lvPromoted && (multiRegIndex < parentVarDsc->lvFieldCnt) && lclNode->IsMultiReg() && - compiler->lvaEnregMultiRegVars); + /* assert(parentVarDsc->lvPromoted && (multiRegIndex < parentVarDsc->lvFieldCnt) && lclNode->IsMultiReg() && + compiler->lvaEnregMultiRegVars);*/ unsigned fieldVarNum = parentVarDsc->lvFieldLclStart + multiRegIndex; LclVarDsc* fldVarDsc = compiler->lvaGetDesc(fieldVarNum); assert(fldVarDsc->lvTracked); From 7b10969ec9736eec799c362febe95dbfb77b716a Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 4 Jan 2023 13:51:36 -0800 Subject: [PATCH 007/125] working model --- src/coreclr/jit/hwintrinsic.cpp | 84 +++++++++++++++++++++++++------- src/coreclr/jit/lower.cpp | 2 +- src/coreclr/jit/lowerarmarch.cpp | 4 +- src/coreclr/jit/lsra.cpp | 73 ++++++++++++++++++++++++--- src/coreclr/jit/morph.cpp | 2 +- 5 files changed, 139 insertions(+), 26 deletions(-) diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index fcfc44652a430b..5587cab0ce25ee 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -1144,32 +1144,82 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, // check the number of fields present in op1 and set NI_AdvSimd_Arm64_VectorTableLookup_2, // NI_AdvSimd_Arm64_VectorTableLookup_3, etc. 
op1 = impPopStack().val; - int fieldCount = info.compCompHnd->getClassNumInstanceFields(sigReader.op1ClsHnd); - switch (fieldCount) + ClassLayout* layout = op1->GetLayout(this); + unsigned structSize = layout->GetSize(); + unsigned slotCount = layout->GetSlotCount(); + + /*GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList(); + for (unsigned i = 0; i < fieldCount; i++) { - case 2: - intrinsic = NI_AdvSimd_Arm64_VectorTableLookup_2; - break; - case 3: - intrinsic = NI_AdvSimd_Arm64_VectorTableLookup_3; - break; - case 4: - intrinsic = NI_AdvSimd_Arm64_VectorTableLookup_4; - break; - default: - noway_assert("Unknown field count"); + LclVarDsc* fieldVarDsc = lvaGetDesc(fieldLclNum); + GenTree* lclVar = gtNewLclvNode(fieldLclNum, fieldVarDsc->TypeGet()); + fieldList->AddField(this, lclVar, fieldVarDsc->lvFldOffset, fieldVarDsc->TypeGet()); + fieldLclNum++; } - + return fieldList;*/ + unsigned fieldCount = slotCount / info.compCompHnd->getClassNumInstanceFields(sigReader.op1ClsHnd); + + if (fieldCount > 1) + { + op1->AsLclVar()->SetMultiRegUse(); + GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList(); + int offset = 0; + for (unsigned fieldId = 0; fieldId < fieldCount; fieldId++) + { + unsigned lclNum = lvaGrabTemp(true DEBUGARG("VectorTableLookup")); + LclVarDsc* fldVarDsc = lvaGetDesc(lclNum); + fldVarDsc->lvType = TYP_SIMD16; + + CORINFO_FIELD_HANDLE fieldHandle = + info.compCompHnd->getFieldInClass(sigReader.op1ClsHnd, fieldId); + CORINFO_CLASS_HANDLE classHandle = info.compCompHnd->getFieldClass(fieldHandle); + lvaSetStruct(lclNum, classHandle, true); + + GenTreeLclFld* fldNode = gtNewLclFldNode(lclNum, TYP_SIMD16, offset); + + // varDsc->SetLayout(layout); + + fieldList->AddField(this, fldNode, 0, TYP_SIMD16); + offset += 16; + } + // op1 = fieldList; + + /*const CORINFO_FIELD_HANDLE field1 = + info.compCompHnd->getFieldInClass(sigReader.op1ClsHnd, 0); + unsigned fldOffset1 = info.compCompHnd->getFieldOffset(field1); + const CORINFO_FIELD_HANDLE field2 = info.compCompHnd->getFieldInClass(sigReader.op1ClsHnd, 1); + unsigned fldOffset2 = info.compCompHnd->getFieldOffset(field2);*/ + + switch (fieldCount) + { + case 1: + // keep the intrinsic + break; + case 2: + intrinsic = NI_AdvSimd_Arm64_VectorTableLookup_2; + break; + case 3: + intrinsic = NI_AdvSimd_Arm64_VectorTableLookup_3; + break; + case 4: + intrinsic = NI_AdvSimd_Arm64_VectorTableLookup_4; + break; + default: + noway_assert("Unknown field count"); + } + } + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); + lvaGetDesc(op1->AsLclVar())->lvUsedInSIMDIntrinsic = false; } else #endif { op1 = getArgForHWIntrinsic(sigReader.GetOp1Type(), sigReader.op1ClsHnd); + retNode = isScalar + ? gtNewScalarHWIntrinsicNode(retType, op1, op2, intrinsic) + : gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); } - retNode = isScalar ? 
gtNewScalarHWIntrinsicNode(retType, op1, op2, intrinsic) - : gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); - #ifdef TARGET_XARCH if ((intrinsic == NI_SSE42_Crc32) || (intrinsic == NI_SSE42_X64_Crc32)) { diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 572a46c8d3bc81..24dfa12d4fade1 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -6668,7 +6668,7 @@ void Lowering::CheckNode(Compiler* compiler, GenTree* node) #endif // FEATURE_SIMD && TARGET_64BIT if (varDsc->lvPromoted) { - assert(varDsc->lvDoNotEnregister || varDsc->lvIsMultiRegRet); + assert(varDsc->lvDoNotEnregister || varDsc->lvIsMultiRegRet || node->AsLclVar()->IsMultiRegUse()); } } break; diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 3a120782b6acc4..8364dd4cc3eefb 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -2583,15 +2583,17 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) case NI_AdvSimd_Arm64_VectorTableLookup_2: comp->lvaGetDesc(intrin.op1->AsLclVar())->regCount = 2; intrin.op1->AsLclVar()->SetMultiRegUse(); + //MakeSrcContained(node, intrin.op1); break; case NI_AdvSimd_Arm64_VectorTableLookup_3: comp->lvaGetDesc(intrin.op1->AsLclVar())->regCount = 3; intrin.op1->AsLclVar()->SetMultiRegUse(); + //MakeSrcContained(node, intrin.op1); break; case NI_AdvSimd_Arm64_VectorTableLookup_4: comp->lvaGetDesc(intrin.op1->AsLclVar())->regCount = 4; - //MakeSrcContained(node, intrin.op1); intrin.op1->AsLclVar()->SetMultiRegUse(); + //MakeSrcContained(node, intrin.op1); break; case NI_AdvSimd_DuplicateSelectedScalarToVector64: case NI_AdvSimd_DuplicateSelectedScalarToVector128: diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index db9021a21cb033..78508e08ed3edc 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -2875,7 +2875,17 @@ regNumber LinearScan::allocateReg(Interval* referentInterval, { //TODO: Unassign anything else for this register. 
foundRegBit <<= 1; + foundReg = genRegNumFromMask(foundRegBit); + /*Interval* consecutiveInterval = consecutiveRefPosition->getInterval(); + if (consecutiveInterval->physReg != foundReg) + { + consecutiveInterval->isActive = false; + unassignPhysReg(consecutiveInterval->physReg); + consecutiveInterval->isActive = true; + }*/ + consecutiveRefPosition->registerAssignment = foundRegBit; + consecutiveRefPosition = consecutiveRefPosition->nextConsecutiveRefPosition; } @@ -5210,7 +5220,7 @@ void LinearScan::allocateRegisters() regMaskTP assignedRegBit = RBM_NONE; bool isInRegister = false; - if (assignedRegister != REG_NA) + if ((assignedRegister != REG_NA) /*&& !currentRefPosition.needsConsecutive*/) { isInRegister = true; assignedRegBit = genRegMask(assignedRegister); @@ -5243,7 +5253,7 @@ void LinearScan::allocateRegisters() assert(previousRefPosition->nextRefPosition == ¤tRefPosition); assert(assignedRegister == REG_NA || assignedRegBit == previousRefPosition->registerAssignment || currentRefPosition.outOfOrder || previousRefPosition->copyReg || - previousRefPosition->refType == RefTypeExpUse || currentRefPosition.refType == RefTypeDummyDef); + previousRefPosition->refType == RefTypeExpUse || currentRefPosition.refType == RefTypeDummyDef || currentRefPosition.needsConsecutive); } else if (assignedRegister != REG_NA) { @@ -5317,7 +5327,7 @@ void LinearScan::allocateRegisters() } } - if (assignedRegister != REG_NA) + if ((assignedRegister != REG_NA)) { RegRecord* physRegRecord = getRegisterRecord(assignedRegister); assert((assignedRegBit == currentRefPosition.registerAssignment) || @@ -5338,7 +5348,7 @@ void LinearScan::allocateRegisters() setIntervalAsSplit(currentInterval); INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_MOVE_REG, currentInterval, assignedRegister)); } - else if ((genRegMask(assignedRegister) & currentRefPosition.registerAssignment) != 0) + else if (((genRegMask(assignedRegister) & currentRefPosition.registerAssignment) != 0)) { currentRefPosition.registerAssignment = assignedRegBit; if (!currentInterval->isActive) @@ -5421,6 +5431,57 @@ void LinearScan::allocateRegisters() } } + if (currentRefPosition.needsConsecutive) + { + // For consecutive register, we would like to assign a register (if not already assigned) + // to the 1st position and the subsequent positions will just get the consecutive register. + if (currentRefPosition.multiRegIdx == 0) + { + if (assignedRegister != REG_NA) + { + // For 1st position, if it already has a register assigned, then just assign + // subsequent registers to remaining position and skip the allocation for the + // 1st position altogether. + + RefPosition* consecutiveRefPosition = currentRefPosition.nextConsecutiveRefPosition; + regMaskTP registerBit = assignedRegBit; + while (consecutiveRefPosition != nullptr) + { + // TODO: Unassign anything else for this register. 
+ registerBit <<= 1; + regNumber foundReg = genRegNumFromMask(registerBit); + /*Interval* consecutiveInterval = consecutiveRefPosition->getInterval(); + if (consecutiveInterval->physReg != foundReg) + { + consecutiveInterval->isActive = false; + unassignPhysReg(consecutiveInterval->physReg); + consecutiveInterval->isActive = true; + }*/ + + consecutiveRefPosition->registerAssignment = registerBit; + + consecutiveRefPosition = consecutiveRefPosition->nextConsecutiveRefPosition; + } + } + } + else + { + if (assignedRegBit == currentRefPosition.registerAssignment) + { + // For the subsequent position, if they already have the subsequent register assigned, then + // no need to find register to assign. + allocate = false; + } + else + { + // If subsequent position is not assigned to the subsequent register, then reassign the right + // consecutive register. + assignedRegister = REG_NA; + } + } + + } + if (assignedRegister == REG_NA) { if (currentRefPosition.RegOptional()) @@ -5511,7 +5572,7 @@ void LinearScan::allocateRegisters() // If we allocated a register, and this is a use of a spilled value, // it should have been marked for reload above. - if (assignedRegister != REG_NA && RefTypeIsUse(refType) && !isInRegister) + if (assignedRegister != REG_NA && RefTypeIsUse(refType) && !isInRegister && !currentRefPosition.needsConsecutive) { assert(currentRefPosition.reload); } @@ -5997,7 +6058,7 @@ void LinearScan::resolveLocalRef(BasicBlock* block, GenTreeLclVar* treeNode, Ref if (reload) { assert(currentRefPosition->refType != RefTypeDef); - assert(interval->isSpilled); + assert(interval->isSpilled || currentRefPosition->needsConsecutive); varDsc->SetRegNum(REG_STK); if (!spillAfter) { diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 13e81bda6ac74e..58a7399007881c 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -12149,7 +12149,7 @@ GenTree* Compiler::fgMorphMultiOp(GenTreeMultiOp* multiOp) // // So here we preserve this invariant and mark any promoted structs as do-not-enreg. // - if (operand->OperIs(GT_LCL_VAR) && lvaGetDesc(operand->AsLclVar())->lvPromoted) + if (operand->OperIs(GT_LCL_VAR) && lvaGetDesc(operand->AsLclVar())->lvPromoted && !operand->AsLclVar()->IsMultiRegUse()) { lvaSetVarDoNotEnregister(operand->AsLclVar()->GetLclNum() DEBUGARG(DoNotEnregisterReason::SimdUserForcesDep)); From 87f66f7796bef7f7a106b9df82a002c0e6651f4b Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 4 Jan 2023 23:04:30 -0800 Subject: [PATCH 008/125] Vector64 support --- src/coreclr/jit/hwintrinsic.cpp | 18 +++++++++--------- src/coreclr/jit/hwintrinsic.h | 3 +++ src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 2 ++ src/coreclr/jit/hwintrinsiclistarm64.h | 3 +++ src/coreclr/jit/lowerarmarch.cpp | 3 +++ src/coreclr/jit/lsraarm64.cpp | 3 ++- 6 files changed, 22 insertions(+), 10 deletions(-) diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 5587cab0ce25ee..259b4e274bf9de 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -1139,7 +1139,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, op2 = addRangeCheckIfNeeded(intrinsic, op2, mustExpand, immLowerBound, immUpperBound); #ifdef TARGET_ARM64 - if (intrinsic == NI_AdvSimd_Arm64_VectorTableLookup) + if ((intrinsic == NI_AdvSimd_VectorTableLookup) || (intrinsic == NI_AdvSimd_Arm64_VectorTableLookup)) { // check the number of fields present in op1 and set NI_AdvSimd_Arm64_VectorTableLookup_2, // NI_AdvSimd_Arm64_VectorTableLookup_3, etc. 
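For reference, the shape of the managed call this importer path is meant to serve, matching the tuple overloads declared elsewhere in the series. The values and variable names below are purely illustrative, and the code only runs where AdvSimd.Arm64 is supported:

    using System;
    using System.Runtime.Intrinsics;
    using System.Runtime.Intrinsics.Arm;

    if (AdvSimd.Arm64.IsSupported)
    {
        Vector128<byte> lo  = Vector128.Create((byte)1);   // table bytes 0..15
        Vector128<byte> hi  = Vector128.Create((byte)2);   // table bytes 16..31
        Vector128<byte> idx = Vector128.Create((byte)0, 1, 2, 3, 16, 17, 18, 19,
                                               255, 255, 255, 255, 0, 0, 0, 0);
        // Lanes indexing 0..15 read from lo, lanes indexing 16..31 read from hi,
        // and the 255 lanes fall outside the 32-byte table, so they come back as 0.
        Vector128<byte> result = AdvSimd.Arm64.VectorTableLookup((lo, hi), idx);
        Console.WriteLine(result);
    }

With the plumbing in this series the call is expected to reduce to a single tbl over a pair of consecutive V registers, rather than two single-register lookups plus a merge.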
@@ -1184,11 +1184,11 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, } // op1 = fieldList; - /*const CORINFO_FIELD_HANDLE field1 = - info.compCompHnd->getFieldInClass(sigReader.op1ClsHnd, 0); - unsigned fldOffset1 = info.compCompHnd->getFieldOffset(field1); - const CORINFO_FIELD_HANDLE field2 = info.compCompHnd->getFieldInClass(sigReader.op1ClsHnd, 1); - unsigned fldOffset2 = info.compCompHnd->getFieldOffset(field2);*/ + //const CORINFO_FIELD_HANDLE field1 = + // info.compCompHnd->getFieldInClass(sigReader.op1ClsHnd, 0); + //unsigned fldOffset1 = info.compCompHnd->getFieldOffset(field1); + //const CORINFO_FIELD_HANDLE field2 = info.compCompHnd->getFieldInClass(sigReader.op1ClsHnd, 1); + //unsigned fldOffset2 = info.compCompHnd->getFieldOffset(field2); switch (fieldCount) { @@ -1196,13 +1196,13 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, // keep the intrinsic break; case 2: - intrinsic = NI_AdvSimd_Arm64_VectorTableLookup_2; + intrinsic = (NamedIntrinsic)(intrinsic + 1); break; case 3: - intrinsic = NI_AdvSimd_Arm64_VectorTableLookup_3; + intrinsic = (NamedIntrinsic)(intrinsic + 2); break; case 4: - intrinsic = NI_AdvSimd_Arm64_VectorTableLookup_4; + intrinsic = (NamedIntrinsic)(intrinsic + 3); break; default: noway_assert("Unknown field count"); diff --git a/src/coreclr/jit/hwintrinsic.h b/src/coreclr/jit/hwintrinsic.h index 0a4b7a4d5e6af6..7ab19e514381d5 100644 --- a/src/coreclr/jit/hwintrinsic.h +++ b/src/coreclr/jit/hwintrinsic.h @@ -780,10 +780,13 @@ struct HWIntrinsicInfo { #ifdef TARGET_ARM64 // TODO-ARM64-NYI: Support hardware intrinsics operating on multiple contiguous registers. + case NI_AdvSimd_VectorTableLookup_2: case NI_AdvSimd_Arm64_VectorTableLookup_2: return 2; + case NI_AdvSimd_VectorTableLookup_3: case NI_AdvSimd_Arm64_VectorTableLookup_3: return 3; + case NI_AdvSimd_VectorTableLookup_4: case NI_AdvSimd_Arm64_VectorTableLookup_4: return 4; #endif diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index bdd8e7cc39a1d1..f53e8cc73e7ee1 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -424,6 +424,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) instruction ins = INS_invalid; switch (intrin.id) { + case NI_AdvSimd_VectorTableLookup_2: case NI_AdvSimd_Arm64_VectorTableLookup_2: ins = INS_tbl_2regs; break; @@ -493,6 +494,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) switch (intrin.id) { + case NI_AdvSimd_VectorTableLookup_2: case NI_AdvSimd_Arm64_VectorTableLookup_2: if (intrin.op1->IsCopyOrReload()) { diff --git a/src/coreclr/jit/hwintrinsiclistarm64.h b/src/coreclr/jit/hwintrinsiclistarm64.h index 1fd0e50104cc7d..48d3a61894236e 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64.h +++ b/src/coreclr/jit/hwintrinsiclistarm64.h @@ -472,6 +472,9 @@ HARDWARE_INTRINSIC(AdvSimd, SubtractScalar, HARDWARE_INTRINSIC(AdvSimd, SubtractWideningLower, 8, 2, {INS_ssubl, INS_usubl, INS_ssubl, INS_usubl, INS_ssubl, INS_usubl, INS_ssubw, INS_usubw, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd, SubtractWideningUpper, 16, 2, {INS_ssubl2, INS_usubl2, INS_ssubl2, INS_usubl2, INS_ssubl2, INS_usubl2, INS_ssubw2, INS_usubw2, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd, VectorTableLookup, 8, 2, {INS_tbl, INS_tbl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd, VectorTableLookup_2, 8, 2, {INS_tbl_2regs, INS_tbl_2regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment) +HARDWARE_INTRINSIC(AdvSimd, VectorTableLookup_3, 8, 2, {INS_tbl_3regs, INS_tbl_3regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment) +HARDWARE_INTRINSIC(AdvSimd, VectorTableLookup_4, 8, 2, {INS_tbl_4regs, INS_tbl_4regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment) HARDWARE_INTRINSIC(AdvSimd, VectorTableLookupExtension, 8, 3, {INS_tbx, INS_tbx, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd, Xor, -1, 2, {INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor}, HW_Category_SIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(AdvSimd, ZeroExtendWideningLower, 8, 1, {INS_uxtl, INS_uxtl, INS_uxtl, INS_uxtl, INS_uxtl, INS_uxtl, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 8364dd4cc3eefb..99134a3a499f95 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -2580,16 +2580,19 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) { switch (intrin.id) { + case NI_AdvSimd_VectorTableLookup_2: case NI_AdvSimd_Arm64_VectorTableLookup_2: comp->lvaGetDesc(intrin.op1->AsLclVar())->regCount = 2; intrin.op1->AsLclVar()->SetMultiRegUse(); //MakeSrcContained(node, intrin.op1); break; + case NI_AdvSimd_VectorTableLookup_3: case NI_AdvSimd_Arm64_VectorTableLookup_3: comp->lvaGetDesc(intrin.op1->AsLclVar())->regCount = 3; intrin.op1->AsLclVar()->SetMultiRegUse(); //MakeSrcContained(node, intrin.op1); break; + case NI_AdvSimd_VectorTableLookup_4: case NI_AdvSimd_Arm64_VectorTableLookup_4: comp->lvaGetDesc(intrin.op1->AsLclVar())->regCount = 4; intrin.op1->AsLclVar()->SetMultiRegUse(); diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index feea976ee63929..612f925355d17e 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1087,7 +1087,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou else { //TODO: Need to fix this reliably. 
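The arithmetic on the intrinsic id in the importer change above (intrinsic + 1, + 2, + 3) only works because the _2, _3 and _4 entries are declared immediately after the base VectorTableLookup entry in hwintrinsiclistarm64.h, as in the rows above. In miniature, with made-up ids rather than the real NamedIntrinsic values:

    // Illustrative only: derive the variant from the tuple arity, assuming the
    // _2/_3/_4 ids are consecutive enum values right after the base id and the
    // arity has already been validated to be between 1 and 4.
    enum Ni { VectorTableLookup = 100, VectorTableLookup_2, VectorTableLookup_3, VectorTableLookup_4 }

    static class VariantMap
    {
        public static Ni Select(Ni baseId, int fieldCount) => baseId + (fieldCount - 1);
    }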
- if ((intrin.id == NI_AdvSimd_Arm64_VectorTableLookup_2)) + if ((intrin.id == NI_AdvSimd_VectorTableLookup_2) || + (intrin.id == NI_AdvSimd_Arm64_VectorTableLookup_2)) { assert(intrin.op1->OperIs(GT_LCL_VAR)); RefPosition* useRefPos1 = BuildUse(intrin.op1, RBM_NONE, 0, /* needsConsecutive */true); From 98e7bd2c969eb63e76cae305dcd79c128de34d8d Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 4 Jan 2023 23:53:45 -0800 Subject: [PATCH 009/125] Add VectorTableLookup_3 --- src/coreclr/jit/hwintrinsic.cpp | 47 ++++++++-------- src/coreclr/jit/hwintrinsic.h | 5 -- src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 14 +++++ src/coreclr/jit/lsraarm64.cpp | 54 +++++++++++++++---- .../Arm/AdvSimd.PlatformNotSupported.cs | 40 +++++++------- .../System/Runtime/Intrinsics/Arm/AdvSimd.cs | 40 +++++++------- .../ref/System.Runtime.Intrinsics.cs | 12 ++--- 7 files changed, 127 insertions(+), 85 deletions(-) diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 259b4e274bf9de..89d4399d81ff8e 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -1141,26 +1141,14 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, #ifdef TARGET_ARM64 if ((intrinsic == NI_AdvSimd_VectorTableLookup) || (intrinsic == NI_AdvSimd_Arm64_VectorTableLookup)) { - // check the number of fields present in op1 and set NI_AdvSimd_Arm64_VectorTableLookup_2, - // NI_AdvSimd_Arm64_VectorTableLookup_3, etc. op1 = impPopStack().val; ClassLayout* layout = op1->GetLayout(this); unsigned structSize = layout->GetSize(); unsigned slotCount = layout->GetSlotCount(); - - /*GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList(); - for (unsigned i = 0; i < fieldCount; i++) - { - LclVarDsc* fieldVarDsc = lvaGetDesc(fieldLclNum); - GenTree* lclVar = gtNewLclvNode(fieldLclNum, fieldVarDsc->TypeGet()); - fieldList->AddField(this, lclVar, fieldVarDsc->lvFldOffset, fieldVarDsc->TypeGet()); - fieldLclNum++; - } - return fieldList;*/ - unsigned fieldCount = slotCount / info.compCompHnd->getClassNumInstanceFields(sigReader.op1ClsHnd); - - if (fieldCount > 1) + var_types typeOfLayout = layout->GetType(); + if (typeOfLayout == TYP_STRUCT) { + unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(sigReader.op1ClsHnd); op1->AsLclVar()->SetMultiRegUse(); GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList(); int offset = 0; @@ -1176,19 +1164,17 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, lvaSetStruct(lclNum, classHandle, true); GenTreeLclFld* fldNode = gtNewLclFldNode(lclNum, TYP_SIMD16, offset); - - // varDsc->SetLayout(layout); - fieldList->AddField(this, fldNode, 0, TYP_SIMD16); + offset += 16; } // op1 = fieldList; - //const CORINFO_FIELD_HANDLE field1 = - // info.compCompHnd->getFieldInClass(sigReader.op1ClsHnd, 0); - //unsigned fldOffset1 = info.compCompHnd->getFieldOffset(field1); - //const CORINFO_FIELD_HANDLE field2 = info.compCompHnd->getFieldInClass(sigReader.op1ClsHnd, 1); - //unsigned fldOffset2 = info.compCompHnd->getFieldOffset(field2); + // const CORINFO_FIELD_HANDLE field1 = + // info.compCompHnd->getFieldInClass(sigReader.op1ClsHnd, 0); + // unsigned fldOffset1 = info.compCompHnd->getFieldOffset(field1); + // const CORINFO_FIELD_HANDLE field2 = info.compCompHnd->getFieldInClass(sigReader.op1ClsHnd, + // 1); unsigned fldOffset2 = info.compCompHnd->getFieldOffset(field2); switch (fieldCount) { @@ -1208,6 +1194,21 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, noway_assert("Unknown field 
count"); } } + else + { + assert(typeOfLayout == TYP_SIMD16); + } + + /*GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList(); + for (unsigned i = 0; i < fieldCount; i++) + { + LclVarDsc* fieldVarDsc = lvaGetDesc(fieldLclNum); + GenTree* lclVar = gtNewLclvNode(fieldLclNum, fieldVarDsc->TypeGet()); + fieldList->AddField(this, lclVar, fieldVarDsc->lvFldOffset, fieldVarDsc->TypeGet()); + fieldLclNum++; + } + return fieldList;*/ + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); lvaGetDesc(op1->AsLclVar())->lvUsedInSIMDIntrinsic = false; } diff --git a/src/coreclr/jit/hwintrinsic.h b/src/coreclr/jit/hwintrinsic.h index 7ab19e514381d5..095f5a6442f06f 100644 --- a/src/coreclr/jit/hwintrinsic.h +++ b/src/coreclr/jit/hwintrinsic.h @@ -810,12 +810,7 @@ struct HWIntrinsicInfo case NI_AdvSimd_Arm64_LoadPairVector64NonTemporal: case NI_AdvSimd_Arm64_LoadPairVector128: case NI_AdvSimd_Arm64_LoadPairVector128NonTemporal: - //case NI_AdvSimd_Arm64_VectorTableLookup_2: return 2; - //case NI_AdvSimd_Arm64_VectorTableLookup_3: - // return 3; - //case NI_AdvSimd_Arm64_VectorTableLookup_4: - // return 4; #endif default: diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index f53e8cc73e7ee1..b736655d947df0 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -429,6 +429,16 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) ins = INS_tbl_2regs; break; + case NI_AdvSimd_VectorTableLookup_3: + case NI_AdvSimd_Arm64_VectorTableLookup_3: + ins = INS_tbl_3regs; + break; + + case NI_AdvSimd_VectorTableLookup_4: + case NI_AdvSimd_Arm64_VectorTableLookup_4: + ins = INS_tbl_4regs; + break; + case NI_AdvSimd_AddWideningLower: assert(varTypeIsIntegral(intrin.baseType)); if (intrin.op1->TypeGet() == TYP_SIMD8) @@ -496,6 +506,10 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) { case NI_AdvSimd_VectorTableLookup_2: case NI_AdvSimd_Arm64_VectorTableLookup_2: + case NI_AdvSimd_VectorTableLookup_3: + case NI_AdvSimd_Arm64_VectorTableLookup_3: + case NI_AdvSimd_VectorTableLookup_4: + case NI_AdvSimd_Arm64_VectorTableLookup_4: if (intrin.op1->IsCopyOrReload()) { GenTree* op1 = intrin.op1->AsCopyOrReload()->gtGetOp1(); diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 612f925355d17e..6ced87ec931f87 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1086,22 +1086,54 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } else { - //TODO: Need to fix this reliably. - if ((intrin.id == NI_AdvSimd_VectorTableLookup_2) || - (intrin.id == NI_AdvSimd_Arm64_VectorTableLookup_2)) + int regCount; + RefPosition* useRefPos1 = nullptr; + RefPosition* nextUseRefPos = nullptr; + switch (intrin.id) { - assert(intrin.op1->OperIs(GT_LCL_VAR)); - RefPosition* useRefPos1 = BuildUse(intrin.op1, RBM_NONE, 0, /* needsConsecutive */true); - useRefPos1->regCount = 2; - RefPosition* useRefPos2 = BuildUse(intrin.op1, RBM_NONE, 1, /* needsConsecutive */ true); - useRefPos2->regCount = 0; // Explicitely set it so we can identify that this is tail. 
- useRefPos1->nextConsecutiveRefPosition = useRefPos2; - srcCount+=2; + case NI_AdvSimd_VectorTableLookup_2: + case NI_AdvSimd_Arm64_VectorTableLookup_2: + regCount = 2; + break; + case NI_AdvSimd_VectorTableLookup_3: + case NI_AdvSimd_Arm64_VectorTableLookup_3: + regCount = 3; + break; + case NI_AdvSimd_VectorTableLookup_4: + case NI_AdvSimd_Arm64_VectorTableLookup_4: + regCount = 4; + break; + default: + regCount = 1; + break; } - else + + if (regCount == 1) { srcCount += BuildOperandUses(intrin.op1); } + else + { + assert(intrin.op1->OperIs(GT_LCL_VAR)); + + RefPosition* lastRefPos = nullptr; + // consecutive registers + for (int regIdx = 0; regIdx < regCount; regIdx++) + { + RefPosition* currRefPos = BuildUse(intrin.op1, RBM_NONE, regIdx, /* needsConsecutive */ true); + if (lastRefPos == nullptr) + { + currRefPos->regCount = regCount; + } + else + { + currRefPos->regCount = 0; // Explicitely set it so we can identify that this is non-first refposition. + lastRefPos->nextConsecutiveRefPosition = currRefPos; + } + lastRefPos = currRefPos; + } + srcCount += regCount; + } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.PlatformNotSupported.cs index c5824deff0b769..2be2a05113dac6 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.PlatformNotSupported.cs @@ -3673,17 +3673,17 @@ internal Arm64() { } /// public static Vector128 VectorTableLookup((Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } - ///// - ///// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) - ///// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B - ///// - //public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + /// + /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + /// + public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } - ///// - ///// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) - ///// A64: TBL Vd.16B, {Vn.16B}, Vm.16B - ///// - //public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + /// + /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B + /// + public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } ///// ///// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) @@ -15014,17 +15014,17 @@ internal Arm64() { } /// public static Vector64 VectorTableLookup((Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } - ///// - ///// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) - ///// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B - ///// - //public static Vector64 VectorTableLookup((Vector64, Vector64, Vector64) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + /// + /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + /// + public static Vector64 
VectorTableLookup((Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } - ///// - ///// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) - ///// A64: TBL Vd.16B, {Vn.16B}, Vm.16B - ///// - //public static Vector64 VectorTableLookup((Vector64, Vector64, Vector64) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + /// + /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B + /// + public static Vector64 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } ///// ///// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs index 669466a84d2f32..14428811d9fc0b 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs @@ -3671,17 +3671,17 @@ internal Arm64() { } /// public static Vector128 VectorTableLookup(ValueTuple, Vector128> table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); - ///// - ///// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) - ///// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B - ///// - //public static Vector128 VectorTableLookup(ValueTuple, Vector128, Vector128> table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); + /// + /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + /// + public static Vector128 VectorTableLookup(ValueTuple, Vector128, Vector128> table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); - ///// - ///// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) - ///// A64: TBL Vd.16B, {Vn.16B}, Vm.16B - ///// - //public static Vector128 VectorTableLookup(ValueTuple, Vector128, Vector128> table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); + /// + /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B + /// + public static Vector128 VectorTableLookup(ValueTuple, Vector128, Vector128> table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); ///// ///// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) @@ -15012,17 +15012,17 @@ internal Arm64() { } /// public static Vector64 VectorTableLookup(ValueTuple, Vector128> table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); - ///// - ///// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) - ///// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B - ///// - //public static Vector64 VectorTableLookup(ValueTuple, Vector128, Vector128> table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); + /// + /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + /// + public static Vector64 VectorTableLookup(ValueTuple, Vector128, Vector128> table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); - ///// - ///// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) - ///// A64: TBL Vd.16B, {Vn.16B}, Vm.16B - ///// - //public static Vector64 VectorTableLookup(ValueTuple, Vector128, Vector128> table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); + /// + /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B}, 
Vm.16B + /// + public static Vector64 VectorTableLookup(ValueTuple, Vector128, Vector128> table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); ///// ///// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index 2391ad67830f54..eb0fbee746bdf6 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -2535,8 +2535,8 @@ public unsafe static void StoreSelectedScalar(ulong* address, System.Runtime.Int public static System.Runtime.Intrinsics.Vector64 VectorTableLookup(System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - //public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - //public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } //public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } //public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector64 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector64 defaultValues, System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } @@ -3179,10 +3179,10 @@ public unsafe static void StorePairScalarNonTemporal(uint* address, System.Runti public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, 
System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - //public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - //public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - //public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - //public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + //public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + //public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector128 defaultValues, System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector128 defaultValues, System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } From 786e350ade9d79f81916f708849b3c06f9298c23 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 5 Jan 2023 12:59:58 -0800 Subject: [PATCH 010/125] Add VectorTableLookup_4 --- src/coreclr/jit/lsra.cpp | 156 +++++++++--------- .../Arm/AdvSimd.PlatformNotSupported.cs | 42 ++--- .../System/Runtime/Intrinsics/Arm/AdvSimd.cs | 42 ++--- .../ref/System.Runtime.Intrinsics.cs | 8 +- 4 files changed, 122 insertions(+), 126 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 78508e08ed3edc..d5714d2a004086 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -2799,97 +2799,83 @@ regNumber LinearScan::allocateReg(Interval* referentInterval, } regNumber regToReturn = genRegNumFromMask(foundRegBit); - //do - //{ - currentInterval = 
refPosition->getInterval(); - regNumber foundReg = genRegNumFromMask(foundRegBit); - RegRecord* availablePhysRegRecord = getRegisterRecord(foundReg); - Interval* assignedInterval = availablePhysRegRecord->assignedInterval; - if ((assignedInterval != currentInterval) && - isAssigned(availablePhysRegRecord ARM_ARG(getRegisterType(currentInterval, refPosition)))) + currentInterval = refPosition->getInterval(); + regNumber foundReg = genRegNumFromMask(foundRegBit); + RegRecord* availablePhysRegRecord = getRegisterRecord(foundReg); + Interval* assignedInterval = availablePhysRegRecord->assignedInterval; + if ((assignedInterval != currentInterval) && + isAssigned(availablePhysRegRecord ARM_ARG(getRegisterType(currentInterval, refPosition)))) + { + if (regSelector->isSpilling()) { - if (regSelector->isSpilling()) - { - // We're spilling. - CLANG_FORMAT_COMMENT_ANCHOR; + // We're spilling. + CLANG_FORMAT_COMMENT_ANCHOR; #ifdef TARGET_ARM - if (currentInterval->registerType == TYP_DOUBLE) - { - assert(genIsValidDoubleReg(availablePhysRegRecord->regNum)); - unassignDoublePhysReg(availablePhysRegRecord); - } - else if (assignedInterval->registerType == TYP_DOUBLE) - { - // Make sure we spill both halves of the double register. - assert(genIsValidDoubleReg(assignedInterval->assignedReg->regNum)); - unassignPhysReg(assignedInterval->assignedReg, assignedInterval->recentRefPosition); - } - else + if (currentInterval->registerType == TYP_DOUBLE) + { + assert(genIsValidDoubleReg(availablePhysRegRecord->regNum)); + unassignDoublePhysReg(availablePhysRegRecord); + } + else if (assignedInterval->registerType == TYP_DOUBLE) + { + // Make sure we spill both halves of the double register. + assert(genIsValidDoubleReg(assignedInterval->assignedReg->regNum)); + unassignPhysReg(assignedInterval->assignedReg, assignedInterval->recentRefPosition); + } + else #endif - { - unassignPhysReg(availablePhysRegRecord, assignedInterval->recentRefPosition); - } + { + unassignPhysReg(availablePhysRegRecord, assignedInterval->recentRefPosition); + } + } + else + { + // If we considered this "unassigned" because this interval's lifetime ends before + // the next ref, remember it. + // For historical reasons (due to former short-circuiting of this case), if we're reassigning + // the current interval to a previous assignment, we don't remember the previous interval. + // Note that we need to compute this condition before calling unassignPhysReg, which wil reset + // assignedInterval->physReg. + bool wasAssigned = regSelector->foundUnassignedReg() && (assignedInterval != nullptr) && + (assignedInterval->physReg == foundReg); + unassignPhysReg(availablePhysRegRecord ARM_ARG(currentInterval->registerType)); + if (regSelector->isMatchingConstant() && compiler->opts.OptimizationEnabled()) + { + assert(assignedInterval->isConstant); + refPosition->treeNode->SetReuseRegVal(); + } + else if (wasAssigned) + { + updatePreviousInterval(availablePhysRegRecord, assignedInterval, assignedInterval->registerType); } else { - // If we considered this "unassigned" because this interval's lifetime ends before - // the next ref, remember it. - // For historical reasons (due to former short-circuiting of this case), if we're reassigning - // the current interval to a previous assignment, we don't remember the previous interval. - // Note that we need to compute this condition before calling unassignPhysReg, which wil reset - // assignedInterval->physReg. 
- bool wasAssigned = regSelector->foundUnassignedReg() && (assignedInterval != nullptr) && - (assignedInterval->physReg == foundReg); - unassignPhysReg(availablePhysRegRecord ARM_ARG(currentInterval->registerType)); - if (regSelector->isMatchingConstant() && compiler->opts.OptimizationEnabled()) - { - assert(assignedInterval->isConstant); - refPosition->treeNode->SetReuseRegVal(); - } - else if (wasAssigned) - { - updatePreviousInterval(availablePhysRegRecord, assignedInterval, assignedInterval->registerType); - } - else - { - assert(!regSelector->isConstAvailable()); - } + assert(!regSelector->isConstAvailable()); } } + } - // At this point, we need to make sure that other `regCount` registers are available and then just allocate them - // to subsequent refpositions. - assignPhysReg(availablePhysRegRecord, currentInterval); - refPosition->registerAssignment = foundRegBit; - - - // // Next iteration - // foundRegBit <<= 1; - // refPosition = refPosition->nextConsecutiveRefPosition; - - //} while ((refPosition != nullptr) && (refPosition->needsConsecutive)); + // At this point, we need to make sure that other `regCount` registers are available and then just allocate them + // to subsequent refpositions. + assignPhysReg(availablePhysRegRecord, currentInterval); + refPosition->registerAssignment = foundRegBit; - RefPosition* consecutiveRefPosition = refPosition->nextConsecutiveRefPosition; - while (consecutiveRefPosition != nullptr) + if (refPosition->needsConsecutive && (refPosition->regCount != 0)) { - //TODO: Unassign anything else for this register. - foundRegBit <<= 1; - foundReg = genRegNumFromMask(foundRegBit); - /*Interval* consecutiveInterval = consecutiveRefPosition->getInterval(); - if (consecutiveInterval->physReg != foundReg) + // We only set this once for remaining refpositions. + RefPosition* consecutiveRefPosition = refPosition->nextConsecutiveRefPosition; + while (consecutiveRefPosition != nullptr) { - consecutiveInterval->isActive = false; - unassignPhysReg(consecutiveInterval->physReg); - consecutiveInterval->isActive = true; - }*/ + // TODO: Unassign anything else for this register. + foundRegBit <<= 1; + foundReg = genRegNumFromMask(foundRegBit); - consecutiveRefPosition->registerAssignment = foundRegBit; - - - consecutiveRefPosition = consecutiveRefPosition->nextConsecutiveRefPosition; + consecutiveRefPosition->registerAssignment = foundRegBit; + consecutiveRefPosition = consecutiveRefPosition->nextConsecutiveRefPosition; + } } - + return regToReturn; } @@ -3120,10 +3106,6 @@ regNumber LinearScan::assignCopyReg(RefPosition* refPosition) // We *must* allocate a register, and it will be a copyReg. Set that field now, so that // refPosition->RegOptional() will return false. 
refPosition->copyReg = true; - //if (refPosition->getMultiRegIdx() != 0) - //{ - // refPosition->multiRegIdx = 0; - //} RegisterScore registerScore = NONE; regNumber allocatedReg = allocateReg(currentInterval, refPosition DEBUG_ARG(®isterScore)); @@ -5329,6 +5311,8 @@ void LinearScan::allocateRegisters() if ((assignedRegister != REG_NA)) { + + RegRecord* physRegRecord = getRegisterRecord(assignedRegister); assert((assignedRegBit == currentRefPosition.registerAssignment) || (physRegRecord->assignedInterval == currentInterval) || @@ -5377,7 +5361,19 @@ void LinearScan::allocateRegisters() lastAllocatedRefPosition = ¤tRefPosition; regMaskTP copyRegMask = getRegMask(copyReg, currentInterval->registerType); regMaskTP assignedRegMask = getRegMask(assignedRegister, currentInterval->registerType); + + if (currentRefPosition.needsConsecutive) + { + // For consecutive register, it doesn't matter what the assigned register was. + // We have just assigned it `copyRegMask` and that's the one in-use, and not the + // one that was assigned previously. + assignedRegMask = REG_NA; + + // This should never be the first refposition of the series. + assert(currentRefPosition.multiRegIdx != 0); + } regsInUseThisLocation |= copyRegMask | assignedRegMask; + if (currentRefPosition.lastUse) { if (currentRefPosition.delayRegFree) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.PlatformNotSupported.cs index 2be2a05113dac6..e7441ae10e1e79 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.PlatformNotSupported.cs @@ -3685,17 +3685,17 @@ internal Arm64() { } /// public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } - ///// - ///// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) - ///// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B - ///// - //public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + /// + /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + /// + public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } - ///// - ///// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) - ///// A64: TBL Vd.16B, {Vn.16B}, Vm.16B - ///// - //public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + /// + /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B + /// + public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } /// /// uint8x16_t vqvtbx1q_u8(uint8x16_t r, int8x16_t t, uint8x16_t idx) @@ -15026,17 +15026,17 @@ internal Arm64() { } /// public static Vector64 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } - ///// - ///// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) - ///// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B - 
///// - //public static Vector64 VectorTableLookup((Vector64, Vector64, Vector64, Vector64) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } - - ///// - ///// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) - ///// A64: TBL Vd.16B, {Vn.16B}, Vm.16B - ///// - //public static Vector64 VectorTableLookup((Vector64, Vector64, Vector64, Vector64) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + /// + /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + /// + public static Vector64 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + + /// + /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B + /// + public static Vector64 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } /// /// uint8x8_t vqvtbx1_u8(uint8x8_t r, uint8x16_t t, uint8x8_t idx) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs index 14428811d9fc0b..fa9e8d9fb89fe7 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs @@ -3683,17 +3683,17 @@ internal Arm64() { } /// public static Vector128 VectorTableLookup(ValueTuple, Vector128, Vector128> table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); - ///// - ///// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) - ///// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B - ///// - //public static Vector128 VectorTableLookup(ValueTuple, Vector128, Vector128, Vector128> table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); + /// + /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + /// + public static Vector128 VectorTableLookup(ValueTuple, Vector128, Vector128, Vector128> table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); - ///// - ///// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) - ///// A64: TBL Vd.16B, {Vn.16B}, Vm.16B - ///// - //public static Vector128 VectorTableLookup(ValueTuple, Vector128, Vector128, Vector128> table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); + /// + /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B + /// + public static Vector128 VectorTableLookup(ValueTuple, Vector128, Vector128, Vector128> table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); /// /// uint8x16_t vqvtbx1q_u8(uint8x16_t r, int8x16_t t, uint8x16_t idx) @@ -15024,17 +15024,17 @@ internal Arm64() { } /// public static Vector64 VectorTableLookup(ValueTuple, Vector128, Vector128> table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); - ///// - ///// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) - ///// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B - ///// - //public static Vector64 VectorTableLookup(ValueTuple, Vector128, Vector128, Vector128> table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); - - ///// - ///// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) - ///// A64: TBL Vd.16B, {Vn.16B}, Vm.16B - ///// - //public static Vector64 VectorTableLookup(ValueTuple, Vector128, 
Vector128, Vector128> table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); + /// + /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + /// + public static Vector64 VectorTableLookup(ValueTuple, Vector128, Vector128, Vector128> table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); + + /// + /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B + /// + public static Vector64 VectorTableLookup(ValueTuple, Vector128, Vector128, Vector128> table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); /// /// uint8x8_t vqvtbx1_u8(uint8x8_t r, uint8x16_t t, uint8x8_t idx) diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index eb0fbee746bdf6..0a9fbf7b4afabf 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -2537,8 +2537,8 @@ public unsafe static void StoreSelectedScalar(ulong* address, System.Runtime.Int public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - //public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - //public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector64 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector64 defaultValues, System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector64 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector64 defaultValues, System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } public static 
System.Runtime.Intrinsics.Vector128 Xor(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } @@ -3181,8 +3181,8 @@ public unsafe static void StorePairScalarNonTemporal(uint* address, System.Runti public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - //public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - //public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector128 defaultValues, System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector128 defaultValues, System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } From e0a82b3c5c4786cb68832ef4a4d530c1124e0f47 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 5 Jan 2023 13:23:36 -0800 Subject: [PATCH 011/125] cleanup --- src/coreclr/jit/hwintrinsic.h | 3 --- src/coreclr/jit/lsra.cpp | 27 +++++++-------------------- src/coreclr/jit/lsra.h | 13 ++++--------- src/coreclr/jit/lsraarm64.cpp | 1 - src/coreclr/jit/treelifeupdater.cpp | 21 ++------------------- 5 files changed, 13 insertions(+), 52 deletions(-) diff --git a/src/coreclr/jit/hwintrinsic.h b/src/coreclr/jit/hwintrinsic.h index 095f5a6442f06f..5b60517d161afd 100644 --- a/src/coreclr/jit/hwintrinsic.h +++ b/src/coreclr/jit/hwintrinsic.h @@ -775,11 +775,9 @@ struct HWIntrinsicInfo static int GetMultiRegUseCount(NamedIntrinsic id) { - //assert(IsMultiRegU) switch (id) { #ifdef TARGET_ARM64 - // TODO-ARM64-NYI: Support hardware intrinsics operating on multiple contiguous registers. 
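+            // The count returned here is the number of consecutive SIMD registers that the
+            // table operand of the corresponding VectorTableLookup_N intrinsic occupies.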
case NI_AdvSimd_VectorTableLookup_2: case NI_AdvSimd_Arm64_VectorTableLookup_2: return 2; @@ -790,7 +788,6 @@ struct HWIntrinsicInfo case NI_AdvSimd_Arm64_VectorTableLookup_4: return 4; #endif - default: unreached(); } diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index d5714d2a004086..ca48d479ea24db 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -2178,7 +2178,7 @@ void LinearScan::checkLastUses(BasicBlock* block) { // We should never see ParamDefs or ZeroInits within a basic block. assert(currentRefPosition->refType != RefTypeParamDef && currentRefPosition->refType != RefTypeZeroInit); - if (currentRefPosition->isIntervalRef() && currentRefPosition->getInterval()->isLocalVar /*&& !currentRefPosition->needsConsecutive*/) + if (currentRefPosition->isIntervalRef() && currentRefPosition->getInterval()->isLocalVar) { unsigned varNum = currentRefPosition->getInterval()->varNum; unsigned varIndex = currentRefPosition->getInterval()->getVarIndex(compiler); @@ -2215,22 +2215,8 @@ void LinearScan::checkLastUses(BasicBlock* block) foundDiff = true; } - //if (currentRefPosition->needsConsecutive) - //{ - // // If this is a case of consecutive registers, refPositions are added so they get register - // // They may not be bbLiveIn but are just used directly as operand. Only add them in computedLive - // // if they were part of bbLiveIn. - // if(VarSetOps::IsMember(compiler, block->bbLiveIn, varIndex)) - // { - // JITDUMP("++ V%02u in computedLive\n", compiler->lvaTrackedIndexToLclNum(varIndex)); - // VarSetOps::AddElemD(compiler, computedLive, varIndex); - // } - //} - //else - { - JITDUMP("++ V%02u in computedLive\n", compiler->lvaTrackedIndexToLclNum(varIndex)); - VarSetOps::AddElemD(compiler, computedLive, varIndex); - } + JITDUMP("++ V%02u in computedLive\n", compiler->lvaTrackedIndexToLclNum(varIndex)); + VarSetOps::AddElemD(compiler, computedLive, varIndex); } else if (currentRefPosition->lastUse) { @@ -2245,7 +2231,7 @@ void LinearScan::checkLastUses(BasicBlock* block) if (currentRefPosition->refType == RefTypeDef || currentRefPosition->refType == RefTypeDummyDef) { - JITDUMP("-- V%02u in computedLive\n", compiler->lvaTrackedIndexToLclNum(varIndex)); + JITDUMP("-- V%02u from computedLive\n", compiler->lvaTrackedIndexToLclNum(varIndex)); VarSetOps::RemoveElemD(compiler, computedLive, varIndex); } } @@ -3765,14 +3751,15 @@ void LinearScan::spillGCRefs(RefPosition* killRefPosition) regMaskTP LinearScan::getFreeCandidates(regMaskTP candidates, RefPosition* refPosition) { regMaskTP result = candidates & m_AvailableRegs; - if (!refPosition->needsConsecutive) + if (!refPosition->needsConsecutive || (refPosition->multiRegIdx != 0)) { return result; } + assert(refPosition->regCount != 0); + // If refPosition->multiRegIdx == 0, we need to make sure we check for all the // `regCount` available regs. - // Once we do that just allocate consecutively. 
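+    // A register bit survives the masking below only when the register itself and the
+    // register (regCount - 1) positions above it are both currently free.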
result &= (m_AvailableRegs >> (refPosition->regCount - 1)); diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index d7ea7986330c03..5cd0364a3d8317 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1184,10 +1184,10 @@ class LinearScan : public LinearScanInterface ****************************************************************************/ - - -#if !defined(TARGET_ARM64) - regMaskTP getFreeCandidates(regMaskTP candidates ARM_ARG(var_types regType)) +#if defined(TARGET_ARM64) + regMaskTP getFreeCandidates(regMaskTP candidates, RefPosition* refPosition); +#else + regMaskTP getFreeCandidates(regMaskTP candidates ARM_ARG(var_types regType)) { regMaskTP result = candidates & m_AvailableRegs; #ifdef TARGET_ARM @@ -1198,13 +1198,8 @@ class LinearScan : public LinearScanInterface result &= (m_AvailableRegs >> 1); } #endif // TARGET_ARM -#ifdef TARGET_ARM64 - -#endif // TARGET_ARM64 return result; } -#else - regMaskTP getFreeCandidates(regMaskTP candidates, RefPosition* refPosition); #endif #ifdef DEBUG diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 6ced87ec931f87..58b6945798a9f9 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1066,7 +1066,6 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou { simdRegToSimdRegMove = varTypeIsFloating(intrinsicTree); } - // If we have an RMW intrinsic or an intrinsic with simple move semantic between two SIMD registers, // we want to preference op1Reg to the target if op1 is not contained. diff --git a/src/coreclr/jit/treelifeupdater.cpp b/src/coreclr/jit/treelifeupdater.cpp index a14c87c236f199..c3e5076d728951 100644 --- a/src/coreclr/jit/treelifeupdater.cpp +++ b/src/coreclr/jit/treelifeupdater.cpp @@ -38,8 +38,8 @@ template bool TreeLifeUpdater::UpdateLifeFieldVar(GenTreeLclVar* lclNode, unsigned multiRegIndex) { LclVarDsc* parentVarDsc = compiler->lvaGetDesc(lclNode); - /* assert(parentVarDsc->lvPromoted && (multiRegIndex < parentVarDsc->lvFieldCnt) && lclNode->IsMultiReg() && - compiler->lvaEnregMultiRegVars);*/ + assert(parentVarDsc->lvPromoted && (multiRegIndex < parentVarDsc->lvFieldCnt) && lclNode->IsMultiReg() && + compiler->lvaEnregMultiRegVars); unsigned fieldVarNum = parentVarDsc->lvFieldLclStart + multiRegIndex; LclVarDsc* fldVarDsc = compiler->lvaGetDesc(fieldVarNum); assert(fldVarDsc->lvTracked); @@ -268,23 +268,6 @@ void TreeLifeUpdater::UpdateLifeVar(GenTree* tree) { VarSetOps::AddElemD(compiler, stackVarDeltaSet, varDsc->lvVarIndex); } - - if (lclVarTree->IsMultiRegLclVar()) - { - unsigned firstFieldVarNum = varDsc->lvFieldLclStart; - - for (unsigned i = 0; i < 2; ++i) - { - LclVarDsc* fieldVarDsc = compiler->lvaGetDesc(firstFieldVarNum + i); - bool isInReg = fieldVarDsc->lvIsInReg() && tree->GetRegByIndex(i) != REG_NA; - VarSetOps::AddElemD(compiler, varDeltaSet, fieldVarDsc->lvVarIndex); - - if (isInReg) - { - compiler->codeGen->genUpdateRegLife(fieldVarDsc, isBorn, isDying DEBUGARG(tree)); - } - } - } } } else if (ForCodeGen && lclVarTree->IsMultiRegLclVar()) From 47848b01d1a6afcdc87189930f2fd8786f06d4a5 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 5 Jan 2023 13:36:39 -0800 Subject: [PATCH 012/125] Remove regCount from LclVarDsc --- src/coreclr/jit/compiler.h | 1 - src/coreclr/jit/gentree.cpp | 2 +- src/coreclr/jit/lowerarmarch.cpp | 9 +++------ 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 3b5f2ee35f0d4a..683cb16ec4a334 100644 
--- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -668,7 +668,6 @@ class LclVarDsc union { unsigned char lvFieldCnt; // Number of fields in the promoted VarDsc. - unsigned char regCount; }; unsigned char lvFldOffset; unsigned char lvFldOrdinal; diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index ada996b3868e3c..1166734773963d 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -808,7 +808,7 @@ int GenTree::GetRegisterDstCount(Compiler* compiler) const #ifdef FEATURE_HW_INTRINSICS if (AsLclVar()->IsMultiRegUse()) { - return compiler->lvaGetDesc(AsLclVar())->regCount; + return compiler->lvaGetDesc(AsLclVar())->lvFieldCnt; } #endif // FEATURE_HW_INTRINSICS return AsLclVar()->GetFieldCount(compiler); diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 99134a3a499f95..5a226be357ae24 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -2582,21 +2582,18 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) { case NI_AdvSimd_VectorTableLookup_2: case NI_AdvSimd_Arm64_VectorTableLookup_2: - comp->lvaGetDesc(intrin.op1->AsLclVar())->regCount = 2; + assert(comp->lvaGetDesc(intrin.op1->AsLclVar())->lvFieldCnt == 2); intrin.op1->AsLclVar()->SetMultiRegUse(); - //MakeSrcContained(node, intrin.op1); break; case NI_AdvSimd_VectorTableLookup_3: case NI_AdvSimd_Arm64_VectorTableLookup_3: - comp->lvaGetDesc(intrin.op1->AsLclVar())->regCount = 3; + assert(comp->lvaGetDesc(intrin.op1->AsLclVar())->lvFieldCnt == 3); intrin.op1->AsLclVar()->SetMultiRegUse(); - //MakeSrcContained(node, intrin.op1); break; case NI_AdvSimd_VectorTableLookup_4: case NI_AdvSimd_Arm64_VectorTableLookup_4: - comp->lvaGetDesc(intrin.op1->AsLclVar())->regCount = 4; + assert(comp->lvaGetDesc(intrin.op1->AsLclVar())->lvFieldCnt == 4); intrin.op1->AsLclVar()->SetMultiRegUse(); - //MakeSrcContained(node, intrin.op1); break; case NI_AdvSimd_DuplicateSelectedScalarToVector64: case NI_AdvSimd_DuplicateSelectedScalarToVector128: From 25a738d2c59def864b62fce992adf9aad011b402 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 5 Jan 2023 13:43:32 -0800 Subject: [PATCH 013/125] Some more cleanup Some more cleanup --- src/coreclr/jit/codegenlinear.cpp | 3 +- src/coreclr/jit/compiler.cpp | 2 +- src/coreclr/jit/compiler.h | 4 +- src/coreclr/jit/gentree.cpp | 1 - src/coreclr/jit/hwintrinsic.cpp | 19 +------ src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 4 ++ src/coreclr/jit/hwintrinsiclistarm64.h | 2 +- src/coreclr/jit/lsra.cpp | 56 +++++---------------- src/coreclr/jit/lsrabuild.cpp | 29 +++-------- 9 files changed, 29 insertions(+), 91 deletions(-) diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index 6408b8baf768ff..7fdbe014806031 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -1444,7 +1444,7 @@ regNumber CodeGen::genConsumeReg(GenTree* tree, unsigned multiRegIndex) assert(lcl->IsMultiReg()); LclVarDsc* varDsc = compiler->lvaGetDesc(lcl); - //assert(varDsc->lvPromoted); + assert(varDsc->lvPromoted); assert(multiRegIndex < varDsc->lvFieldCnt); unsigned fieldVarNum = varDsc->lvFieldLclStart + multiRegIndex; LclVarDsc* fldVarDsc = compiler->lvaGetDesc(fieldVarNum); @@ -1541,7 +1541,6 @@ regNumber CodeGen::genConsumeReg(GenTree* tree) GenTreeLclVar* lcl = tree->gtSkipReloadOrCopy()->AsLclVar(); LclVarDsc* varDsc = compiler->lvaGetDesc(lcl); unsigned firstFieldVarNum = varDsc->lvFieldLclStart; - for (unsigned i = 0; i < 
varDsc->lvFieldCnt; ++i) { LclVarDsc* fldVarDsc = compiler->lvaGetDesc(firstFieldVarNum + i); diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 3b764ca05681dc..68483414fb7f79 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -9728,7 +9728,7 @@ void cTreeFlags(Compiler* comp, GenTree* tree) { chars += printf("[CALL]"); } - switch (op) + switch (op) { case GT_MUL: case GT_CAST: diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 683cb16ec4a334..1df7ae4841fd89 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -666,9 +666,7 @@ class LclVarDsc // Valid on promoted struct local fields. }; - union { - unsigned char lvFieldCnt; // Number of fields in the promoted VarDsc. - }; + unsigned char lvFieldCnt; // Number of fields in the promoted VarDsc. unsigned char lvFldOffset; unsigned char lvFldOrdinal; diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 1166734773963d..914a9d772fb851 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -870,7 +870,6 @@ bool GenTree::IsMultiRegNode() const { return true; } - return false; } diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 89d4399d81ff8e..de98959e005bb5 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -1168,14 +1168,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, offset += 16; } - // op1 = fieldList; - - // const CORINFO_FIELD_HANDLE field1 = - // info.compCompHnd->getFieldInClass(sigReader.op1ClsHnd, 0); - // unsigned fldOffset1 = info.compCompHnd->getFieldOffset(field1); - // const CORINFO_FIELD_HANDLE field2 = info.compCompHnd->getFieldInClass(sigReader.op1ClsHnd, - // 1); unsigned fldOffset2 = info.compCompHnd->getFieldOffset(field2); - + switch (fieldCount) { case 1: @@ -1199,16 +1192,6 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, assert(typeOfLayout == TYP_SIMD16); } - /*GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList(); - for (unsigned i = 0; i < fieldCount; i++) - { - LclVarDsc* fieldVarDsc = lvaGetDesc(fieldLclNum); - GenTree* lclVar = gtNewLclvNode(fieldLclNum, fieldVarDsc->TypeGet()); - fieldList->AddField(this, lclVar, fieldVarDsc->lvFldOffset, fieldVarDsc->TypeGet()); - fieldLclNum++; - } - return fieldList;*/ - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); lvaGetDesc(op1->AsLclVar())->lvUsedInSIMDIntrinsic = false; } diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index b736655d947df0..8cb34e3b7a3041 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -512,6 +512,10 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) case NI_AdvSimd_Arm64_VectorTableLookup_4: if (intrin.op1->IsCopyOrReload()) { + // If value is copied in a register to satisfy the consecutive-register + // requirement, make sure to get the source's register because these + // instruction encoding takes only the 1st register and infer the rest + // from that. 
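+                    // For example, a two-register TBL names only Vn in its encoding and the
+                    // hardware implicitly uses the next register (Vn+1) as the second table,
+                    // as in: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B.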
GenTree* op1 = intrin.op1->AsCopyOrReload()->gtGetOp1(); assert(!op1->IsCopyOrReload()); op1Reg = op1->GetRegNum(); diff --git a/src/coreclr/jit/hwintrinsiclistarm64.h b/src/coreclr/jit/hwintrinsiclistarm64.h index 48d3a61894236e..50a838b041b483 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64.h +++ b/src/coreclr/jit/hwintrinsiclistarm64.h @@ -471,7 +471,7 @@ HARDWARE_INTRINSIC(AdvSimd, SubtractSaturateScalar, HARDWARE_INTRINSIC(AdvSimd, SubtractScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sub, INS_sub, INS_fsub, INS_fsub}, HW_Category_SIMD, HW_Flag_SIMDScalar) HARDWARE_INTRINSIC(AdvSimd, SubtractWideningLower, 8, 2, {INS_ssubl, INS_usubl, INS_ssubl, INS_usubl, INS_ssubl, INS_usubl, INS_ssubw, INS_usubw, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd, SubtractWideningUpper, 16, 2, {INS_ssubl2, INS_usubl2, INS_ssubl2, INS_usubl2, INS_ssubl2, INS_usubl2, INS_ssubw2, INS_usubw2, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd, VectorTableLookup, 8, 2, {INS_tbl, INS_tbl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd, VectorTableLookup, 8, 2, {INS_tbl, INS_tbl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AdvSimd, VectorTableLookup_2, 8, 2, {INS_tbl_2regs, INS_tbl_2regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment) HARDWARE_INTRINSIC(AdvSimd, VectorTableLookup_3, 8, 2, {INS_tbl_3regs, INS_tbl_3regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment) HARDWARE_INTRINSIC(AdvSimd, VectorTableLookup_4, 8, 2, {INS_tbl_4regs, INS_tbl_4regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index ca48d479ea24db..b1b437f6cfe111 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -2772,20 +2772,15 @@ bool LinearScan::isMatchingConstant(RegRecord* physRegRecord, RefPosition* refPo // no such ref position, no register will be allocated. 
// -regNumber LinearScan::allocateReg(Interval* referentInterval, +regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) { - Interval* currentInterval = refPosition->getInterval(); - assert(referentInterval == currentInterval); - regMaskTP foundRegBit = regSelector->select(currentInterval, refPosition DEBUG_ARG(registerScore)); if (foundRegBit == RBM_NONE) { return REG_NA; } - regNumber regToReturn = genRegNumFromMask(foundRegBit); - currentInterval = refPosition->getInterval(); regNumber foundReg = genRegNumFromMask(foundRegBit); RegRecord* availablePhysRegRecord = getRegisterRecord(foundReg); Interval* assignedInterval = availablePhysRegRecord->assignedInterval; @@ -2824,7 +2819,7 @@ regNumber LinearScan::allocateReg(Interval* referentInterval, // Note that we need to compute this condition before calling unassignPhysReg, which wil reset // assignedInterval->physReg. bool wasAssigned = regSelector->foundUnassignedReg() && (assignedInterval != nullptr) && - (assignedInterval->physReg == foundReg); + (assignedInterval->physReg == foundReg); unassignPhysReg(availablePhysRegRecord ARM_ARG(currentInterval->registerType)); if (regSelector->isMatchingConstant() && compiler->opts.OptimizationEnabled()) { @@ -2853,16 +2848,13 @@ regNumber LinearScan::allocateReg(Interval* referentInterval, RefPosition* consecutiveRefPosition = refPosition->nextConsecutiveRefPosition; while (consecutiveRefPosition != nullptr) { - // TODO: Unassign anything else for this register. foundRegBit <<= 1; - foundReg = genRegNumFromMask(foundRegBit); - consecutiveRefPosition->registerAssignment = foundRegBit; consecutiveRefPosition = consecutiveRefPosition->nextConsecutiveRefPosition; } } - return regToReturn; + return foundReg; } //------------------------------------------------------------------------ @@ -4663,12 +4655,6 @@ void LinearScan::allocateRegisters() for (RefPosition& currentRefPosition : refPositions) { - //TODO: Add logic to skip past consecutive registers refPositions. - //if (currentRefPosition.needsConsecutive && currentRefPosition.regCount == 0) - //{ - // continue; - //} - //while (currentRefPosition) RefPosition* nextRefPosition = currentRefPosition.nextRefPosition; // TODO: Can we combine this with the freeing of registers below? 
It might @@ -5189,7 +5175,7 @@ void LinearScan::allocateRegisters() regMaskTP assignedRegBit = RBM_NONE; bool isInRegister = false; - if ((assignedRegister != REG_NA) /*&& !currentRefPosition.needsConsecutive*/) + if (assignedRegister != REG_NA) { isInRegister = true; assignedRegBit = genRegMask(assignedRegister); @@ -5222,7 +5208,7 @@ void LinearScan::allocateRegisters() assert(previousRefPosition->nextRefPosition == ¤tRefPosition); assert(assignedRegister == REG_NA || assignedRegBit == previousRefPosition->registerAssignment || currentRefPosition.outOfOrder || previousRefPosition->copyReg || - previousRefPosition->refType == RefTypeExpUse || currentRefPosition.refType == RefTypeDummyDef || currentRefPosition.needsConsecutive); + previousRefPosition->refType == RefTypeExpUse || currentRefPosition.refType == RefTypeDummyDef); } else if (assignedRegister != REG_NA) { @@ -5296,7 +5282,7 @@ void LinearScan::allocateRegisters() } } - if ((assignedRegister != REG_NA)) + if (assignedRegister != REG_NA) { @@ -5319,7 +5305,7 @@ void LinearScan::allocateRegisters() setIntervalAsSplit(currentInterval); INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_MOVE_REG, currentInterval, assignedRegister)); } - else if (((genRegMask(assignedRegister) & currentRefPosition.registerAssignment) != 0)) + else if ((genRegMask(assignedRegister) & currentRefPosition.registerAssignment) != 0) { currentRefPosition.registerAssignment = assignedRegBit; if (!currentInterval->isActive) @@ -5432,15 +5418,6 @@ void LinearScan::allocateRegisters() { // TODO: Unassign anything else for this register. registerBit <<= 1; - regNumber foundReg = genRegNumFromMask(registerBit); - /*Interval* consecutiveInterval = consecutiveRefPosition->getInterval(); - if (consecutiveInterval->physReg != foundReg) - { - consecutiveInterval->isActive = false; - unassignPhysReg(consecutiveInterval->physReg); - consecutiveInterval->isActive = true; - }*/ - consecutiveRefPosition->registerAssignment = registerBit; consecutiveRefPosition = consecutiveRefPosition->nextConsecutiveRefPosition; @@ -5462,7 +5439,6 @@ void LinearScan::allocateRegisters() assignedRegister = REG_NA; } } - } if (assignedRegister == REG_NA) @@ -5555,7 +5531,7 @@ void LinearScan::allocateRegisters() // If we allocated a register, and this is a use of a spilled value, // it should have been marked for reload above. 
- if (assignedRegister != REG_NA && RefTypeIsUse(refType) && !isInRegister && !currentRefPosition.needsConsecutive) + if (assignedRegister != REG_NA && RefTypeIsUse(refType) && !isInRegister) { assert(currentRefPosition.reload); } @@ -5891,7 +5867,7 @@ void LinearScan::writeLocalReg(GenTreeLclVar* lclNode, unsigned varNum, regNumbe { assert(compiler->lvaEnregMultiRegVars); LclVarDsc* parentVarDsc = compiler->lvaGetDesc(lclNode); - assert(parentVarDsc->lvPromoted || lclNode->IsMultiRegUse()); + assert(parentVarDsc->lvPromoted); unsigned regIndex = varNum - parentVarDsc->lvFieldLclStart; assert(regIndex < MAX_MULTIREG_COUNT); lclNode->SetRegNumByIdx(reg, regIndex); @@ -6041,7 +6017,7 @@ void LinearScan::resolveLocalRef(BasicBlock* block, GenTreeLclVar* treeNode, Ref if (reload) { assert(currentRefPosition->refType != RefTypeDef); - assert(interval->isSpilled || currentRefPosition->needsConsecutive); + assert(interval->isSpilled); varDsc->SetRegNum(REG_STK); if (!spillAfter) { @@ -6366,11 +6342,7 @@ void LinearScan::insertCopyOrReload(BasicBlock* block, GenTree* tree, unsigned m // Insert the copy/reload after the spilled node and replace the use of the original node with a use // of the copy/reload. blockRange.InsertAfter(tree, newNode); - - //if (multiRegIdx == 0) - { - treeUse.ReplaceWith(newNode); - } + treeUse.ReplaceWith(newNode); } } @@ -12077,12 +12049,10 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval, reverseSelect = linearScan->doReverseSelect(); #endif // DEBUG -#if defined(TARGET_ARM) - freeCandidates = linearScan->getFreeCandidates(candidates, regType); -#elif defined(TARGET_ARM64) +#if defined(TARGET_ARM64) freeCandidates = linearScan->getFreeCandidates(candidates, refPosition); #else - freeCandidates = linearScan->getFreeCandidates(candidates); + freeCandidates = linearScan->getFreeCandidates(candidates ARM_ARG(regType)); #endif // TARGET_ARM // If no free candidates, then double check if refPosition is an actual ref. 
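For reference, the consecutive-register support above exists to serve the tuple-based VectorTableLookup overloads added earlier in the series. The following is a minimal usage sketch, not part of the patch: the byte element type is assumed from the uint8x16_t forms in the XML docs, and the type and variable names are illustrative only.

    using System.Runtime.Intrinsics;
    using System.Runtime.Intrinsics.Arm;

    static class TblExample
    {
        // Looks up 32 table bytes (two 128-bit table registers) per call; callers are
        // expected to have checked AdvSimd.Arm64.IsSupported first.
        static Vector128<byte> Lookup32ByteTable(Vector128<byte> t0, Vector128<byte> t1, Vector128<byte> indexes)
        {
            // The (t0, t1) tuple is what the new LSRA support places into two adjacent
            // SIMD registers, as required by the underlying multi-register TBL instruction.
            return AdvSimd.Arm64.VectorTableLookup((t0, t1), indexes);
        }
    }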
diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 6d7afead531b10..6fbcff364f9d2e 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -1256,32 +1256,17 @@ bool LinearScan::isCandidateMultiRegLclVar(GenTreeLclVar* lclNode) { assert(compiler->lvaEnregMultiRegVars && lclNode->IsMultiReg()); LclVarDsc* varDsc = compiler->lvaGetDesc(lclNode); - bool isMultiReg = false; - if (lclNode->IsMultiReg()) + assert(varDsc->lvPromoted); + bool isMultiReg = (compiler->lvaGetPromotionType(varDsc) == Compiler::PROMOTION_TYPE_INDEPENDENT); + if (!isMultiReg) { - if (!lclNode->IsMultiRegUse()) - { - assert(varDsc->lvPromoted); - bool isMultiReg = (compiler->lvaGetPromotionType(varDsc) == Compiler::PROMOTION_TYPE_INDEPENDENT); - if (!isMultiReg) - { - lclNode->ClearMultiReg(); - } - } - else - { - isMultiReg = true; - } + lclNode->ClearMultiReg(); } - #ifdef DEBUG - if (!lclNode->IsMultiRegUse()) + for (unsigned int i = 0; i < varDsc->lvFieldCnt; i++) { - for (unsigned int i = 0; i < varDsc->lvFieldCnt; i++) - { - LclVarDsc* fieldVarDsc = compiler->lvaGetDesc(varDsc->lvFieldLclStart + i); - assert(isCandidateVar(fieldVarDsc) == isMultiReg); - } + LclVarDsc* fieldVarDsc = compiler->lvaGetDesc(varDsc->lvFieldLclStart + i); + assert(isCandidateVar(fieldVarDsc) == isMultiReg); } #endif // DEBUG return isMultiReg; From e4928297d6b42f0933e0cc4ef8360a8fc31bf4ea Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 5 Jan 2023 14:53:42 -0800 Subject: [PATCH 014/125] setNextConsecutiveRegisterAssignment --- src/coreclr/jit/lsra.cpp | 52 ++++++----------------------------- src/coreclr/jit/lsra.h | 1 + src/coreclr/jit/lsraarm64.cpp | 33 ++++++++++++++++++++++ 3 files changed, 42 insertions(+), 44 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index b1b437f6cfe111..e9f9e23a402eea 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -2772,7 +2772,7 @@ bool LinearScan::isMatchingConstant(RegRecord* physRegRecord, RefPosition* refPo // no such ref position, no register will be allocated. // -regNumber LinearScan::allocateReg(Interval* currentInterval, +regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) { regMaskTP foundRegBit = regSelector->select(currentInterval, refPosition DEBUG_ARG(registerScore)); @@ -2842,17 +2842,12 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, assignPhysReg(availablePhysRegRecord, currentInterval); refPosition->registerAssignment = foundRegBit; - if (refPosition->needsConsecutive && (refPosition->regCount != 0)) +#ifdef TARGET_ARM64 + if (refPosition->needsConsecutive && (refPosition->multiRegIdx == 0)) { - // We only set this once for remaining refpositions. 
- RefPosition* consecutiveRefPosition = refPosition->nextConsecutiveRefPosition; - while (consecutiveRefPosition != nullptr) - { - foundRegBit <<= 1; - consecutiveRefPosition->registerAssignment = foundRegBit; - consecutiveRefPosition = consecutiveRefPosition->nextConsecutiveRefPosition; - } + setNextConsecutiveRegisterAssignment(refPosition, foundRegBit); } +#endif // TARGET_ARM64 return foundReg; } @@ -3739,26 +3734,6 @@ void LinearScan::spillGCRefs(RefPosition* killRefPosition) nullptr)); } -#ifdef TARGET_ARM64 -regMaskTP LinearScan::getFreeCandidates(regMaskTP candidates, RefPosition* refPosition) -{ - regMaskTP result = candidates & m_AvailableRegs; - if (!refPosition->needsConsecutive || (refPosition->multiRegIdx != 0)) - { - return result; - } - - assert(refPosition->regCount != 0); - - // If refPosition->multiRegIdx == 0, we need to make sure we check for all the - // `regCount` available regs. - - result &= (m_AvailableRegs >> (refPosition->regCount - 1)); - - return result; -} -#endif - //------------------------------------------------------------------------ // processBlockEndAllocation: Update var locations after 'currentBlock' has been allocated // @@ -5284,8 +5259,6 @@ void LinearScan::allocateRegisters() if (assignedRegister != REG_NA) { - - RegRecord* physRegRecord = getRegisterRecord(assignedRegister); assert((assignedRegBit == currentRefPosition.registerAssignment) || (physRegRecord->assignedInterval == currentInterval) || @@ -5346,7 +5319,6 @@ void LinearScan::allocateRegisters() assert(currentRefPosition.multiRegIdx != 0); } regsInUseThisLocation |= copyRegMask | assignedRegMask; - if (currentRefPosition.lastUse) { if (currentRefPosition.delayRegFree) @@ -5400,6 +5372,7 @@ void LinearScan::allocateRegisters() } } +#ifdef TARGET_ARM64 if (currentRefPosition.needsConsecutive) { // For consecutive register, we would like to assign a register (if not already assigned) @@ -5412,16 +5385,7 @@ void LinearScan::allocateRegisters() // subsequent registers to remaining position and skip the allocation for the // 1st position altogether. - RefPosition* consecutiveRefPosition = currentRefPosition.nextConsecutiveRefPosition; - regMaskTP registerBit = assignedRegBit; - while (consecutiveRefPosition != nullptr) - { - // TODO: Unassign anything else for this register. 
- registerBit <<= 1; - consecutiveRefPosition->registerAssignment = registerBit; - - consecutiveRefPosition = consecutiveRefPosition->nextConsecutiveRefPosition; - } + setNextConsecutiveRegisterAssignment(¤tRefPosition, assignedRegBit); } } else @@ -5440,7 +5404,7 @@ void LinearScan::allocateRegisters() } } } - +#endif // TARGET_ARM64 if (assignedRegister == REG_NA) { if (currentRefPosition.RegOptional()) diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 5cd0364a3d8317..3a659c4f03f112 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1186,6 +1186,7 @@ class LinearScan : public LinearScanInterface #if defined(TARGET_ARM64) regMaskTP getFreeCandidates(regMaskTP candidates, RefPosition* refPosition); + void setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosition, regMaskTP firstRegAssigned); #else regMaskTP getFreeCandidates(regMaskTP candidates ARM_ARG(var_types regType)) { diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 58b6945798a9f9..2041ea1336f755 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -25,6 +25,39 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "sideeffects.h" #include "lower.h" +regMaskTP LinearScan::getFreeCandidates(regMaskTP candidates, RefPosition* refPosition) +{ + regMaskTP result = candidates & m_AvailableRegs; + if (!refPosition->needsConsecutive || (refPosition->multiRegIdx != 0)) + { + return result; + } + + assert(refPosition->regCount != 0); + + // If refPosition->multiRegIdx == 0, we need to make sure we check for all the + // `regCount` available regs. + + result &= (m_AvailableRegs >> (refPosition->regCount - 1)); + + return result; +} + +void LinearScan::setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosition, regMaskTP firstRegAssigned) +{ + assert(isSingleRegister(firstRegAssigned)); + assert(firstRefPosition->needsConsecutive && firstRefPosition->multiRegIdx == 0); + + RefPosition* consecutiveRefPosition = firstRefPosition->nextConsecutiveRefPosition; + regMaskTP registerToAssign = firstRegAssigned; + while (consecutiveRefPosition != nullptr) + { + registerToAssign <<= 1; + consecutiveRefPosition->registerAssignment = registerToAssign; + consecutiveRefPosition = consecutiveRefPosition->nextConsecutiveRefPosition; + } +} + //------------------------------------------------------------------------ // BuildNode: Build the RefPositions for a node // From 385abf1e35882a54b130be80331c2eff7dbe1162 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 6 Jan 2023 05:39:56 -0800 Subject: [PATCH 015/125] Some more cleanup --- src/coreclr/jit/hwintrinsic.cpp | 9 ++++++++- src/coreclr/jit/hwintrinsiclistarm64.h | 12 ++++++------ src/coreclr/jit/lowerarmarch.cpp | 15 -------------- src/coreclr/jit/lsra.cpp | 2 +- src/coreclr/jit/lsraarm64.cpp | 27 +++++++++++++++++++++++++- 5 files changed, 41 insertions(+), 24 deletions(-) diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index de98959e005bb5..93435395bb200d 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -1172,15 +1172,22 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, switch (fieldCount) { case 1: - // keep the intrinsic + // NI_AdvSimd_VectorTableLookup + // NI_AdvSimd_Arm64_VectorTableLookup break; case 2: + //NI_AdvSimd_VectorTableLookup_2 + //NI_AdvSimd_Arm64_VectorTableLookup_2 intrinsic = (NamedIntrinsic)(intrinsic + 1); break; case 3: + // 
NI_AdvSimd_VectorTableLookup_3 + // NI_AdvSimd_Arm64_VectorTableLookup_3 intrinsic = (NamedIntrinsic)(intrinsic + 2); break; case 4: + // NI_AdvSimd_VectorTableLookup_4 + // NI_AdvSimd_Arm64_VectorTableLookup_4 intrinsic = (NamedIntrinsic)(intrinsic + 3); break; default: diff --git a/src/coreclr/jit/hwintrinsiclistarm64.h b/src/coreclr/jit/hwintrinsiclistarm64.h index 50a838b041b483..d8a0add3c518f4 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64.h +++ b/src/coreclr/jit/hwintrinsiclistarm64.h @@ -472,9 +472,9 @@ HARDWARE_INTRINSIC(AdvSimd, SubtractScalar, HARDWARE_INTRINSIC(AdvSimd, SubtractWideningLower, 8, 2, {INS_ssubl, INS_usubl, INS_ssubl, INS_usubl, INS_ssubl, INS_usubl, INS_ssubw, INS_usubw, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd, SubtractWideningUpper, 16, 2, {INS_ssubl2, INS_usubl2, INS_ssubl2, INS_usubl2, INS_ssubl2, INS_usubl2, INS_ssubw2, INS_usubw2, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd, VectorTableLookup, 8, 2, {INS_tbl, INS_tbl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, VectorTableLookup_2, 8, 2, {INS_tbl_2regs, INS_tbl_2regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment) -HARDWARE_INTRINSIC(AdvSimd, VectorTableLookup_3, 8, 2, {INS_tbl_3regs, INS_tbl_3regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment) -HARDWARE_INTRINSIC(AdvSimd, VectorTableLookup_4, 8, 2, {INS_tbl_4regs, INS_tbl_4regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment) +HARDWARE_INTRINSIC(AdvSimd, VectorTableLookup_2, 8, 2, {INS_tbl_2regs, INS_tbl_2regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd, VectorTableLookup_3, 8, 2, {INS_tbl_3regs, INS_tbl_3regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd, VectorTableLookup_4, 8, 2, {INS_tbl_4regs, INS_tbl_4regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd, VectorTableLookupExtension, 8, 3, {INS_tbx, INS_tbx, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd, Xor, -1, 2, {INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor}, HW_Category_SIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(AdvSimd, ZeroExtendWideningLower, 8, 1, {INS_uxtl, INS_uxtl, INS_uxtl, INS_uxtl, INS_uxtl, INS_uxtl, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) @@ -649,9 +649,9 @@ HARDWARE_INTRINSIC(AdvSimd_Arm64, TransposeOdd, HARDWARE_INTRINSIC(AdvSimd_Arm64, UnzipEven, -1, 2, {INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, 
INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1}, HW_Category_SIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AdvSimd_Arm64, UnzipOdd, -1, 2, {INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2}, HW_Category_SIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup, 16, 2, {INS_tbl, INS_tbl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup_2, 16, 2, {INS_tbl_2regs, INS_tbl_2regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment) -HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup_3, 16, 2, {INS_tbl_3regs, INS_tbl_3regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment) -HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup_4, 16, 2, {INS_tbl_4regs, INS_tbl_4regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment) +HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup_2, 16, 2, {INS_tbl_2regs, INS_tbl_2regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup_3, 16, 2, {INS_tbl_3regs, INS_tbl_3regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup_4, 16, 2, {INS_tbl_4regs, INS_tbl_4regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookupExtension, 16, 3, {INS_tbx, INS_tbx, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(AdvSimd_Arm64, ZipHigh, -1, 2, {INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2}, HW_Category_SIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AdvSimd_Arm64, ZipLow, -1, 2, {INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1}, HW_Category_SIMD, HW_Flag_NoFlag) diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 5a226be357ae24..a32c33113c2087 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -2580,21 +2580,6 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) { switch (intrin.id) { - case NI_AdvSimd_VectorTableLookup_2: - case NI_AdvSimd_Arm64_VectorTableLookup_2: - assert(comp->lvaGetDesc(intrin.op1->AsLclVar())->lvFieldCnt == 2); - intrin.op1->AsLclVar()->SetMultiRegUse(); - break; - case NI_AdvSimd_VectorTableLookup_3: - case NI_AdvSimd_Arm64_VectorTableLookup_3: - assert(comp->lvaGetDesc(intrin.op1->AsLclVar())->lvFieldCnt == 3); - intrin.op1->AsLclVar()->SetMultiRegUse(); - break; - case NI_AdvSimd_VectorTableLookup_4: - case NI_AdvSimd_Arm64_VectorTableLookup_4: - assert(comp->lvaGetDesc(intrin.op1->AsLclVar())->lvFieldCnt == 4); - intrin.op1->AsLclVar()->SetMultiRegUse(); - 
break; case NI_AdvSimd_DuplicateSelectedScalarToVector64: case NI_AdvSimd_DuplicateSelectedScalarToVector128: case NI_AdvSimd_Extract: diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index e9f9e23a402eea..23075287c2716e 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -5377,7 +5377,7 @@ void LinearScan::allocateRegisters() { // For consecutive register, we would like to assign a register (if not already assigned) // to the 1st position and the subsequent positions will just get the consecutive register. - if (currentRefPosition.multiRegIdx == 0) + if (currentRefPosition.regCount > 0) { if (assignedRegister != REG_NA) { diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 2041ea1336f755..b8f69c57c975d3 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -43,19 +43,41 @@ regMaskTP LinearScan::getFreeCandidates(regMaskTP candidates, RefPosition* refPo return result; } +//------------------------------------------------------------------------ +// setNextConsecutiveRegisterAssignment: Set the consecutive register mask to the +// subsequent refpositions +// +// Arguments: +// firstRefPosition - The first refPosition of the series that needs consecutive registers +// firstRegAssigned - The register mask assigned to the first refPosition +// +// Return Value: +// None. +// void LinearScan::setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosition, regMaskTP firstRegAssigned) { assert(isSingleRegister(firstRegAssigned)); - assert(firstRefPosition->needsConsecutive && firstRefPosition->multiRegIdx == 0); + assert(firstRefPosition->needsConsecutive && firstRefPosition->regCount > 0); RefPosition* consecutiveRefPosition = firstRefPosition->nextConsecutiveRefPosition; + + // should have at least one consecutive register requirement + assert(consecutiveRefPosition != nullptr); + regMaskTP registerToAssign = firstRegAssigned; + int refPosCount = 1; while (consecutiveRefPosition != nullptr) { registerToAssign <<= 1; consecutiveRefPosition->registerAssignment = registerToAssign; consecutiveRefPosition = consecutiveRefPosition->nextConsecutiveRefPosition; + +#ifdef DEBUG + refPosCount++; +#endif } + + assert(refPosCount == firstRefPosition->regCount); } //------------------------------------------------------------------------ @@ -1125,14 +1147,17 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou { case NI_AdvSimd_VectorTableLookup_2: case NI_AdvSimd_Arm64_VectorTableLookup_2: + assert(compiler->lvaGetDesc(intrin.op1->AsLclVar())->lvFieldCnt == 2); regCount = 2; break; case NI_AdvSimd_VectorTableLookup_3: case NI_AdvSimd_Arm64_VectorTableLookup_3: + assert(compiler->lvaGetDesc(intrin.op1->AsLclVar())->lvFieldCnt == 3); regCount = 3; break; case NI_AdvSimd_VectorTableLookup_4: case NI_AdvSimd_Arm64_VectorTableLookup_4: + assert(compiler->lvaGetDesc(intrin.op1->AsLclVar())->lvFieldCnt == 4); regCount = 4; break; default: From 2f4a4e3be64bf49657c2c9a696c90ef3b1cf6957 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 6 Jan 2023 06:29:05 -0800 Subject: [PATCH 016/125] TARGET_ARM64 --- src/coreclr/jit/gentree.cpp | 4 ++-- src/coreclr/jit/gentree.h | 17 +++++++++++------ src/coreclr/jit/lower.cpp | 4 ++++ src/coreclr/jit/lsra.cpp | 19 +++++++++++-------- src/coreclr/jit/lsra.h | 27 ++++++++++++++++++--------- src/coreclr/jit/lsrabuild.cpp | 2 ++ src/coreclr/jit/morph.cpp | 8 ++++++-- 7 files changed, 54 insertions(+), 27 deletions(-) diff --git 
a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 914a9d772fb851..87f9099af4c3ee 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -805,12 +805,12 @@ int GenTree::GetRegisterDstCount(Compiler* compiler) const if (OperIsScalarLocal()) { -#ifdef FEATURE_HW_INTRINSICS +#if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_ARM64) if (AsLclVar()->IsMultiRegUse()) { return compiler->lvaGetDesc(AsLclVar())->lvFieldCnt; } -#endif // FEATURE_HW_INTRINSICS +#endif // FEATURE_HW_INTRINSICS && TARGET_ARM64 return AsLclVar()->GetFieldCount(compiler); } assert(!"Unexpected multi-reg node"); diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 5f253106101b91..48b932753d8019 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -3771,7 +3771,9 @@ struct GenTreeLclVar : public GenTreeLclVarCommon private: regNumberSmall gtOtherReg[MAX_MULTIREG_COUNT - 1]; MultiRegSpillFlags gtSpillFlags; +#ifdef TARGET_ARM64 bool isMultiRegUse; +#endif public: INDEBUG(IL_OFFSET gtLclILoffs;) // instr offset of ref (only for JIT dumps) @@ -3782,27 +3784,30 @@ struct GenTreeLclVar : public GenTreeLclVarCommon return ((gtFlags & GTF_VAR_MULTIREG) != 0); } - bool IsMultiRegUse() const - { - assert(!isMultiRegUse || ((gtFlags & GTF_VAR_MULTIREG) != 0)); - return isMultiRegUse; - } - void ClearMultiReg() { gtFlags &= ~GTF_VAR_MULTIREG; } + void SetMultiReg() { gtFlags |= GTF_VAR_MULTIREG; ClearOtherRegFlags(); } +#ifdef TARGET_ARM64 + bool IsMultiRegUse() const + { + assert(!isMultiRegUse || ((gtFlags & GTF_VAR_MULTIREG) != 0)); + return isMultiRegUse; + } + void SetMultiRegUse() { isMultiRegUse = true; SetMultiReg(); } +#endif regNumber GetRegNumByIdx(int regIndex) const { diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 24dfa12d4fade1..5ee623d738a1d6 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -6668,7 +6668,11 @@ void Lowering::CheckNode(Compiler* compiler, GenTree* node) #endif // FEATURE_SIMD && TARGET_64BIT if (varDsc->lvPromoted) { +#ifdef TARGET_ARM64 assert(varDsc->lvDoNotEnregister || varDsc->lvIsMultiRegRet || node->AsLclVar()->IsMultiRegUse()); +#else + assert(varDsc->lvDoNotEnregister || varDsc->lvIsMultiRegRet); +#endif // TARGET_ARM64 } } break; diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 23075287c2716e..d7e2fd9c199204 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -2837,8 +2837,6 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, } } - // At this point, we need to make sure that other `regCount` registers are available and then just allocate them - // to subsequent refpositions. assignPhysReg(availablePhysRegRecord, currentInterval); refPosition->registerAssignment = foundRegBit; @@ -5307,7 +5305,8 @@ void LinearScan::allocateRegisters() lastAllocatedRefPosition = ¤tRefPosition; regMaskTP copyRegMask = getRegMask(copyReg, currentInterval->registerType); regMaskTP assignedRegMask = getRegMask(assignedRegister, currentInterval->registerType); - + +#ifdef TARGET_ARM64 if (currentRefPosition.needsConsecutive) { // For consecutive register, it doesn't matter what the assigned register was. @@ -5318,6 +5317,7 @@ void LinearScan::allocateRegisters() // This should never be the first refposition of the series. 
assert(currentRefPosition.multiRegIdx != 0); } +#endif regsInUseThisLocation |= copyRegMask | assignedRegMask; if (currentRefPosition.lastUse) { @@ -5376,20 +5376,22 @@ void LinearScan::allocateRegisters() if (currentRefPosition.needsConsecutive) { // For consecutive register, we would like to assign a register (if not already assigned) - // to the 1st position and the subsequent positions will just get the consecutive register. + // to the 1st refPosition and the subsequent refPositions will just get the consecutive register. if (currentRefPosition.regCount > 0) { + // 1st refPosition of the series... if (assignedRegister != REG_NA) { - // For 1st position, if it already has a register assigned, then just assign - // subsequent registers to remaining position and skip the allocation for the - // 1st position altogether. + // For the 1st refPosition, if it already has a register assigned, then just assign + // subsequent registers to the remaining position and skip the allocation for the + // 1st refPosition altogether. setNextConsecutiveRegisterAssignment(¤tRefPosition, assignedRegBit); } } else { + // remaining refPosition of the series... if (assignedRegBit == currentRefPosition.registerAssignment) { // For the subsequent position, if they already have the subsequent register assigned, then @@ -5398,13 +5400,14 @@ void LinearScan::allocateRegisters() } else { - // If subsequent position is not assigned to the subsequent register, then reassign the right + // If the subsequent refPosition is not assigned to the consecutive register, then reassign the right // consecutive register. assignedRegister = REG_NA; } } } #endif // TARGET_ARM64 + if (assignedRegister == REG_NA) { if (currentRefPosition.RegOptional()) diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 3a659c4f03f112..d35511d5f807e4 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -2240,13 +2240,6 @@ class RefPosition // are only traversed in the forward direction, and are not moved. RefPosition* nextRefPosition; - // This is temporary. It will be moved to LinearScan level in a map that will store - // the next refposition. Below table, we are storing 2 situation of consecutive registers - // First being 3 consecutive registers (21, 22, 23) and (41, 42). - // 21 -> 22 - // 22 -> 23 - // 41 -> 42 - RefPosition* nextConsecutiveRefPosition; // The remaining fields are common to both options GenTree* treeNode; unsigned int bbNum; @@ -2275,8 +2268,22 @@ class RefPosition // across all targets and that happened to be 4 on Arm. Hence index value // would be 0..MAX_RET_REG_COUNT-1. unsigned char multiRegIdx : 2; + +#ifdef TARGET_ARM64 + // This is temporary. It will be moved to LinearScan level in a map that will store + // the next refposition. Below table, we are storing 2 situation of consecutive registers + // First being 3 consecutive registers (21, 22, 23) and (41, 42). 
+ // 21 -> 22 + // 22 -> 23 + // 41 -> 42 + RefPosition* nextConsecutiveRefPosition; + + // If this refposition needs consecutive register assignment bool needsConsecutive; + + // How many consecutive registers does this and subsequent refPositions need unsigned char regCount : 2; +#endif // TARGET_ARM64 // Last Use - this may be true for multiple RefPositions in the same Interval unsigned char lastUse : 1; @@ -2357,15 +2364,17 @@ class RefPosition RefType refType DEBUG_ARG(GenTree* buildNode)) : referent(nullptr) , nextRefPosition(nullptr) - , nextConsecutiveRefPosition(nullptr) , treeNode(treeNode) , bbNum(bbNum) , nodeLocation(nodeLocation) , registerAssignment(RBM_NONE) , refType(refType) , multiRegIdx(0) +#ifdef TARGET_ARM64 + , nextConsecutiveRefPosition(nullptr) , needsConsecutive(false) - , regCount(1) + , regCount(0) +#endif , lastUse(false) , reload(false) , spillAfter(false) diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 6fbcff364f9d2e..17896a40275a74 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -3089,7 +3089,9 @@ RefPosition* LinearScan::BuildUse(GenTree* operand, regMaskTP candidates, int mu operand = nullptr; } RefPosition* useRefPos = newRefPosition(interval, currentLoc, RefTypeUse, operand, candidates, multiRegIdx); +#ifdef TARGET_ARM64 useRefPos->needsConsecutive = needsConsecutive; +#endif useRefPos->setRegOptional(regOptional); return useRefPos; } diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 58a7399007881c..6c2c25a9758098 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -12144,12 +12144,16 @@ GenTree* Compiler::fgMorphMultiOp(GenTreeMultiOp* multiOp) // Promoted structs after morph must be in one of two states: // a) Fully eliminated from the IR (independent promotion) OR only be - // used by "special" nodes (e. g. LHS of ASGs for multi-reg structs). + // used by "special" nodes (e. g. LHS of ASGs for multi-reg structs or RHS of ASGs for multi-use). // b) Marked as do-not-enregister (dependent promotion). // // So here we preserve this invariant and mark any promoted structs as do-not-enreg. // - if (operand->OperIs(GT_LCL_VAR) && lvaGetDesc(operand->AsLclVar())->lvPromoted && !operand->AsLclVar()->IsMultiRegUse()) + if (operand->OperIs(GT_LCL_VAR) && lvaGetDesc(operand->AsLclVar())->lvPromoted +#ifdef TARGET_ARM64 + && !operand->AsLclVar()->IsMultiRegUse() +#endif + ) { lvaSetVarDoNotEnregister(operand->AsLclVar()->GetLclNum() DEBUGARG(DoNotEnregisterReason::SimdUserForcesDep)); From 8d3744ba593524e30d0c86186f759276d3dc53ca Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 6 Jan 2023 07:13:08 -0800 Subject: [PATCH 017/125] Use getNextConsecutiveRefPositions instead of nextConsecutiveRefPosition field --- src/coreclr/jit/lsra.cpp | 24 ++++++++++++++++++++++++ src/coreclr/jit/lsra.h | 24 +++++++++++++++--------- src/coreclr/jit/lsraarm64.cpp | 11 +++++++---- 3 files changed, 46 insertions(+), 13 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index d7e2fd9c199204..71ecf7c4b4e366 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -1237,6 +1237,10 @@ PhaseStatus LinearScan::doLinearScan() splitBBNumToTargetBBNumMap = nullptr; +#ifdef TARGET_ARM64 + nextConsecutiveRefPositionMap = nullptr; +#endif + // This is complicated by the fact that physical registers have refs associated // with locations where they are killed (e.g. calls), but we don't want to // count these as being touched. 
@@ -2129,6 +2133,26 @@ VarToRegMap LinearScan::setInVarToRegMap(unsigned int bbNum, VarToRegMap srcVarT return inVarToRegMap; } +#ifdef TARGET_ARM64 +//------------------------------------------------------------------------ +// getNextConsecutiveRefPosition: Get the next subsequent refPosition. +// +// Arguments: +// refPosition - The refposition for which we need to find next refposition +// +// Return Value: +// The next refPosition or nullptr if there is not one. +// +RefPosition* LinearScan::getNextConsecutiveRefPosition(RefPosition* refPosition) +{ + RefPosition* nextRefPosition; + assert(refPosition->needsConsecutive); + nextConsecutiveRefPositionMap->Lookup(refPosition, &nextRefPosition); + assert((nextRefPosition == nullptr) || nextRefPosition->needsConsecutive); + return nextRefPosition; +} +#endif + //------------------------------------------------------------------------ // checkLastUses: Check correctness of last use flags // diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index d35511d5f807e4..dc2c62f0bf0981 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1361,6 +1361,21 @@ class LinearScan : public LinearScanInterface regNumber getTempRegForResolution(BasicBlock* fromBlock, BasicBlock* toBlock, var_types type); +#ifdef TARGET_ARM64 + typedef JitHashTable, RefPosition*> NextConsecutiveRefPositionsMap; + NextConsecutiveRefPositionsMap* nextConsecutiveRefPositionMap; + NextConsecutiveRefPositionsMap* getNextConsecutiveRefPositionsMap() + { + if (nextConsecutiveRefPositionMap == nullptr) + { + nextConsecutiveRefPositionMap = + new (getAllocator(compiler)) NextConsecutiveRefPositionsMap(getAllocator(compiler)); + } + return nextConsecutiveRefPositionMap; + } + RefPosition* getNextConsecutiveRefPosition(RefPosition* refPosition); +#endif + #ifdef DEBUG void dumpVarToRegMap(VarToRegMap map); void dumpInVarToRegMap(BasicBlock* block); @@ -2270,14 +2285,6 @@ class RefPosition unsigned char multiRegIdx : 2; #ifdef TARGET_ARM64 - // This is temporary. It will be moved to LinearScan level in a map that will store - // the next refposition. Below table, we are storing 2 situation of consecutive registers - // First being 3 consecutive registers (21, 22, 23) and (41, 42). 
- // 21 -> 22 - // 22 -> 23 - // 41 -> 42 - RefPosition* nextConsecutiveRefPosition; - // If this refposition needs consecutive register assignment bool needsConsecutive; @@ -2371,7 +2378,6 @@ class RefPosition , refType(refType) , multiRegIdx(0) #ifdef TARGET_ARM64 - , nextConsecutiveRefPosition(nullptr) , needsConsecutive(false) , regCount(0) #endif diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index b8f69c57c975d3..54f6ee847ebd7e 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -59,7 +59,7 @@ void LinearScan::setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosit assert(isSingleRegister(firstRegAssigned)); assert(firstRefPosition->needsConsecutive && firstRefPosition->regCount > 0); - RefPosition* consecutiveRefPosition = firstRefPosition->nextConsecutiveRefPosition; + RefPosition* consecutiveRefPosition = getNextConsecutiveRefPosition(firstRefPosition); // should have at least one consecutive register requirement assert(consecutiveRefPosition != nullptr); @@ -70,7 +70,7 @@ void LinearScan::setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosit { registerToAssign <<= 1; consecutiveRefPosition->registerAssignment = registerToAssign; - consecutiveRefPosition = consecutiveRefPosition->nextConsecutiveRefPosition; + consecutiveRefPosition = getNextConsecutiveRefPosition(consecutiveRefPosition); #ifdef DEBUG refPosCount++; @@ -1184,8 +1184,11 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } else { - currRefPos->regCount = 0; // Explicitely set it so we can identify that this is non-first refposition. - lastRefPos->nextConsecutiveRefPosition = currRefPos; + // Explicitely set regCount=0 so we can identify that this is non-first refposition. 
+ currRefPos->regCount = 0; + + getNextConsecutiveRefPositionsMap()->Set(lastRefPos, currRefPos, LinearScan::NextConsecutiveRefPositionsMap::Overwrite); + getNextConsecutiveRefPositionsMap()->Set(currRefPos, nullptr); } lastRefPos = currRefPos; } From 8d66d45d7eb2dec0d91732eb2044ee7c9eda7002 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 6 Jan 2023 07:14:17 -0800 Subject: [PATCH 018/125] jit format --- src/coreclr/jit/gentree.h | 2 +- src/coreclr/jit/hwintrinsic.cpp | 18 +++++++++--------- src/coreclr/jit/hwintrinsic.h | 18 +++++++++--------- src/coreclr/jit/lsra.cpp | 9 +++++---- src/coreclr/jit/lsra.h | 16 +++++++++------- src/coreclr/jit/lsraarm64.cpp | 17 +++++++++-------- src/coreclr/jit/morph.cpp | 2 +- 7 files changed, 43 insertions(+), 39 deletions(-) diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 48b932753d8019..4f2544a3086918 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -3772,7 +3772,7 @@ struct GenTreeLclVar : public GenTreeLclVarCommon regNumberSmall gtOtherReg[MAX_MULTIREG_COUNT - 1]; MultiRegSpillFlags gtSpillFlags; #ifdef TARGET_ARM64 - bool isMultiRegUse; + bool isMultiRegUse; #endif public: diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 93435395bb200d..4d1a4b08a0755a 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -1141,11 +1141,11 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, #ifdef TARGET_ARM64 if ((intrinsic == NI_AdvSimd_VectorTableLookup) || (intrinsic == NI_AdvSimd_Arm64_VectorTableLookup)) { - op1 = impPopStack().val; - ClassLayout* layout = op1->GetLayout(this); - unsigned structSize = layout->GetSize(); - unsigned slotCount = layout->GetSlotCount(); - var_types typeOfLayout = layout->GetType(); + op1 = impPopStack().val; + ClassLayout* layout = op1->GetLayout(this); + unsigned structSize = layout->GetSize(); + unsigned slotCount = layout->GetSlotCount(); + var_types typeOfLayout = layout->GetType(); if (typeOfLayout == TYP_STRUCT) { unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(sigReader.op1ClsHnd); @@ -1168,7 +1168,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, offset += 16; } - + switch (fieldCount) { case 1: @@ -1176,8 +1176,8 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, // NI_AdvSimd_Arm64_VectorTableLookup break; case 2: - //NI_AdvSimd_VectorTableLookup_2 - //NI_AdvSimd_Arm64_VectorTableLookup_2 + // NI_AdvSimd_VectorTableLookup_2 + // NI_AdvSimd_Arm64_VectorTableLookup_2 intrinsic = (NamedIntrinsic)(intrinsic + 1); break; case 3: @@ -1205,7 +1205,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, else #endif { - op1 = getArgForHWIntrinsic(sigReader.GetOp1Type(), sigReader.op1ClsHnd); + op1 = getArgForHWIntrinsic(sigReader.GetOp1Type(), sigReader.op1ClsHnd); retNode = isScalar ? 
gtNewScalarHWIntrinsicNode(retType, op1, op2, intrinsic) : gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); diff --git a/src/coreclr/jit/hwintrinsic.h b/src/coreclr/jit/hwintrinsic.h index 5b60517d161afd..78e24bb0b05b8f 100644 --- a/src/coreclr/jit/hwintrinsic.h +++ b/src/coreclr/jit/hwintrinsic.h @@ -778,15 +778,15 @@ struct HWIntrinsicInfo switch (id) { #ifdef TARGET_ARM64 - case NI_AdvSimd_VectorTableLookup_2: - case NI_AdvSimd_Arm64_VectorTableLookup_2: - return 2; - case NI_AdvSimd_VectorTableLookup_3: - case NI_AdvSimd_Arm64_VectorTableLookup_3: - return 3; - case NI_AdvSimd_VectorTableLookup_4: - case NI_AdvSimd_Arm64_VectorTableLookup_4: - return 4; + case NI_AdvSimd_VectorTableLookup_2: + case NI_AdvSimd_Arm64_VectorTableLookup_2: + return 2; + case NI_AdvSimd_VectorTableLookup_3: + case NI_AdvSimd_Arm64_VectorTableLookup_3: + return 3; + case NI_AdvSimd_VectorTableLookup_4: + case NI_AdvSimd_Arm64_VectorTableLookup_4: + return 4; #endif default: unreached(); diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 71ecf7c4b4e366..4e516798988e77 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -2796,7 +2796,7 @@ bool LinearScan::isMatchingConstant(RegRecord* physRegRecord, RefPosition* refPo // no such ref position, no register will be allocated. // -regNumber LinearScan::allocateReg(Interval* currentInterval, +regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) { regMaskTP foundRegBit = regSelector->select(currentInterval, refPosition DEBUG_ARG(registerScore)); @@ -2870,7 +2870,7 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, setNextConsecutiveRegisterAssignment(refPosition, foundRegBit); } #endif // TARGET_ARM64 - + return foundReg; } @@ -5410,7 +5410,7 @@ void LinearScan::allocateRegisters() // subsequent registers to the remaining position and skip the allocation for the // 1st refPosition altogether. - setNextConsecutiveRegisterAssignment(¤tRefPosition, assignedRegBit); + setNextConsecutiveRegisterAssignment(¤tRefPosition, assignedRegBit); } } else @@ -5424,7 +5424,8 @@ void LinearScan::allocateRegisters() } else { - // If the subsequent refPosition is not assigned to the consecutive register, then reassign the right + // If the subsequent refPosition is not assigned to the consecutive register, then reassign the + // right // consecutive register. 
assignedRegister = REG_NA; } diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index dc2c62f0bf0981..74dbbe6c64137f 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1179,16 +1179,15 @@ class LinearScan : public LinearScanInterface void spillGCRefs(RefPosition* killRefPosition); - /***************************************************************************** - * Register selection - ****************************************************************************/ - +/***************************************************************************** +* Register selection +****************************************************************************/ #if defined(TARGET_ARM64) regMaskTP getFreeCandidates(regMaskTP candidates, RefPosition* refPosition); - void setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosition, regMaskTP firstRegAssigned); + void setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosition, regMaskTP firstRegAssigned); #else - regMaskTP getFreeCandidates(regMaskTP candidates ARM_ARG(var_types regType)) + regMaskTP getFreeCandidates(regMaskTP candidates ARM_ARG(var_types regType)) { regMaskTP result = candidates & m_AvailableRegs; #ifdef TARGET_ARM @@ -1860,7 +1859,10 @@ class LinearScan : public LinearScanInterface bool isCandidateMultiRegLclVar(GenTreeLclVar* lclNode); bool checkContainedOrCandidateLclVar(GenTreeLclVar* lclNode); - RefPosition* BuildUse(GenTree* operand, regMaskTP candidates = RBM_NONE, int multiRegIdx = 0, bool needsConsecutive = false); + RefPosition* BuildUse(GenTree* operand, + regMaskTP candidates = RBM_NONE, + int multiRegIdx = 0, + bool needsConsecutive = false); void setDelayFree(RefPosition* use); int BuildBinaryUses(GenTreeOp* node, regMaskTP candidates = RBM_NONE); diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 54f6ee847ebd7e..18b7bcfbd73b82 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -61,16 +61,16 @@ void LinearScan::setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosit RefPosition* consecutiveRefPosition = getNextConsecutiveRefPosition(firstRefPosition); - // should have at least one consecutive register requirement + // should have at least one consecutive register requirement assert(consecutiveRefPosition != nullptr); - regMaskTP registerToAssign = firstRegAssigned; - int refPosCount = 1; + regMaskTP registerToAssign = firstRegAssigned; + int refPosCount = 1; while (consecutiveRefPosition != nullptr) { registerToAssign <<= 1; consecutiveRefPosition->registerAssignment = registerToAssign; - consecutiveRefPosition = getNextConsecutiveRefPosition(consecutiveRefPosition); + consecutiveRefPosition = getNextConsecutiveRefPosition(consecutiveRefPosition); #ifdef DEBUG refPosCount++; @@ -1140,8 +1140,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } else { - int regCount; - RefPosition* useRefPos1 = nullptr; + int regCount; + RefPosition* useRefPos1 = nullptr; RefPosition* nextUseRefPos = nullptr; switch (intrin.id) { @@ -1173,7 +1173,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou { assert(intrin.op1->OperIs(GT_LCL_VAR)); - RefPosition* lastRefPos = nullptr; + RefPosition* lastRefPos = nullptr; // consecutive registers for (int regIdx = 0; regIdx < regCount; regIdx++) { @@ -1187,7 +1187,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou // Explicitely set regCount=0 so we can identify that this is non-first refposition. 
currRefPos->regCount = 0; - getNextConsecutiveRefPositionsMap()->Set(lastRefPos, currRefPos, LinearScan::NextConsecutiveRefPositionsMap::Overwrite); + getNextConsecutiveRefPositionsMap()->Set(lastRefPos, currRefPos, + LinearScan::NextConsecutiveRefPositionsMap::Overwrite); getNextConsecutiveRefPositionsMap()->Set(currRefPos, nullptr); } lastRefPos = currRefPos; diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 6c2c25a9758098..ec6e357a40a2ec 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -12153,7 +12153,7 @@ GenTree* Compiler::fgMorphMultiOp(GenTreeMultiOp* multiOp) #ifdef TARGET_ARM64 && !operand->AsLclVar()->IsMultiRegUse() #endif - ) + ) { lvaSetVarDoNotEnregister(operand->AsLclVar()->GetLclNum() DEBUGARG(DoNotEnregisterReason::SimdUserForcesDep)); From 85d90f548a66002cbb02c9f3a539178a2bab1c62 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 6 Jan 2023 07:17:52 -0800 Subject: [PATCH 019/125] Move getNextConsecutiveRefPosition --- src/coreclr/jit/lsra.cpp | 20 -------------------- src/coreclr/jit/lsraarm64.cpp | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 4e516798988e77..4f4dfef3516682 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -2133,26 +2133,6 @@ VarToRegMap LinearScan::setInVarToRegMap(unsigned int bbNum, VarToRegMap srcVarT return inVarToRegMap; } -#ifdef TARGET_ARM64 -//------------------------------------------------------------------------ -// getNextConsecutiveRefPosition: Get the next subsequent refPosition. -// -// Arguments: -// refPosition - The refposition for which we need to find next refposition -// -// Return Value: -// The next refPosition or nullptr if there is not one. -// -RefPosition* LinearScan::getNextConsecutiveRefPosition(RefPosition* refPosition) -{ - RefPosition* nextRefPosition; - assert(refPosition->needsConsecutive); - nextConsecutiveRefPositionMap->Lookup(refPosition, &nextRefPosition); - assert((nextRefPosition == nullptr) || nextRefPosition->needsConsecutive); - return nextRefPosition; -} -#endif - //------------------------------------------------------------------------ // checkLastUses: Check correctness of last use flags // diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 18b7bcfbd73b82..ffec0cf6cc0ec6 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -43,6 +43,24 @@ regMaskTP LinearScan::getFreeCandidates(regMaskTP candidates, RefPosition* refPo return result; } +//------------------------------------------------------------------------ +// getNextConsecutiveRefPosition: Get the next subsequent refPosition. +// +// Arguments: +// refPosition - The refposition for which we need to find next refposition +// +// Return Value: +// The next refPosition or nullptr if there is not one. 
+// +RefPosition* LinearScan::getNextConsecutiveRefPosition(RefPosition* refPosition) +{ + RefPosition* nextRefPosition; + assert(refPosition->needsConsecutive); + nextConsecutiveRefPositionMap->Lookup(refPosition, &nextRefPosition); + assert((nextRefPosition == nullptr) || nextRefPosition->needsConsecutive); + return nextRefPosition; +} + //------------------------------------------------------------------------ // setNextConsecutiveRegisterAssignment: Set the consecutive register mask to the // subsequent refpositions From 0a0faed07ea0ed8affd9b96abe0fcda27154cb76 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 6 Jan 2023 14:10:29 -0800 Subject: [PATCH 020/125] SA1141: Use tuple syntax --- .../System/Runtime/Intrinsics/Arm/AdvSimd.cs | 24 +++++++++---------- .../ref/System.Runtime.Intrinsics.cs | 2 -- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs index fa9e8d9fb89fe7..e25413444014ee 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs @@ -3663,37 +3663,37 @@ internal Arm64() { } /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B /// - public static Vector128 VectorTableLookup(ValueTuple, Vector128> table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); + public static Vector128 VectorTableLookup((Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); /// /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B /// - public static Vector128 VectorTableLookup(ValueTuple, Vector128> table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); + public static Vector128 VectorTableLookup((Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); /// /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B /// - public static Vector128 VectorTableLookup(ValueTuple, Vector128, Vector128> table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); + public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); /// /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B /// - public static Vector128 VectorTableLookup(ValueTuple, Vector128, Vector128> table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); + public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); /// /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B /// - public static Vector128 VectorTableLookup(ValueTuple, Vector128, Vector128, Vector128> table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); + public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); /// /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B /// - public static Vector128 VectorTableLookup(ValueTuple, Vector128, Vector128, Vector128> table, Vector128 byteIndexes) => 
VectorTableLookup(table, byteIndexes); + public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); /// /// uint8x16_t vqvtbx1q_u8(uint8x16_t r, int8x16_t t, uint8x16_t idx) @@ -15004,37 +15004,37 @@ internal Arm64() { } /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B /// - public static Vector64 VectorTableLookup(ValueTuple, Vector128> table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); + public static Vector64 VectorTableLookup((Vector128, Vector128) table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); /// /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B /// - public static Vector64 VectorTableLookup(ValueTuple, Vector128> table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); + public static Vector64 VectorTableLookup((Vector128, Vector128) table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); /// /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B /// - public static Vector64 VectorTableLookup(ValueTuple, Vector128, Vector128> table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); + public static Vector64 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); /// /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B /// - public static Vector64 VectorTableLookup(ValueTuple, Vector128, Vector128> table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); + public static Vector64 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); /// /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B /// - public static Vector64 VectorTableLookup(ValueTuple, Vector128, Vector128, Vector128> table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); + public static Vector64 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); /// /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B /// - public static Vector64 VectorTableLookup(ValueTuple, Vector128, Vector128, Vector128> table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); + public static Vector64 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); /// /// uint8x8_t vqvtbx1_u8(uint8x8_t r, uint8x16_t t, uint8x8_t idx) diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index 0a9fbf7b4afabf..1a537387e75acc 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -3176,14 +3176,12 @@ public unsafe static void StorePairScalarNonTemporal(uint* address, System.Runti public static System.Runtime.Intrinsics.Vector64 UnzipOdd(System.Runtime.Intrinsics.Vector64 left, System.Runtime.Intrinsics.Vector64 right) { throw null; } public static System.Runtime.Intrinsics.Vector128 VectorTableLookup(System.Runtime.Intrinsics.Vector128 table, 
System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 VectorTableLookup(System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector128 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector128 defaultValues, System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector128 defaultValues, System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 ZipHigh(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } From 036a273f3493705d974ada822a2d4a3ebd3f7535 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Sun, 8 Jan 2023 21:45:38 -0800 Subject: [PATCH 021/125] Remove the unwanted field list code --- src/coreclr/jit/hwintrinsic.cpp | 41 +++++++++------------------------ 1 file changed, 11 insertions(+), 30 deletions(-) diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 4d1a4b08a0755a..e65eb50edddf9f 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -1141,34 +1141,10 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, #ifdef TARGET_ARM64 if ((intrinsic == NI_AdvSimd_VectorTableLookup) || (intrinsic == NI_AdvSimd_Arm64_VectorTableLookup)) { - op1 = impPopStack().val; - ClassLayout* layout = op1->GetLayout(this); - unsigned structSize = layout->GetSize(); - unsigned slotCount = layout->GetSlotCount(); - var_types typeOfLayout = layout->GetType(); - if (typeOfLayout == TYP_STRUCT) + op1 = impPopStack().val; + if (op1->TypeGet() == TYP_STRUCT) { unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(sigReader.op1ClsHnd); - op1->AsLclVar()->SetMultiRegUse(); - 
GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList(); - int offset = 0; - for (unsigned fieldId = 0; fieldId < fieldCount; fieldId++) - { - unsigned lclNum = lvaGrabTemp(true DEBUGARG("VectorTableLookup")); - LclVarDsc* fldVarDsc = lvaGetDesc(lclNum); - fldVarDsc->lvType = TYP_SIMD16; - - CORINFO_FIELD_HANDLE fieldHandle = - info.compCompHnd->getFieldInClass(sigReader.op1ClsHnd, fieldId); - CORINFO_CLASS_HANDLE classHandle = info.compCompHnd->getFieldClass(fieldHandle); - lvaSetStruct(lclNum, classHandle, true); - - GenTreeLclFld* fldNode = gtNewLclFldNode(lclNum, TYP_SIMD16, offset); - fieldList->AddField(this, fldNode, 0, TYP_SIMD16); - - offset += 16; - } - switch (fieldCount) { case 1: @@ -1193,14 +1169,19 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, default: noway_assert("Unknown field count"); } + + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); + + // Although `op1` is used as a parameter of the SIMD intrinsic, we would like to independently promote all + // of its fields so they can be enregistered and get consecutive registers. + op1->AsLclVar()->SetMultiRegUse(); + lvaGetDesc(op1->AsLclVar())->lvUsedInSIMDIntrinsic = false; } else { - assert(typeOfLayout == TYP_SIMD16); + assert(op1->TypeGet() == TYP_SIMD16); + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); } - - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); - lvaGetDesc(op1->AsLclVar())->lvUsedInSIMDIntrinsic = false; } else #endif From 791563a65ae97f39a3786eb781a27aeb3f642112 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Sun, 8 Jan 2023 21:46:27 -0800 Subject: [PATCH 022/125] revert the flag that was mistakenly changed --- src/coreclr/jit/hwintrinsiclistarm64.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/hwintrinsiclistarm64.h b/src/coreclr/jit/hwintrinsiclistarm64.h index d8a0add3c518f4..9bd4e6f6cdf5a7 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64.h +++ b/src/coreclr/jit/hwintrinsiclistarm64.h @@ -475,7 +475,7 @@ HARDWARE_INTRINSIC(AdvSimd, VectorTableLookup, HARDWARE_INTRINSIC(AdvSimd, VectorTableLookup_2, 8, 2, {INS_tbl_2regs, INS_tbl_2regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd, VectorTableLookup_3, 8, 2, {INS_tbl_3regs, INS_tbl_3regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd, VectorTableLookup_4, 8, 2, {INS_tbl_4regs, INS_tbl_4regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd, VectorTableLookupExtension, 8, 3, {INS_tbx, INS_tbx, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd, VectorTableLookupExtension, 8, 3, {INS_tbx, INS_tbx, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(AdvSimd, Xor, -1, 2, {INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor}, HW_Category_SIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(AdvSimd, 
ZeroExtendWideningLower, 8, 1, {INS_uxtl, INS_uxtl, INS_uxtl, INS_uxtl, INS_uxtl, INS_uxtl, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(AdvSimd, ZeroExtendWideningUpper, 16, 1, {INS_uxtl2, INS_uxtl2, INS_uxtl2, INS_uxtl2, INS_uxtl2, INS_uxtl2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) From fdc94ede65a512dd06b9e6c137000545c4fc4aef Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 11 Jan 2023 22:41:16 -0800 Subject: [PATCH 023/125] Add test cases --- .../GenerateHWIntrinsicTests_Arm.cs | 12 + .../Arm/Shared/VectorLookup_2Test.template | 377 +++++++++++++++ .../Arm/Shared/VectorLookup_3Test.template | 408 ++++++++++++++++ .../Arm/Shared/VectorLookup_4Test.template | 438 ++++++++++++++++++ 4 files changed, 1235 insertions(+) create mode 100644 src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_2Test.template create mode 100644 src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_3Test.template create mode 100644 src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_4Test.template diff --git a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs index e7a1b698a4d4db..1bec97194db3ba 100644 --- a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs +++ b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs @@ -1704,6 +1704,12 @@ ("VecBinOpTest.template", new Dictionary { ["TestName"] = "SubtractWideningUpper_Vector128_UInt64_Vector128_UInt32", ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "SubtractWideningUpper", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["ValidateIterResult"] = "Helpers.SubtractWideningUpper(left, right, i) != result[i]"}), ("SimpleBinOpTest.template", new Dictionary { ["TestName"] = "VectorTableLookup_Vector64_Byte", ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookup", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "(Byte)(TestLibrary.Generator.GetByte() % 20)", ["ValidateFirstResult"] = "Helpers.TableVectorLookup(0, right, left) != result[0]", ["ValidateRemainingResults"] = "Helpers.TableVectorLookup(i, right, left) != result[i]"}), ("SimpleBinOpTest.template", new Dictionary { ["TestName"] = "VectorTableLookup_Vector64_SByte", ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookup", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "(SByte)(TestLibrary.Generator.GetSByte() % 20)", ["ValidateFirstResult"] = "Helpers.TableVectorLookup(0, right, left) != result[0]", ["ValidateRemainingResults"] = "Helpers.TableVectorLookup(i, right, left) != result[i]"}), + ("VectorLookup_2Test.template", new 
Dictionary { ["TestName"] = "VectorTableLookup2_Vector64_Byte", ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookup", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "(Byte)(TestLibrary.Generator.GetByte() % 40)", ["ValidateFirstResult"] = "Helpers.TableVectorLookup(0, indices, table) != result[0]", ["ValidateRemainingResults"] = "Helpers.TableVectorLookup(i, indices, table) != result[i]"}), + ("VectorLookup_2Test.template", new Dictionary { ["TestName"] = "VectorTableLookup2_Vector64_SByte", ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookup", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "(SByte)(TestLibrary.Generator.GetSByte() % 40)", ["ValidateFirstResult"] = "Helpers.TableVectorLookup(0, indices, table) != result[0]", ["ValidateRemainingResults"] = "Helpers.TableVectorLookup(i, indices, table) != result[i]"}), + ("VectorLookup_3Test.template", new Dictionary { ["TestName"] = "VectorTableLookup3_Vector64_Byte", ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookup", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "(Byte)(TestLibrary.Generator.GetByte() % 60)", ["ValidateFirstResult"] = "Helpers.TableVectorLookup(0, indices, table) != result[0]", ["ValidateRemainingResults"] = "Helpers.TableVectorLookup(i, indices, table) != result[i]"}), + ("VectorLookup_3Test.template", new Dictionary { ["TestName"] = "VectorTableLookup3_Vector64_SByte", ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookup", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "(SByte)(TestLibrary.Generator.GetSByte() % 60)", ["ValidateFirstResult"] = "Helpers.TableVectorLookup(0, indices, table) != result[0]", ["ValidateRemainingResults"] = "Helpers.TableVectorLookup(i, indices, table) != result[i]"}), + ("VectorLookup_4Test.template", new Dictionary { ["TestName"] = "VectorTableLookup4_Vector64_Byte", ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookup", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "(Byte)(TestLibrary.Generator.GetByte() % 80)", ["ValidateFirstResult"] = "Helpers.TableVectorLookup(0, indices, table) != result[0]", ["ValidateRemainingResults"] = "Helpers.TableVectorLookup(i, indices, table) != result[i]"}), + ("VectorLookup_4Test.template", new Dictionary { ["TestName"] = "VectorTableLookup4_Vector64_SByte", ["Isa"] = "AdvSimd", 
["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookup", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "(SByte)(TestLibrary.Generator.GetSByte() % 80)", ["ValidateFirstResult"] = "Helpers.TableVectorLookup(0, indices, table) != result[0]", ["ValidateRemainingResults"] = "Helpers.TableVectorLookup(i, indices, table) != result[i]"}), ("VecTernOpTest.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension_Vector64_Byte", ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector64", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["NextValueOp3"] = "(Byte)(TestLibrary.Generator.GetByte() % 20)", ["ValidateIterResult"] = "Helpers.TableVectorExtension(i, firstOp, thirdOp, secondOp) != result[i]"}), ("VecTernOpTest.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension_Vector64_SByte", ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector64", ["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "(SByte)(TestLibrary.Generator.GetSByte() % 20)", ["ValidateIterResult"] = "Helpers.TableVectorExtension(i, firstOp, thirdOp, secondOp) != result[i]"}), ("VecBinOpTest.template", new Dictionary { ["TestName"] = "Xor_Vector64_Byte", ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "Xor", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateIterResult"] = "Helpers.Xor(left[i], right[i]) != result[i]"}), @@ -2306,6 +2312,12 @@ ("VecPairBinOpTest.template", new Dictionary { ["TestName"] = "TransposeOdd_Vector128_UInt64", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "TransposeOdd", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt64", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["ValidateEntry"] = "result[index] != left[i+1] || result[++index] != right[i+1]"}), ("SimpleBinOpTest.template", new Dictionary { ["TestName"] = "VectorTableLookup_Vector128_Byte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookup", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", 
["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "(Byte)(TestLibrary.Generator.GetByte() % 20)", ["ValidateFirstResult"] = "Helpers.TableVectorLookup(0, right, left) != result[0]", ["ValidateRemainingResults"] = "Helpers.TableVectorLookup(i, right, left) != result[i]"}), ("SimpleBinOpTest.template", new Dictionary { ["TestName"] = "VectorTableLookup_Vector128_SByte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookup", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "(SByte)(TestLibrary.Generator.GetSByte() % 20)", ["ValidateFirstResult"] = "Helpers.TableVectorLookup(0, right, left) != result[0]", ["ValidateRemainingResults"] = "Helpers.TableVectorLookup(i, right, left) != result[i]"}), + ("VectorLookup_2Test.template", new Dictionary { ["TestName"] = "VectorTableLookup2_Vector128_Byte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookup", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "(Byte)(TestLibrary.Generator.GetByte() % 40)", ["ValidateFirstResult"] = "Helpers.TableVectorLookup(0, indices, table) != result[0]", ["ValidateRemainingResults"] = "Helpers.TableVectorLookup(i, indices, table) != result[i]"}), + ("VectorLookup_2Test.template", new Dictionary { ["TestName"] = "VectorTableLookup2_Vector128_SByte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookup", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "(SByte)(TestLibrary.Generator.GetSByte() % 40)", ["ValidateFirstResult"] = "Helpers.TableVectorLookup(0, indices, table) != result[0]", ["ValidateRemainingResults"] = "Helpers.TableVectorLookup(i, indices, table) != result[i]"}), + ("VectorLookup_3Test.template", new Dictionary { ["TestName"] = "VectorTableLookup3_Vector128_Byte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookup", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "(Byte)(TestLibrary.Generator.GetByte() % 60)", ["ValidateFirstResult"] = "Helpers.TableVectorLookup(0, indices, table) != result[0]", ["ValidateRemainingResults"] = "Helpers.TableVectorLookup(i, indices, table) != result[i]"}), + ("VectorLookup_3Test.template", new Dictionary { ["TestName"] = "VectorTableLookup3_Vector128_SByte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookup", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = 
"TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "(SByte)(TestLibrary.Generator.GetSByte() % 60)", ["ValidateFirstResult"] = "Helpers.TableVectorLookup(0, indices, table) != result[0]", ["ValidateRemainingResults"] = "Helpers.TableVectorLookup(i, indices, table) != result[i]"}), + ("VectorLookup_4Test.template", new Dictionary { ["TestName"] = "VectorTableLookup4_Vector128_Byte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookup", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "(Byte)(TestLibrary.Generator.GetByte() % 80)", ["ValidateFirstResult"] = "Helpers.TableVectorLookup(0, indices, table) != result[0]", ["ValidateRemainingResults"] = "Helpers.TableVectorLookup(i, indices, table) != result[i]"}), + ("VectorLookup_4Test.template", new Dictionary { ["TestName"] = "VectorTableLookup4_Vector128_SByte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookup", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "(SByte)(TestLibrary.Generator.GetSByte() % 80)", ["ValidateFirstResult"] = "Helpers.TableVectorLookup(0, indices, table) != result[0]", ["ValidateRemainingResults"] = "Helpers.TableVectorLookup(i, indices, table) != result[i]"}), ("VecTernOpTest.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension_Vector128_Byte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["NextValueOp3"] = "(Byte)(TestLibrary.Generator.GetByte() % 20)", ["ValidateIterResult"] = "Helpers.TableVectorExtension(i, firstOp, thirdOp, secondOp) != result[i]"}), ("VecTernOpTest.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension_Vector128_SByte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "(SByte)(TestLibrary.Generator.GetSByte() % 20)", ["ValidateIterResult"] = "Helpers.TableVectorExtension(i, firstOp, thirdOp, secondOp) != result[i]"}), ("VecPairBinOpTest.template", new Dictionary { ["TestName"] = "UnzipEven_Vector64_Byte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "UnzipEven", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "8", 
["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateEntry"] = "result[index] != left[i] || result[index + half] != right[i]"}), diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_2Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_2Test.template new file mode 100644 index 00000000000000..4b28a7c1e5f5cc --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_2Test.template @@ -0,0 +1,377 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/****************************************************************************** + * This file is auto-generated from a template file by the GenerateTests.csx * + * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make * + * changes, please update the corresponding template and run according to the * + * directions listed in the file. * + ******************************************************************************/ + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; +using Xunit; + +namespace JIT.HardwareIntrinsics.Arm +{ + public static partial class Program + { + [Fact] + public static void VectorLookup_2_{RetBaseType}() + { + var test = new VectorLookup_2Test__{Method}{RetBaseType}(); + + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); + + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); + + // Validates passing a static member works + test.RunClsVarScenario(); + + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); + + // Validates passing the field of a local class works + test.RunClassLclFldScenario(); + + // Validates passing an instance member of a class works + test.RunClassFldScenario(); + + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); + + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class VectorLookup_2Test__{Method}{RetBaseType} + { + private struct DataTable + { + private byte[] inArray1; + private byte[] inArray2; + private byte[] inArray3; + private byte[] outArray; + + private GCHandle inHandle1; + private GCHandle inHandle2; + private GCHandle inHandle3; + private GCHandle outHandle; + + private ulong alignment; + + public DataTable({Op1BaseType}[] inArray1, {Op1BaseType}[] inArray2, {Op2BaseType}[] inArray3, {RetBaseType}[] outArray, int alignment) + { + int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray3 = inArray3.Length * Unsafe.SizeOf<{Op2BaseType}>(); + int sizeOfoutArray = outArray.Length * Unsafe.SizeOf<{RetBaseType}>(); + if ((alignment != 32 && alignment != 16 && alignment != 8) || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfinArray3 || (alignment * 2) < sizeOfoutArray) + { + throw new ArgumentException("Invalid value of alignment"); + } + + this.inArray1 = new byte[alignment * 2]; + this.inArray2 = new byte[alignment * 2]; + this.inArray3 = new 
byte[alignment * 2]; + + this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned); + this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned); + this.inHandle3 = GCHandle.Alloc(this.inArray3, GCHandleType.Pinned); + this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned); + + this.alignment = (ulong)alignment; + + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray1Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), (uint)sizeOfinArray1); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray2Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), (uint)sizeOfinArray2); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray3Ptr), ref Unsafe.As<{Op2BaseType}, byte>(ref inArray3[0]), (uint)sizeOfinArray3); + } + + public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray3Ptr => Align((byte*)(inHandle3.AddrOfPinnedObject().ToPointer()), alignment); + public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment); + + public void Dispose() + { + inHandle1.Free(); + inHandle2.Free(); + inHandle3.Free(); + outHandle.Free(); + } + + private static unsafe void* Align(byte* buffer, ulong expectedAlignment) + { + return (void*)(((ulong)buffer + expectedAlignment - 1) & ~(expectedAlignment - 1)); + } + } + + private struct TestStruct + { + public {Op1VectorType}<{Op1BaseType}> _fld1; + public {Op1VectorType}<{Op1BaseType}> _fld2; + public {Op2VectorType}<{Op2BaseType}> _fld3; + + public static TestStruct Create() + { + var testStruct = new TestStruct(); + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data3[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref testStruct._fld3), ref Unsafe.As<{Op2BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + + return testStruct; + } + + public void RunStructFldScenario(VectorLookup_2Test__{Method}{RetBaseType} testClass) + { + var result = {Isa}.{Method}((_fld1, _fld2), _fld3); + + Unsafe.Write(testClass._dataTable.outArrayPtr, result); + testClass.ValidateResult(_fld1, _fld2, _fld3, testClass._dataTable.outArrayPtr); + } + } + + private static readonly int LargestVectorSize = {LargestVectorSize}; + + private static readonly int Op1ElementCount = Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>() / sizeof({Op1BaseType}); + private static readonly int Op2ElementCount = Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>() / sizeof({Op2BaseType}); + private static readonly int RetElementCount = Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>() / sizeof({RetBaseType}); + + private static {Op1BaseType}[] _data1 = new {Op1BaseType}[Op1ElementCount]; + private static {Op1BaseType}[] _data2 = new {Op1BaseType}[Op1ElementCount]; + private static {Op2BaseType}[] _data3 = new 
{Op2BaseType}[Op1ElementCount]; + + private static {Op1VectorType}<{Op1BaseType}> _clsVar1; + private static {Op1VectorType}<{Op1BaseType}> _clsVar2; + private static {Op2VectorType}<{Op2BaseType}> _clsVar3; + + private {Op1VectorType}<{Op1BaseType}> _fld1; + private {Op1VectorType}<{Op1BaseType}> _fld2; + private {Op2VectorType}<{Op2BaseType}> _fld3; + + private DataTable _dataTable; + + static VectorLookup_2Test__{Method}{RetBaseType}() + { + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _clsVar1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _clsVar2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data3[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _clsVar3), ref Unsafe.As<{Op2BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + } + + public VectorLookup_2Test__{Method}{RetBaseType}() + { + Succeeded = true; + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data3[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _fld3), ref Unsafe.As<{Op2BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + for (var i = 0; i < Op2ElementCount; i++) { _data3[i] = {NextValueOp2}; } + _dataTable = new DataTable(_data1, _data2, _data3, new {RetBaseType}[RetElementCount], LargestVectorSize); + } + + public bool Succeeded { get; set; } + + public void RunBasicScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead)); + + var result = {Isa}.{Method}( + (Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr)), + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray3Ptr) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); + + var method = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { + typeof({Op1VectorType}<{Op1BaseType}>), + typeof({Op1VectorType}<{Op1BaseType}>), + typeof({Op2VectorType}<{Op2BaseType}>) + }); + + if (method is null) + { + method = 
typeof({Isa}).GetMethod(nameof({Isa}.{Method}), 1, new Type[] { + typeof({Op1VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), + typeof({Op1VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), + typeof({Op2VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)) + }); + } + + if (method.IsGenericMethodDefinition) + { + method = method.MakeGenericMethod(typeof({RetBaseType})); + } + + var result = method.Invoke(null, new object[] { + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr), + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray3Ptr) + }); + + Unsafe.Write(_dataTable.outArrayPtr, ({RetVectorType}<{RetBaseType}>)(result)); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr); + } + + public void RunClsVarScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario)); + + var result = {Isa}.{Method}( + (_clsVar1, + _clsVar2), + _clsVar3 + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_clsVar1, _clsVar2, _clsVar3, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); + + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); + var op2 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr); + var op3 = Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray3Ptr); + var result = {Isa}.{Method}((op1, op2), op3); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(op1, op2, op3, _dataTable.outArrayPtr); + } + + public void RunClassLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario)); + + var test = new VectorLookup_2Test__{Method}{RetBaseType}(); + var result = {Isa}.{Method}((test._fld1, test._fld2), test._fld3); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld1, test._fld2, test._fld3, _dataTable.outArrayPtr); + } + + public void RunClassFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario)); + + var result = {Isa}.{Method}((_fld1, _fld2), _fld3); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_fld1, _fld2, _fld3, _dataTable.outArrayPtr); + } + + public void RunStructLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario)); + + var test = TestStruct.Create(); + var result = {Isa}.{Method}((test._fld1, test._fld2), test._fld3); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld1, test._fld2, test._fld3, _dataTable.outArrayPtr); + } + + public void RunStructFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario)); + + var test = TestStruct.Create(); + test.RunStructFldScenario(this); + } + + private void ValidateResult({Op1VectorType}<{Op1BaseType}> op1, {Op1VectorType}<{Op1BaseType}> op2, {Op2VectorType}<{Op2BaseType}> op3, void* result, [CallerMemberName] string method = "") + { + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray2 = new {Op1BaseType}[Op1ElementCount]; + {Op2BaseType}[] inArray3 = new {Op2BaseType}[Op2ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), op1); + Unsafe.WriteUnaligned(ref 
Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), op2); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray3[0]), op3); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray1, inArray2, inArray3, outArray, method); + } + + private void ValidateResult(void* op1, void* op2, void* op3, void* result, [CallerMemberName] string method = "") + { + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray2 = new {Op1BaseType}[Op1ElementCount]; + {Op2BaseType}[] inArray3 = new {Op2BaseType}[Op2ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), ref Unsafe.AsRef(op1), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), ref Unsafe.AsRef(op2), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray3[0]), ref Unsafe.AsRef(op3), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray1, inArray2, inArray3, outArray, method); + } + + private void ValidateResult({Op1BaseType}[] firstOp, {Op1BaseType}[] secondOp, {Op2BaseType}[] indices, {RetBaseType}[] result, [CallerMemberName] string method = "") + { + bool succeeded = true; + {Op1BaseType}[][] table = {firstOp, secondOp}; + + if ({ValidateFirstResult}) + { + succeeded = false; + } + else + { + for (var i = 1; i < RetElementCount; i++) + { + if ({ValidateRemainingResults}) + { + succeeded = false; + break; + } + } + } + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof({Isa})}.{nameof({Isa}.{Method})}<{RetBaseType}>(({Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>>), {Op2VectorType}<{Op2BaseType}>): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" firstOp: ({string.Join(", ", firstOp)})"); + TestLibrary.TestFramework.LogInformation($" secondOp: ({string.Join(", ", secondOp)})"); + TestLibrary.TestFramework.LogInformation($" indices: ({string.Join(", ", indices)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_3Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_3Test.template new file mode 100644 index 00000000000000..c2472b39191bbf --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_3Test.template @@ -0,0 +1,408 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/****************************************************************************** + * This file is auto-generated from a template file by the GenerateTests.csx * + * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make * + * changes, please update the corresponding template and run according to the * + * directions listed in the file. 
* + ******************************************************************************/ + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; +using Xunit; + +namespace JIT.HardwareIntrinsics.Arm +{ + public static partial class Program + { + [Fact] + public static void VectorLookup_3_{RetBaseType}() + { + var test = new VectorLookup_3Test__{Method}{RetBaseType}(); + + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); + + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); + + // Validates passing a static member works + test.RunClsVarScenario(); + + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); + + // Validates passing the field of a local class works + test.RunClassLclFldScenario(); + + // Validates passing an instance member of a class works + test.RunClassFldScenario(); + + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); + + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class VectorLookup_3Test__{Method}{RetBaseType} + { + private struct DataTable + { + private byte[] inArray1; + private byte[] inArray2; + private byte[] inArray3; + private byte[] inArray4; + private byte[] outArray; + + private GCHandle inHandle1; + private GCHandle inHandle2; + private GCHandle inHandle3; + private GCHandle inHandle4; + private GCHandle outHandle; + + private ulong alignment; + + public DataTable({Op1BaseType}[] inArray1, {Op1BaseType}[] inArray2, {Op1BaseType}[] inArray3, {Op2BaseType}[] inArray4, {RetBaseType}[] outArray, int alignment) + { + int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray3 = inArray3.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray4 = inArray4.Length * Unsafe.SizeOf<{Op2BaseType}>(); + int sizeOfoutArray = outArray.Length * Unsafe.SizeOf<{RetBaseType}>(); + if ((alignment != 32 && alignment != 16 && alignment != 8) || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfinArray3 || (alignment * 2) < sizeOfinArray4 || (alignment * 2) < sizeOfoutArray) + { + throw new ArgumentException("Invalid value of alignment"); + } + + this.inArray1 = new byte[alignment * 2]; + this.inArray2 = new byte[alignment * 2]; + this.inArray3 = new byte[alignment * 2]; + this.inArray4 = new byte[alignment * 2]; + this.outArray = new byte[alignment * 2]; + + this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned); + this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned); + this.inHandle3 = GCHandle.Alloc(this.inArray3, GCHandleType.Pinned); + this.inHandle4 = GCHandle.Alloc(this.inArray4, GCHandleType.Pinned); + this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned); + + this.alignment = (ulong)alignment; + + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray1Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), (uint)sizeOfinArray1); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray2Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), (uint)sizeOfinArray2); + Unsafe.CopyBlockUnaligned(ref 
Unsafe.AsRef(inArray3Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray3[0]), (uint)sizeOfinArray3); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray4Ptr), ref Unsafe.As<{Op2BaseType}, byte>(ref inArray4[0]), (uint)sizeOfinArray4); + } + + public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray3Ptr => Align((byte*)(inHandle3.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray4Ptr => Align((byte*)(inHandle4.AddrOfPinnedObject().ToPointer()), alignment); + public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment); + + public void Dispose() + { + inHandle1.Free(); + inHandle2.Free(); + inHandle3.Free(); + inHandle4.Free(); + outHandle.Free(); + } + + private static unsafe void* Align(byte* buffer, ulong expectedAlignment) + { + return (void*)(((ulong)buffer + expectedAlignment - 1) & ~(expectedAlignment - 1)); + } + } + + private struct TestStruct + { + public {Op1VectorType}<{Op1BaseType}> _fld1; + public {Op1VectorType}<{Op1BaseType}> _fld2; + public {Op1VectorType}<{Op1BaseType}> _fld3; + public {Op2VectorType}<{Op2BaseType}> _fld4; + + public static TestStruct Create() + { + var testStruct = new TestStruct(); + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld3), ref Unsafe.As<{Op1BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data4[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref testStruct._fld4), ref Unsafe.As<{Op2BaseType}, byte>(ref _data4[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + + return testStruct; + } + + public void RunStructFldScenario(VectorLookup_3Test__{Method}{RetBaseType} testClass) + { + var result = {Isa}.{Method}((_fld1, _fld2, _fld3), _fld4); + + Unsafe.Write(testClass._dataTable.outArrayPtr, result); + testClass.ValidateResult(_fld1, _fld2, _fld3, _fld4, testClass._dataTable.outArrayPtr); + } + } + + private static readonly int LargestVectorSize = {LargestVectorSize}; + + private static readonly int Op1ElementCount = Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>() / sizeof({Op1BaseType}); + private static readonly int Op2ElementCount = Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>() / sizeof({Op2BaseType}); + private static readonly int RetElementCount = Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>() / sizeof({RetBaseType}); + + private static {Op1BaseType}[] _data1 = new {Op1BaseType}[Op1ElementCount]; + private static {Op1BaseType}[] _data2 = new {Op1BaseType}[Op1ElementCount]; + private static {Op1BaseType}[] _data3 = new {Op1BaseType}[Op1ElementCount]; + private static {Op2BaseType}[] 
_data4 = new {Op2BaseType}[Op2ElementCount]; + + private static {Op1VectorType}<{Op1BaseType}> _clsVar1; + private static {Op1VectorType}<{Op1BaseType}> _clsVar2; + private static {Op1VectorType}<{Op1BaseType}> _clsVar3; + private static {Op2VectorType}<{Op2BaseType}> _clsVar4; + + private {Op1VectorType}<{Op1BaseType}> _fld1; + private {Op1VectorType}<{Op1BaseType}> _fld2; + private {Op1VectorType}<{Op1BaseType}> _fld3; + private {Op2VectorType}<{Op2BaseType}> _fld4; + + private DataTable _dataTable; + + static VectorLookup_3Test__{Method}{RetBaseType}() + { + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _clsVar1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _clsVar2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _clsVar3), ref Unsafe.As<{Op1BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data4[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _clsVar4), ref Unsafe.As<{Op2BaseType}, byte>(ref _data4[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + } + + public VectorLookup_3Test__{Method}{RetBaseType}() + { + Succeeded = true; + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld3), ref Unsafe.As<{Op1BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data4[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _fld4), ref Unsafe.As<{Op2BaseType}, byte>(ref _data4[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; } + for (var i = 0; i < Op2ElementCount; i++) { _data4[i] = {NextValueOp2}; } + _dataTable = new DataTable(_data1, _data2, _data3, _data4, new {RetBaseType}[RetElementCount], LargestVectorSize); + } + + public bool Succeeded { get; set; } + + public void RunBasicScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead)); + + var result = {Isa}.{Method}( + 
(Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray3Ptr)), + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray4Ptr) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.inArray4Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); + + var method = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { + typeof({Op1VectorType}<{Op1BaseType}>), + typeof({Op1VectorType}<{Op1BaseType}>), + typeof({Op1VectorType}<{Op1BaseType}>), + typeof({Op2VectorType}<{Op2BaseType}>) + }); + + if (method is null) + { + method = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), 1, new Type[] { + typeof({Op1VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), + typeof({Op1VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), + typeof({Op1VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), + typeof({Op2VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)) + }); + } + + if (method.IsGenericMethodDefinition) + { + method = method.MakeGenericMethod(typeof({RetBaseType})); + } + + var result = method.Invoke(null, new object[] { + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray3Ptr), + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray4Ptr) + }); + + Unsafe.Write(_dataTable.outArrayPtr, ({RetVectorType}<{RetBaseType}>)(result)); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.inArray4Ptr, _dataTable.outArrayPtr); + } + + public void RunClsVarScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario)); + + var result = {Isa}.{Method}( + (_clsVar1, + _clsVar2, + _clsVar3), + _clsVar4 + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_clsVar1, _clsVar2, _clsVar3, _clsVar4, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); + + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); + var op2 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr); + var op3 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray3Ptr); + var op4 = Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray4Ptr); + var result = {Isa}.{Method}((op1, op2, op3), op4); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(op1, op2, op3, op4, _dataTable.outArrayPtr); + } + + public void RunClassLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario)); + + var test = new VectorLookup_3Test__{Method}{RetBaseType}(); + var result = {Isa}.{Method}((test._fld1, test._fld2, test._fld3), test._fld4); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld1, test._fld2, test._fld3, test._fld4, _dataTable.outArrayPtr); + } + + public void RunClassFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario)); + + var result = {Isa}.{Method}((_fld1, _fld2, _fld3), _fld4); + + 
Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_fld1, _fld2, _fld3, _fld4, _dataTable.outArrayPtr); + } + + public void RunStructLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario)); + + var test = TestStruct.Create(); + var result = {Isa}.{Method}((test._fld1, test._fld2, test._fld3), test._fld4); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld1, test._fld2, test._fld3, test._fld4, _dataTable.outArrayPtr); + } + + public void RunStructFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario)); + + var test = TestStruct.Create(); + test.RunStructFldScenario(this); + } + + private void ValidateResult({Op1VectorType}<{Op1BaseType}> op1, {Op1VectorType}<{Op1BaseType}> op2, {Op1VectorType}<{Op1BaseType}> op3, {Op2VectorType}<{Op2BaseType}> op4, void* result, [CallerMemberName] string method = "") + { + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray2 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray3 = new {Op1BaseType}[Op1ElementCount]; + {Op2BaseType}[] inArray4 = new {Op2BaseType}[Op2ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), op1); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), op2); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray3[0]), op3); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray4[0]), op4); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray1, inArray2, inArray3, inArray4, outArray, method); + } + + private void ValidateResult(void* op1, void* op2, void* op3, void* op4, void* result, [CallerMemberName] string method = "") + { + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray2 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray3 = new {Op1BaseType}[Op1ElementCount]; + {Op2BaseType}[] inArray4 = new {Op2BaseType}[Op2ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), ref Unsafe.AsRef(op1), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), ref Unsafe.AsRef(op2), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray3[0]), ref Unsafe.AsRef(op3), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray4[0]), ref Unsafe.AsRef(op4), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray1, inArray2, inArray3, inArray4, outArray, method); + } + + private void ValidateResult({Op1BaseType}[] firstOp, {Op1BaseType}[] secondOp, {Op1BaseType}[] thirdOp, {Op2BaseType}[] indices, {RetBaseType}[] result, [CallerMemberName] string method = "") + { + bool succeeded = true; + {Op1BaseType}[][] table = {firstOp, secondOp, thirdOp}; + + if ({ValidateFirstResult}) + { + succeeded = false; + } + else + { + for (var i = 1; i < 
RetElementCount; i++) + { + if ({ValidateRemainingResults}) + { + succeeded = false; + break; + } + } + } + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof({Isa})}.{nameof({Isa}.{Method})}<{RetBaseType}>(({Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>), {Op2VectorType}<{Op2BaseType}>): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" firstOp: ({string.Join(", ", firstOp)})"); + TestLibrary.TestFramework.LogInformation($" secondOp: ({string.Join(", ", secondOp)})"); + TestLibrary.TestFramework.LogInformation($" thirdOp: ({string.Join(", ", thirdOp)})"); + TestLibrary.TestFramework.LogInformation($" indices: ({string.Join(", ", indices)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_4Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_4Test.template new file mode 100644 index 00000000000000..c6f1f941764a5d --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_4Test.template @@ -0,0 +1,438 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/****************************************************************************** + * This file is auto-generated from a template file by the GenerateTests.csx * + * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make * + * changes, please update the corresponding template and run according to the * + * directions listed in the file. * + ******************************************************************************/ + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; +using Xunit; + +namespace JIT.HardwareIntrinsics.Arm +{ + public static partial class Program + { + [Fact] + public static void VectorLookup_4_{RetBaseType}() + { + var test = new VectorLookup_4Test__{Method}{RetBaseType}(); + + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); + + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); + + // Validates passing a static member works + test.RunClsVarScenario(); + + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); + + // Validates passing the field of a local class works + test.RunClassLclFldScenario(); + + // Validates passing an instance member of a class works + test.RunClassFldScenario(); + + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); + + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class VectorLookup_4Test__{Method}{RetBaseType} + { + private struct DataTable + { + private byte[] inArray1; + private byte[] inArray2; + private byte[] inArray3; + private byte[] inArray4; + private byte[] inArray5; + private byte[] outArray; + + private GCHandle inHandle1; + private GCHandle inHandle2; + private GCHandle inHandle3; + private GCHandle inHandle4; + private GCHandle inHandle5; + private GCHandle outHandle; + + private 
ulong alignment; + + public DataTable({Op1BaseType}[] inArray1, {Op1BaseType}[] inArray2, {Op1BaseType}[] inArray3, {Op1BaseType}[] inArray4, {Op2BaseType}[] inArray5, {RetBaseType}[] outArray, int alignment) + { + int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray3 = inArray3.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray4 = inArray4.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray5 = inArray5.Length * Unsafe.SizeOf<{Op2BaseType}>(); + int sizeOfoutArray = outArray.Length * Unsafe.SizeOf<{RetBaseType}>(); + if ((alignment != 32 && alignment != 16 && alignment != 8) || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfinArray3 || (alignment * 2) < sizeOfinArray4 || (alignment * 2) < sizeOfinArray5 || (alignment * 2) < sizeOfoutArray) + { + throw new ArgumentException("Invalid value of alignment"); + } + + this.inArray1 = new byte[alignment * 2]; + this.inArray2 = new byte[alignment * 2]; + this.inArray3 = new byte[alignment * 2]; + this.inArray4 = new byte[alignment * 2]; + this.inArray5 = new byte[alignment * 2]; + this.outArray = new byte[alignment * 2]; + + this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned); + this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned); + this.inHandle3 = GCHandle.Alloc(this.inArray3, GCHandleType.Pinned); + this.inHandle4 = GCHandle.Alloc(this.inArray4, GCHandleType.Pinned); + this.inHandle5 = GCHandle.Alloc(this.inArray5, GCHandleType.Pinned); + this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned); + + this.alignment = (ulong)alignment; + + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray1Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), (uint)sizeOfinArray1); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray2Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), (uint)sizeOfinArray2); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray3Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray3[0]), (uint)sizeOfinArray3); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray4Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray4[0]), (uint)sizeOfinArray4); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray5Ptr), ref Unsafe.As<{Op2BaseType}, byte>(ref inArray5[0]), (uint)sizeOfinArray5); + } + + public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray3Ptr => Align((byte*)(inHandle3.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray4Ptr => Align((byte*)(inHandle4.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray5Ptr => Align((byte*)(inHandle5.AddrOfPinnedObject().ToPointer()), alignment); + public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment); + + public void Dispose() + { + inHandle1.Free(); + inHandle2.Free(); + inHandle3.Free(); + inHandle4.Free(); + inHandle5.Free(); + outHandle.Free(); + } + + private static unsafe void* Align(byte* buffer, ulong expectedAlignment) + { + return (void*)(((ulong)buffer + expectedAlignment - 1) & ~(expectedAlignment - 1)); + } + } + + private struct TestStruct + { + public {Op1VectorType}<{Op1BaseType}> _fld1; + public {Op1VectorType}<{Op1BaseType}> _fld2; + public {Op1VectorType}<{Op1BaseType}> _fld3; + public 
{Op1VectorType}<{Op1BaseType}> _fld4; + public {Op2VectorType}<{Op2BaseType}> _fld5; + + public static TestStruct Create() + { + var testStruct = new TestStruct(); + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld3), ref Unsafe.As<{Op1BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data4[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld4), ref Unsafe.As<{Op1BaseType}, byte>(ref _data4[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data4[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref testStruct._fld5), ref Unsafe.As<{Op2BaseType}, byte>(ref _data5[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + + return testStruct; + } + + public void RunStructFldScenario(VectorLookup_4Test__{Method}{RetBaseType} testClass) + { + var result = {Isa}.{Method}((_fld1, _fld2, _fld3, _fld4), _fld5); + + Unsafe.Write(testClass._dataTable.outArrayPtr, result); + testClass.ValidateResult(_fld1, _fld2, _fld3, _fld4, _fld5, testClass._dataTable.outArrayPtr); + } + } + + private static readonly int LargestVectorSize = {LargestVectorSize}; + + private static readonly int Op1ElementCount = Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>() / sizeof({Op1BaseType}); + private static readonly int Op2ElementCount = Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>() / sizeof({Op2BaseType}); + private static readonly int RetElementCount = Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>() / sizeof({RetBaseType}); + + private static {Op1BaseType}[] _data1 = new {Op1BaseType}[Op1ElementCount]; + private static {Op1BaseType}[] _data2 = new {Op1BaseType}[Op1ElementCount]; + private static {Op1BaseType}[] _data3 = new {Op1BaseType}[Op1ElementCount]; + private static {Op1BaseType}[] _data4 = new {Op1BaseType}[Op1ElementCount]; + private static {Op2BaseType}[] _data5 = new {Op2BaseType}[Op2ElementCount]; + + private static {Op1VectorType}<{Op1BaseType}> _clsVar1; + private static {Op1VectorType}<{Op1BaseType}> _clsVar2; + private static {Op1VectorType}<{Op1BaseType}> _clsVar3; + private static {Op1VectorType}<{Op1BaseType}> _clsVar4; + private static {Op2VectorType}<{Op2BaseType}> _clsVar5; + + private {Op1VectorType}<{Op1BaseType}> _fld1; + private {Op1VectorType}<{Op1BaseType}> _fld2; + private {Op1VectorType}<{Op1BaseType}> _fld3; + private {Op1VectorType}<{Op1BaseType}> _fld4; + private {Op2VectorType}<{Op2BaseType}> _fld5; + + private DataTable _dataTable; + + static VectorLookup_4Test__{Method}{RetBaseType}() + { + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref 
Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _clsVar1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _clsVar2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _clsVar3), ref Unsafe.As<{Op1BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data4[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _clsVar4), ref Unsafe.As<{Op1BaseType}, byte>(ref _data4[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data5[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _clsVar5), ref Unsafe.As<{Op2BaseType}, byte>(ref _data5[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + } + + public VectorLookup_4Test__{Method}{RetBaseType}() + { + Succeeded = true; + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld3), ref Unsafe.As<{Op1BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data4[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld4), ref Unsafe.As<{Op1BaseType}, byte>(ref _data4[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data5[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _fld5), ref Unsafe.As<{Op2BaseType}, byte>(ref _data5[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; } + for (var i = 0; i < Op1ElementCount; i++) { _data4[i] = {NextValueOp1}; } + for (var i = 0; i < Op2ElementCount; i++) { _data5[i] = {NextValueOp2}; } + _dataTable = new DataTable(_data1, _data2, _data3, _data4, _data5, new {RetBaseType}[RetElementCount], LargestVectorSize); + } + + public bool Succeeded { get; set; } + + public void RunBasicScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead)); + + var result = {Isa}.{Method}( + (Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + 
Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray3Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray4Ptr)), + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray5Ptr) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.inArray4Ptr, _dataTable.inArray5Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); + + var method = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { + typeof({Op1VectorType}<{Op1BaseType}>), + typeof({Op1VectorType}<{Op1BaseType}>), + typeof({Op1VectorType}<{Op1BaseType}>), + typeof({Op1VectorType}<{Op1BaseType}>), + typeof({Op2VectorType}<{Op2BaseType}>) + }); + + if (method is null) + { + method = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), 1, new Type[] { + typeof({Op1VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), + typeof({Op1VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), + typeof({Op1VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), + typeof({Op1VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), + typeof({Op2VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)) + }); + } + + if (method.IsGenericMethodDefinition) + { + method = method.MakeGenericMethod(typeof({RetBaseType})); + } + + var result = method.Invoke(null, new object[] { + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray3Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray4Ptr), + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray5Ptr) + }); + + Unsafe.Write(_dataTable.outArrayPtr, ({RetVectorType}<{RetBaseType}>)(result)); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.inArray4Ptr, _dataTable.inArray5Ptr, _dataTable.outArrayPtr); + } + + public void RunClsVarScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario)); + + var result = {Isa}.{Method}( + (_clsVar1, + _clsVar2, + _clsVar3, + _clsVar4), + _clsVar5 + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_clsVar1, _clsVar2, _clsVar3, _clsVar4, _clsVar5, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); + + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); + var op2 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr); + var op3 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray3Ptr); + var op4 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray4Ptr); + var op5 = Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray5Ptr); + var result = {Isa}.{Method}((op1, op2, op3, op4), op5); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(op1, op2, op3, op4, op5, _dataTable.outArrayPtr); + } + + public void RunClassLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario)); + + var test = new VectorLookup_4Test__{Method}{RetBaseType}(); + var result = {Isa}.{Method}((test._fld1, test._fld2, test._fld3, 
test._fld4), test._fld5); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld1, test._fld2, test._fld3, test._fld4, test._fld5, _dataTable.outArrayPtr); + } + + public void RunClassFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario)); + + var result = {Isa}.{Method}((_fld1, _fld2, _fld3, _fld4), _fld5); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_fld1, _fld2, _fld3, _fld4, _fld5, _dataTable.outArrayPtr); + } + + public void RunStructLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario)); + + var test = TestStruct.Create(); + var result = {Isa}.{Method}((test._fld1, test._fld2, test._fld3, test._fld4), test._fld5); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld1, test._fld2, test._fld3, test._fld4, test._fld5, _dataTable.outArrayPtr); + } + + public void RunStructFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario)); + + var test = TestStruct.Create(); + test.RunStructFldScenario(this); + } + + private void ValidateResult({Op1VectorType}<{Op1BaseType}> op1, {Op1VectorType}<{Op1BaseType}> op2, {Op1VectorType}<{Op1BaseType}> op3, {Op1VectorType}<{Op1BaseType}> op4, {Op2VectorType}<{Op2BaseType}> op5, void* result, [CallerMemberName] string method = "") + { + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray2 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray3 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray4 = new {Op1BaseType}[Op1ElementCount]; + {Op2BaseType}[] inArray5 = new {Op2BaseType}[Op2ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), op1); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), op2); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray3[0]), op3); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray4[0]), op4); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray5[0]), op5); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray1, inArray2, inArray3, inArray4, inArray5, outArray, method); + } + + private void ValidateResult(void* op1, void* op2, void* op3, void* op4, void* op5, void* result, [CallerMemberName] string method = "") + { + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray2 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray3 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray4 = new {Op1BaseType}[Op1ElementCount]; + {Op2BaseType}[] inArray5 = new {Op2BaseType}[Op2ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), ref Unsafe.AsRef(op1), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), ref Unsafe.AsRef(op2), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray3[0]), ref Unsafe.AsRef(op3), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray4[0]), ref Unsafe.AsRef(op4), 
(uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray5[0]), ref Unsafe.AsRef(op5), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray1, inArray2, inArray3, inArray4, inArray5, outArray, method); + } + + private void ValidateResult({Op1BaseType}[] firstOp, {Op1BaseType}[] secondOp, {Op1BaseType}[] thirdOp, {Op1BaseType}[] fourthOp, {Op2BaseType}[] indices, {RetBaseType}[] result, [CallerMemberName] string method = "") + { + bool succeeded = true; + {Op1BaseType}[][] table = {firstOp, secondOp, thirdOp, fourthOp}; + + if ({ValidateFirstResult}) + { + succeeded = false; + } + else + { + for (var i = 1; i < RetElementCount; i++) + { + if ({ValidateRemainingResults}) + { + succeeded = false; + break; + } + } + } + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof({Isa})}.{nameof({Isa}.{Method})}<{RetBaseType}>(({Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>), {Op2VectorType}<{Op2BaseType}>): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" firstOp: ({string.Join(", ", firstOp)})"); + TestLibrary.TestFramework.LogInformation($" secondOp: ({string.Join(", ", secondOp)})"); + TestLibrary.TestFramework.LogInformation($" thirdOp: ({string.Join(", ", thirdOp)})"); + TestLibrary.TestFramework.LogInformation($" fourthOp: ({string.Join(", ", fourthOp)})"); + TestLibrary.TestFramework.LogInformation($" indices: ({string.Join(", ", indices)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + } +} From cf84fda228ec2f7e947b48fc119b00fcd2a2b878 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 20 Jan 2023 23:19:36 -0800 Subject: [PATCH 024/125] FIELD_LIST --- src/coreclr/jit/codegenlinear.cpp | 8 +++++ src/coreclr/jit/gentree.cpp | 7 +++- src/coreclr/jit/hwintrinsic.cpp | 29 ++++++++++++++--- src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 20 ++++++++++-- src/coreclr/jit/lsraarm64.cpp | 36 +++++++++++++++++---- 5 files changed, 85 insertions(+), 15 deletions(-) diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index 1cb20dde70a800..d8e5f42f2c1fee 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -1641,6 +1641,14 @@ void CodeGen::genConsumeRegs(GenTree* tree) genConsumeRegs(tree->gtGetOp1()); genConsumeRegs(tree->gtGetOp2()); } + else if (tree->OperIsFieldList()) + { + for (GenTreeFieldList::Use& use : tree->AsFieldList()->Uses()) + { + GenTree* fieldNode = use.GetNode(); + genConsumeReg(fieldNode); + } + } #endif else if (tree->OperIsLocalRead()) { diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 620efaabcec0da..d7baeb6d211d99 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -11957,7 +11957,12 @@ void Compiler::gtDispTree(GenTree* tree, case NI_System_Runtime_CompilerServices_RuntimeHelpers_IsKnownConstant: printf(" isKnownConst"); break; - + case NI_SIMD_UpperRestore: + printf(" simd_upperRestr"); + break; + case NI_SIMD_UpperSave: + printf(" simd_upperSave"); + break; default: unreached(); } diff --git a/src/coreclr/jit/hwintrinsic.cpp 
b/src/coreclr/jit/hwintrinsic.cpp index f2332419bced39..a773a3acd70971 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -1146,6 +1146,30 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, if (op1->TypeGet() == TYP_STRUCT) { unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(sigReader.op1ClsHnd); + + GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList(); + int offset = 0; + for (unsigned fieldId = 0; fieldId < fieldCount; fieldId++) + { + unsigned lclNum = lvaGrabTemp(true DEBUGARG("VectorTableLookup")); + LclVarDsc* fldVarDsc = lvaGetDesc(lclNum); + + CORINFO_FIELD_HANDLE fieldHandle = info.compCompHnd->getFieldInClass(sigReader.op1ClsHnd, fieldId); + CORINFO_CLASS_HANDLE innerFieldClsHnd; + JITtype2varType( + info.compCompHnd->getFieldType(fieldHandle, &innerFieldClsHnd, + info.compCompHnd->getFieldClass(fieldHandle))); + + lvaSetStruct(lclNum, innerFieldClsHnd, true); + + GenTreeLclFld* fldNode = gtNewLclFldNode(lclNum, fldVarDsc->TypeGet(), offset); + //fldNode->forceEnregister = true; + fieldList->AddField(this, fldNode, offset, fldVarDsc->TypeGet()); + + offset += fldVarDsc->lvSize(); + } + op1 = fieldList; + switch (fieldCount) { case 1: @@ -1172,11 +1196,6 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, } retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); - - // The `op1` although is used as parameter in SIMD intrinsic, we like to independently promotion all - // the fields of it so they can enregistered and get consecutive registers. - op1->AsLclVar()->SetMultiRegUse(); - lvaGetDesc(op1->AsLclVar())->lvUsedInSIMDIntrinsic = false; } else { diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 8cb34e3b7a3041..d0b80e4a4fe134 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -510,18 +510,34 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) case NI_AdvSimd_Arm64_VectorTableLookup_3: case NI_AdvSimd_VectorTableLookup_4: case NI_AdvSimd_Arm64_VectorTableLookup_4: - if (intrin.op1->IsCopyOrReload()) + { + + if (!intrin.op1->OperIsFieldList()) + { + assert(!"Expect the first operand of VectorTableLookup to be FIELD_LIST"); + } + + GenTreeFieldList* fieldList = intrin.op1->AsFieldList(); + GenTree* firstField = fieldList->Uses().GetHead()->GetNode(); + + if (firstField->IsCopyOrReload()) { // If value is copied in a register to satisfy the consecutive-register // requirement, make sure to get the source's register because these // instruction encoding takes only the 1st register and infer the rest // from that. 
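                // Note: the A64 TBL/TBX register-list encoding names only the first
                // table register Vn and implies the remaining registers as Vn+1..Vn+3,
                // which is why LSRA assigns the FIELD_LIST operands consecutive
                // registers and codegen only needs the register of the first field.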
- GenTree* op1 = intrin.op1->AsCopyOrReload()->gtGetOp1(); + GenTree* op1 = firstField->AsCopyOrReload()->gtGetOp1(); assert(!op1->IsCopyOrReload()); op1Reg = op1->GetRegNum(); } + else + { + assert(firstField->OperIsLocalField()); + op1Reg = firstField->GetRegNum(); + } GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt); break; + } case NI_AdvSimd_BitwiseSelect: // Even though BitwiseSelect is an RMW intrinsic per se, we don't want to mark it as such diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index b08f0830b48fb8..132146f24ac352 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1045,17 +1045,17 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou { case NI_AdvSimd_VectorTableLookup_2: case NI_AdvSimd_Arm64_VectorTableLookup_2: - assert(compiler->lvaGetDesc(intrin.op1->AsLclVar())->lvFieldCnt == 2); + //assert(compiler->lvaGetDesc(intrin.op1->AsLclVar())->lvFieldCnt == 2); regCount = 2; break; case NI_AdvSimd_VectorTableLookup_3: case NI_AdvSimd_Arm64_VectorTableLookup_3: - assert(compiler->lvaGetDesc(intrin.op1->AsLclVar())->lvFieldCnt == 3); + //assert(compiler->lvaGetDesc(intrin.op1->AsLclVar())->lvFieldCnt == 3); regCount = 3; break; case NI_AdvSimd_VectorTableLookup_4: case NI_AdvSimd_Arm64_VectorTableLookup_4: - assert(compiler->lvaGetDesc(intrin.op1->AsLclVar())->lvFieldCnt == 4); + //assert(compiler->lvaGetDesc(intrin.op1->AsLclVar())->lvFieldCnt == 4); regCount = 4; break; default: @@ -1069,13 +1069,14 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } else { - assert(intrin.op1->OperIs(GT_LCL_VAR)); - RefPosition* lastRefPos = nullptr; // consecutive registers - for (int regIdx = 0; regIdx < regCount; regIdx++) + + int regCount = 0; + for (GenTreeFieldList::Use& use : intrin.op1->AsFieldList()->Uses()) { - RefPosition* currRefPos = BuildUse(intrin.op1, RBM_NONE, regIdx, /* needsConsecutive */ true); + RefPosition* currRefPos = + BuildUse(use.GetNode(), RBM_NONE, 0, /* needsConsecutive */ true); if (lastRefPos == nullptr) { currRefPos->regCount = regCount; @@ -1090,8 +1091,29 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou getNextConsecutiveRefPositionsMap()->Set(currRefPos, nullptr); } lastRefPos = currRefPos; + regCount++; } srcCount += regCount; +#ifdef DEBUG + switch (regCount) + { + case 2: + assert((intrin.id == NI_AdvSimd_VectorTableLookup_2) || + (intrin.id == NI_AdvSimd_Arm64_VectorTableLookup_2)); + break; + case 3: + assert((intrin.id == NI_AdvSimd_VectorTableLookup_3) || + (intrin.id == NI_AdvSimd_Arm64_VectorTableLookup_3)); + break; + case 4: + assert((intrin.id == NI_AdvSimd_VectorTableLookup_4) || + (intrin.id == NI_AdvSimd_Arm64_VectorTableLookup_4)); + break; + default: + assert(!"Unexpected register count for VectorTableLookup"); + break; + } +#endif // DEBUG } } } From 06a78d4e44154d573da084b6447c378ff5ff1381 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 23 Jan 2023 23:45:15 -0800 Subject: [PATCH 025/125] Use FIELD_LIST approach --- src/coreclr/jit/hwintrinsic.cpp | 22 ++++-------- src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 1 - src/coreclr/jit/lsra.cpp | 4 +-- src/coreclr/jit/lsra.h | 2 +- src/coreclr/jit/lsraarm64.cpp | 38 ++++----------------- 5 files changed, 16 insertions(+), 51 deletions(-) diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index a773a3acd70971..ea76e1d9aea1b8 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ 
b/src/coreclr/jit/hwintrinsic.cpp @@ -1143,6 +1143,10 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, if ((intrinsic == NI_AdvSimd_VectorTableLookup) || (intrinsic == NI_AdvSimd_Arm64_VectorTableLookup)) { op1 = impPopStack().val; + assert(op1->OperIsLocal()); + + LclVarDsc* op1VarDsc = lvaGetDesc(op1->AsLclVar()); + unsigned lclNum = lvaGetLclNum(op1VarDsc); if (op1->TypeGet() == TYP_STRUCT) { unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(sigReader.op1ClsHnd); @@ -1151,22 +1155,10 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, int offset = 0; for (unsigned fieldId = 0; fieldId < fieldCount; fieldId++) { - unsigned lclNum = lvaGrabTemp(true DEBUGARG("VectorTableLookup")); - LclVarDsc* fldVarDsc = lvaGetDesc(lclNum); - - CORINFO_FIELD_HANDLE fieldHandle = info.compCompHnd->getFieldInClass(sigReader.op1ClsHnd, fieldId); - CORINFO_CLASS_HANDLE innerFieldClsHnd; - JITtype2varType( - info.compCompHnd->getFieldType(fieldHandle, &innerFieldClsHnd, - info.compCompHnd->getFieldClass(fieldHandle))); - - lvaSetStruct(lclNum, innerFieldClsHnd, true); - - GenTreeLclFld* fldNode = gtNewLclFldNode(lclNum, fldVarDsc->TypeGet(), offset); - //fldNode->forceEnregister = true; - fieldList->AddField(this, fldNode, offset, fldVarDsc->TypeGet()); + GenTreeLclFld* fldNode = gtNewLclFldNode(lclNum, TYP_SIMD16, offset); + fieldList->AddField(this, fldNode, offset, TYP_SIMD16); - offset += fldVarDsc->lvSize(); + offset += op1VarDsc->lvSize() / fieldCount; } op1 = fieldList; diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index d0b80e4a4fe134..98f64fd0a39a09 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -532,7 +532,6 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) } else { - assert(firstField->OperIsLocalField()); op1Reg = firstField->GetRegNum(); } GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt); diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index dc3f17ffb5707d..40fe41ef680f60 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -2845,7 +2845,7 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, refPosition->registerAssignment = foundRegBit; #ifdef TARGET_ARM64 - if (refPosition->needsConsecutive && (refPosition->multiRegIdx == 0)) + if (refPosition->needsConsecutive && (refPosition->regCount != 0)) { setNextConsecutiveRegisterAssignment(refPosition, foundRegBit); } @@ -5319,7 +5319,7 @@ void LinearScan::allocateRegisters() assignedRegMask = REG_NA; // This should never be the first refposition of the series. 
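                    // Only the first RefPosition of a consecutive-register series carries a
                    // nonzero regCount (set when the FIELD_LIST uses are built in lsraarm64);
                    // every later position in the series keeps regCount == 0, which is what
                    // the assert below checks.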
- assert(currentRefPosition.multiRegIdx != 0); + assert(currentRefPosition.regCount == 0); } #endif regsInUseThisLocation |= copyRegMask | assignedRegMask; diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 64a0d5f4a40160..81ffb67ca97c66 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -2287,7 +2287,7 @@ class RefPosition bool needsConsecutive; // How many consecutive registers does this and subsequent refPositions need - unsigned char regCount : 2; + unsigned char regCount : 3; #endif // TARGET_ARM64 // Last Use - this may be true for multiple RefPositions in the same Interval diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 132146f24ac352..ee6ea456f2bab9 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -28,14 +28,13 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX regMaskTP LinearScan::getFreeCandidates(regMaskTP candidates, RefPosition* refPosition) { regMaskTP result = candidates & m_AvailableRegs; - if (!refPosition->needsConsecutive || (refPosition->multiRegIdx != 0)) + + if (!refPosition->needsConsecutive || (refPosition->regCount == 0)) { return result; } - assert(refPosition->regCount != 0); - - // If refPosition->multiRegIdx == 0, we need to make sure we check for all the + // If refPosition->regCount != 0, we need to make sure we check for all the // `regCount` available regs. result &= (m_AvailableRegs >> (refPosition->regCount - 1)); @@ -1038,24 +1037,21 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } else { - int regCount; + unsigned regCount = 0; RefPosition* useRefPos1 = nullptr; RefPosition* nextUseRefPos = nullptr; switch (intrin.id) { case NI_AdvSimd_VectorTableLookup_2: case NI_AdvSimd_Arm64_VectorTableLookup_2: - //assert(compiler->lvaGetDesc(intrin.op1->AsLclVar())->lvFieldCnt == 2); regCount = 2; break; case NI_AdvSimd_VectorTableLookup_3: case NI_AdvSimd_Arm64_VectorTableLookup_3: - //assert(compiler->lvaGetDesc(intrin.op1->AsLclVar())->lvFieldCnt == 3); regCount = 3; break; case NI_AdvSimd_VectorTableLookup_4: case NI_AdvSimd_Arm64_VectorTableLookup_4: - //assert(compiler->lvaGetDesc(intrin.op1->AsLclVar())->lvFieldCnt == 4); regCount = 4; break; default: @@ -1072,11 +1068,10 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou RefPosition* lastRefPos = nullptr; // consecutive registers - int regCount = 0; + RefPosition* currRefPos = nullptr; for (GenTreeFieldList::Use& use : intrin.op1->AsFieldList()->Uses()) { - RefPosition* currRefPos = - BuildUse(use.GetNode(), RBM_NONE, 0, /* needsConsecutive */ true); + currRefPos = BuildUse(use.GetNode(), RBM_NONE, 0, /* needsConsecutive */ true); if (lastRefPos == nullptr) { currRefPos->regCount = regCount; @@ -1091,29 +1086,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou getNextConsecutiveRefPositionsMap()->Set(currRefPos, nullptr); } lastRefPos = currRefPos; - regCount++; } srcCount += regCount; -#ifdef DEBUG - switch (regCount) - { - case 2: - assert((intrin.id == NI_AdvSimd_VectorTableLookup_2) || - (intrin.id == NI_AdvSimd_Arm64_VectorTableLookup_2)); - break; - case 3: - assert((intrin.id == NI_AdvSimd_VectorTableLookup_3) || - (intrin.id == NI_AdvSimd_Arm64_VectorTableLookup_3)); - break; - case 4: - assert((intrin.id == NI_AdvSimd_VectorTableLookup_4) || - (intrin.id == NI_AdvSimd_Arm64_VectorTableLookup_4)); - break; - default: - assert(!"Unexpected register count for 
VectorTableLookup"); - break; - } -#endif // DEBUG } } } From a086ab77b18ba461f993e488da538751d654caf2 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 24 Jan 2023 07:18:31 -0800 Subject: [PATCH 026/125] jit format and fix arm build --- src/coreclr/jit/gentree.cpp | 2 ++ src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 2 +- src/coreclr/jit/lsraarm64.cpp | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 22a8578a71b7aa..95b77bcc9a2916 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -12099,12 +12099,14 @@ void Compiler::gtDispTree(GenTree* tree, case NI_System_Runtime_CompilerServices_RuntimeHelpers_IsKnownConstant: printf(" isKnownConst"); break; +#if defined(FEATURE_SIMD) case NI_SIMD_UpperRestore: printf(" simd_upperRestr"); break; case NI_SIMD_UpperSave: printf(" simd_upperSave"); break; +#endif // FEATURE_SIMD default: unreached(); } diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index a0d8c50e3d0c11..9f586380f58479 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -518,7 +518,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) } GenTreeFieldList* fieldList = intrin.op1->AsFieldList(); - GenTree* firstField = fieldList->Uses().GetHead()->GetNode(); + GenTree* firstField = fieldList->Uses().GetHead()->GetNode(); if (firstField->IsCopyOrReload()) { diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 46d281079d95fa..c0780ce91ea8ad 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1059,7 +1059,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } else { - unsigned regCount = 0; + unsigned regCount = 0; RefPosition* useRefPos1 = nullptr; RefPosition* nextUseRefPos = nullptr; switch (intrin.id) From 450a08d71ac16b3fbe38edded4fa6de31310bbc6 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 24 Jan 2023 10:32:14 -0800 Subject: [PATCH 027/125] fix assert failure --- src/coreclr/jit/hwintrinsic.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 877e64f41e1a87..02e21031f67175 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -1153,13 +1153,14 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, if ((intrinsic == NI_AdvSimd_VectorTableLookup) || (intrinsic == NI_AdvSimd_Arm64_VectorTableLookup)) { op1 = impPopStack().val; - assert(op1->OperIsLocal()); - LclVarDsc* op1VarDsc = lvaGetDesc(op1->AsLclVar()); - unsigned lclNum = lvaGetLclNum(op1VarDsc); if (op1->TypeGet() == TYP_STRUCT) { - unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(sigReader.op1ClsHnd); + assert(op1->OperIsLocal()); + + LclVarDsc* op1VarDsc = lvaGetDesc(op1->AsLclVar()); + unsigned lclNum = lvaGetLclNum(op1VarDsc); + unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(sigReader.op1ClsHnd); GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList(); int offset = 0; From 5bb93021451e82f5fd3c8f8a7e22a643585872f9 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 24 Jan 2023 23:54:57 -0800 Subject: [PATCH 028/125] Add summary docs Add summary docs in all the required files. 
--- .../Arm/AdvSimd.PlatformNotSupported.cs | 70 +++++++++---------- .../System/Runtime/Intrinsics/Arm/AdvSimd.cs | 58 +++++++-------- .../ref/System.Runtime.Intrinsics.cs | 24 +++---- 3 files changed, 76 insertions(+), 76 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.PlatformNotSupported.cs index b22cef9fcc8a13..eead2a33a3ba2d 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.PlatformNotSupported.cs @@ -3663,40 +3663,40 @@ internal Arm64() { } public static Vector128 VectorTableLookup(Vector128 table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } /// - /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) + /// uint8x16_t vqtbl2q_u8(uint8x16x2_t t, uint8x16_t idx) /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B /// - public static Vector128 VectorTableLookup((Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + internal static Vector128 VectorTableLookup((Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } /// - /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) - /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B + /// int8x16_t vqtbl2q_s8(int8x16x2_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B /// - public static Vector128 VectorTableLookup((Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + internal static Vector128 VectorTableLookup((Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } /// - /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) - /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + /// uint8x16_t vqtbl3q_u8(uint8x16x3_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B}, Vm.16B /// - public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + internal static Vector128 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } /// - /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) - /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B + /// int8x16_t vqtbl3q_s8(int8x16x3_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B}, Vm.16B /// - public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + internal static Vector128 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } /// - /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) - /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + /// uint8x16_t vqtbl4q_u8(uint8x16x4_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B, Vn+3.16B}, Vm.16B /// - public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + internal static Vector128 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } /// - /// int8x16_t 
vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) - /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B + /// int8x16_t vqtbl4q_s8(int8x16x4_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B, Vn+3.16B}, Vm.16B /// - public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + internal static Vector128 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } /// /// uint8x16_t vqvtbx1q_u8(uint8x16_t r, int8x16_t t, uint8x16_t idx) @@ -15004,40 +15004,40 @@ internal Arm64() { } public static Vector64 VectorTableLookup(Vector128 table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } /// - /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) - /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + /// uint8x8_t vqtbl2q_u8(uint8x16x2_t t, uint8x8_t idx) + /// A64: TBL Vd.8B, {Vn.16B, Vn+1.16B}, Vm.8B /// - public static Vector64 VectorTableLookup((Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + internal static Vector64 VectorTableLookup((Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } /// - /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) - /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B + /// int8x8_t vqtbl2q_u8(int8x16x2_t t, uint8x8_t idx) + /// A64: TBL Vd.8B, {Vn.16B, Vn+1.16B}, Vm.8B /// - public static Vector64 VectorTableLookup((Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + internal static Vector64 VectorTableLookup((Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } /// - /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) - /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + /// uint8x8_t vqtbl3q_u8(uint8x16x3_t t, uint8x8_t idx) + /// A64: TBL Vd.8B, {Vn.16B, Vn+1.16B, Vn+2.16B}, Vm.8B /// - public static Vector64 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + internal static Vector64 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } /// - /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) - /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B + /// int8x8_t vqtbl3q_u8(int8x16x3_t t, uint8x8_t idx) + /// A64: TBL Vd.8B, {Vn.16B, Vn+1.16B, Vn+2.16B}, Vm.8B /// - public static Vector64 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + internal static Vector64 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } /// - /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) - /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + /// uint8x8_t vqtbl4q_u8(uint8x16x4_t t, uint8x8_t idx) + /// A64: TBL Vd.8B, {Vn.16B, Vn+1.16B, Vn+2.16B, Vn+3.16B}, Vm.8B /// - public static Vector64 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + internal static Vector64 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } /// - /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) - /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B + /// 
int8x8_t vqtbl4q_u8(int8x16x4_t t, uint8x8_t idx) + /// A64: TBL Vd.8B, {Vn.16B, Vn+1.16B, Vn+2.16B, Vn+3.16B}, Vm.8B /// - public static Vector64 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + internal static Vector64 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } /// /// uint8x8_t vqvtbx1_u8(uint8x8_t r, uint8x16_t t, uint8x8_t idx) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs index 82cef9f7d6fb2b..08660044f67ce2 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs @@ -3661,40 +3661,40 @@ internal Arm64() { } public static Vector128 VectorTableLookup(Vector128 table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); /// - /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) + /// uint8x16_t vqtbl2q_u8(uint8x16x2_t t, uint8x16_t idx) /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B /// - public static Vector128 VectorTableLookup((Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); + internal static Vector128 VectorTableLookup((Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); /// - /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) - /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B + /// int8x16_t vqtbl2q_s8(int8x16x2_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B /// - public static Vector128 VectorTableLookup((Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); + internal static Vector128 VectorTableLookup((Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); /// - /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) - /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + /// uint8x16_t vqtbl3q_u8(uint8x16x3_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B}, Vm.16B /// - public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); + internal static Vector128 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); /// - /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) - /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B + /// int8x16_t vqtbl3q_s8(int8x16x3_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B}, Vm.16B /// - public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); + internal static Vector128 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); /// - /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) - /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + /// uint8x16_t vqtbl4q_u8(uint8x16x4_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B, Vn+3.16B}, Vm.16B /// - public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); + internal static Vector128 
VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); /// - /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) - /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B + /// int8x16_t vqtbl4q_s8(int8x16x4_t t, uint8x16_t idx) + /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B, Vn+3.16B}, Vm.16B /// - public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); + internal static Vector128 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); /// /// uint8x16_t vqvtbx1q_u8(uint8x16_t r, int8x16_t t, uint8x16_t idx) @@ -15002,38 +15002,38 @@ internal Arm64() { } public static Vector64 VectorTableLookup(Vector128 table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); /// - /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) - /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + /// uint8x8_t vqtbl2q_u8(uint8x16x2_t t, uint8x8_t idx) + /// A64: TBL Vd.8B, {Vn.16B, Vn+1.16B}, Vm.8B /// public static Vector64 VectorTableLookup((Vector128, Vector128) table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); /// - /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) - /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B + /// int8x8_t vqtbl2q_u8(int8x16x2_t t, uint8x8_t idx) + /// A64: TBL Vd.8B, {Vn.16B, Vn+1.16B}, Vm.8B /// public static Vector64 VectorTableLookup((Vector128, Vector128) table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); /// - /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) - /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + /// uint8x8_t vqtbl3q_u8(uint8x16x3_t t, uint8x8_t idx) + /// A64: TBL Vd.8B, {Vn.16B, Vn+1.16B, Vn+2.16B}, Vm.8B /// public static Vector64 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); /// - /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) - /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B + /// int8x8_t vqtbl3q_u8(int8x16x3_t t, uint8x8_t idx) + /// A64: TBL Vd.8B, {Vn.16B, Vn+1.16B, Vn+2.16B}, Vm.8B /// public static Vector64 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); /// - /// uint8x16_t vqvtbl1q_u8(uint8x16_t t, uint8x16_t idx) - /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + /// uint8x8_t vqtbl4q_u8(uint8x16x4_t t, uint8x8_t idx) + /// A64: TBL Vd.8B, {Vn.16B, Vn+1.16B, Vn+2.16B, Vn+3.16B}, Vm.8B /// public static Vector64 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); /// - /// int8x16_t vqvtbl1q_s8(int8x16_t t, uint8x16_t idx) - /// A64: TBL Vd.16B, {Vn.16B}, Vm.16B + /// int8x8_t vqtbl4q_u8(int8x16x4_t t, uint8x8_t idx) + /// A64: TBL Vd.8B, {Vn.16B, Vn+1.16B, Vn+2.16B, Vn+3.16B}, Vm.8B /// public static Vector64 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector64 byteIndexes) => VectorTableLookup(table, byteIndexes); diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index 5cf8d0dbe53d16..842e5da1cd49fb 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -2856,12 +2856,12 @@ 
public unsafe static void StoreSelectedScalar(ulong* address, System.Runtime.Int public static System.Runtime.Intrinsics.Vector128 SubtractWideningUpper(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } public static System.Runtime.Intrinsics.Vector64 VectorTableLookup(System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector64 VectorTableLookup(System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + internal static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + internal static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + internal static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + internal static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + internal static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + internal static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, 
System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector64 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector64 defaultValues, System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector64 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector64 defaultValues, System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 Xor(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } @@ -3499,12 +3499,12 @@ public unsafe static void StorePairScalarNonTemporal(uint* address, System.Runti public static System.Runtime.Intrinsics.Vector64 UnzipOdd(System.Runtime.Intrinsics.Vector64 left, System.Runtime.Intrinsics.Vector64 right) { throw null; } public static System.Runtime.Intrinsics.Vector128 VectorTableLookup(System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 VectorTableLookup(System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + internal static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + internal static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + internal static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw 
null; } + internal static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + internal static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + internal static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector128 defaultValues, System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector128 defaultValues, System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 ZipHigh(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } From 8027c5a0bad9f30f6094f27b0e17eaabd5c16da6 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 25 Jan 2023 00:27:31 -0800 Subject: [PATCH 029/125] Make APIs public again --- .../Arm/AdvSimd.PlatformNotSupported.cs | 24 +++++++++---------- .../System/Runtime/Intrinsics/Arm/AdvSimd.cs | 12 +++++----- .../ref/System.Runtime.Intrinsics.cs | 24 +++++++++---------- 3 files changed, 30 insertions(+), 30 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.PlatformNotSupported.cs index eead2a33a3ba2d..eaf512e42e7763 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.PlatformNotSupported.cs @@ -3666,37 +3666,37 @@ internal Arm64() { } /// uint8x16_t vqtbl2q_u8(uint8x16x2_t t, uint8x16_t idx) /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B /// - internal static Vector128 VectorTableLookup((Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + public static Vector128 VectorTableLookup((Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } /// /// int8x16_t vqtbl2q_s8(int8x16x2_t t, uint8x16_t idx) /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B /// - internal static Vector128 VectorTableLookup((Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + public static Vector128 VectorTableLookup((Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } /// /// uint8x16_t vqtbl3q_u8(uint8x16x3_t t, uint8x16_t idx) /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B}, Vm.16B /// - internal static Vector128 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + public static Vector128 
VectorTableLookup((Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } /// /// int8x16_t vqtbl3q_s8(int8x16x3_t t, uint8x16_t idx) /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B}, Vm.16B /// - internal static Vector128 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } /// /// uint8x16_t vqtbl4q_u8(uint8x16x4_t t, uint8x16_t idx) /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B, Vn+3.16B}, Vm.16B /// - internal static Vector128 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } /// /// int8x16_t vqtbl4q_s8(int8x16x4_t t, uint8x16_t idx) /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B, Vn+3.16B}, Vm.16B /// - internal static Vector128 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } /// /// uint8x16_t vqvtbx1q_u8(uint8x16_t r, int8x16_t t, uint8x16_t idx) @@ -15007,37 +15007,37 @@ internal Arm64() { } /// uint8x8_t vqtbl2q_u8(uint8x16x2_t t, uint8x8_t idx) /// A64: TBL Vd.8B, {Vn.16B, Vn+1.16B}, Vm.8B /// - internal static Vector64 VectorTableLookup((Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + public static Vector64 VectorTableLookup((Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } /// /// int8x8_t vqtbl2q_u8(int8x16x2_t t, uint8x8_t idx) /// A64: TBL Vd.8B, {Vn.16B, Vn+1.16B}, Vm.8B /// - internal static Vector64 VectorTableLookup((Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + public static Vector64 VectorTableLookup((Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } /// /// uint8x8_t vqtbl3q_u8(uint8x16x3_t t, uint8x8_t idx) /// A64: TBL Vd.8B, {Vn.16B, Vn+1.16B, Vn+2.16B}, Vm.8B /// - internal static Vector64 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + public static Vector64 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } /// /// int8x8_t vqtbl3q_u8(int8x16x3_t t, uint8x8_t idx) /// A64: TBL Vd.8B, {Vn.16B, Vn+1.16B, Vn+2.16B}, Vm.8B /// - internal static Vector64 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + public static Vector64 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } /// /// uint8x8_t vqtbl4q_u8(uint8x16x4_t t, uint8x8_t idx) /// A64: TBL Vd.8B, {Vn.16B, Vn+1.16B, Vn+2.16B, Vn+3.16B}, Vm.8B /// - internal static Vector64 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } 
+ public static Vector64 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } /// /// int8x8_t vqtbl4q_u8(int8x16x4_t t, uint8x8_t idx) /// A64: TBL Vd.8B, {Vn.16B, Vn+1.16B, Vn+2.16B, Vn+3.16B}, Vm.8B /// - internal static Vector64 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + public static Vector64 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } /// /// uint8x8_t vqvtbx1_u8(uint8x8_t r, uint8x16_t t, uint8x8_t idx) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs index 08660044f67ce2..5a900a69f2de2c 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs @@ -3664,37 +3664,37 @@ internal Arm64() { } /// uint8x16_t vqtbl2q_u8(uint8x16x2_t t, uint8x16_t idx) /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B /// - internal static Vector128 VectorTableLookup((Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); + public static Vector128 VectorTableLookup((Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); /// /// int8x16_t vqtbl2q_s8(int8x16x2_t t, uint8x16_t idx) /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B /// - internal static Vector128 VectorTableLookup((Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); + public static Vector128 VectorTableLookup((Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); /// /// uint8x16_t vqtbl3q_u8(uint8x16x3_t t, uint8x16_t idx) /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B}, Vm.16B /// - internal static Vector128 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); + public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); /// /// int8x16_t vqtbl3q_s8(int8x16x3_t t, uint8x16_t idx) /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B}, Vm.16B /// - internal static Vector128 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); + public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); /// /// uint8x16_t vqtbl4q_u8(uint8x16x4_t t, uint8x16_t idx) /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B, Vn+3.16B}, Vm.16B /// - internal static Vector128 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); + public static Vector128 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); /// /// int8x16_t vqtbl4q_s8(int8x16x4_t t, uint8x16_t idx) /// A64: TBL Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B, Vn+3.16B}, Vm.16B /// - internal static Vector128 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); + public static Vector128 
VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookup(table, byteIndexes); /// /// uint8x16_t vqvtbx1q_u8(uint8x16_t r, int8x16_t t, uint8x16_t idx) diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index 842e5da1cd49fb..5cf8d0dbe53d16 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -2856,12 +2856,12 @@ public unsafe static void StoreSelectedScalar(ulong* address, System.Runtime.Int public static System.Runtime.Intrinsics.Vector128 SubtractWideningUpper(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } public static System.Runtime.Intrinsics.Vector64 VectorTableLookup(System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector64 VectorTableLookup(System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - internal static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - internal static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - internal static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - internal static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - internal static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - internal static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, 
System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector64 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector64 defaultValues, System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector64 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector64 defaultValues, System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 Xor(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } @@ -3499,12 +3499,12 @@ public unsafe static void StorePairScalarNonTemporal(uint* address, System.Runti public static System.Runtime.Intrinsics.Vector64 UnzipOdd(System.Runtime.Intrinsics.Vector64 left, System.Runtime.Intrinsics.Vector64 right) { throw null; } public static System.Runtime.Intrinsics.Vector128 VectorTableLookup(System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 VectorTableLookup(System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - internal static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - internal static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - internal static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - internal static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - internal static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - internal static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 
VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector128 defaultValues, System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector128 defaultValues, System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 ZipHigh(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } From 6b1ba8aa4d42836766bfe2ffbb04f4f9cd3f2ef2 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 25 Jan 2023 00:45:11 -0800 Subject: [PATCH 030/125] cleanup --- src/coreclr/jit/gentree.cpp | 6 ------ src/coreclr/jit/gentree.h | 17 ----------------- src/coreclr/jit/hwintrinsic.h | 20 -------------------- src/coreclr/jit/lower.cpp | 4 ---- src/coreclr/jit/lsra.cpp | 2 +- src/coreclr/jit/lsra.h | 15 ++++++++++++++- src/coreclr/jit/lsraarm64.cpp | 4 ++-- src/coreclr/jit/morph.cpp | 8 ++------ 8 files changed, 19 insertions(+), 57 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 95b77bcc9a2916..6cf6d1352a51c5 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -934,12 +934,6 @@ int GenTree::GetRegisterDstCount(Compiler* compiler) const if (OperIsScalarLocal()) { -#if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_ARM64) - if (AsLclVar()->IsMultiRegUse()) - { - return compiler->lvaGetDesc(AsLclVar())->lvFieldCnt; - } -#endif // FEATURE_HW_INTRINSICS && TARGET_ARM64 return AsLclVar()->GetFieldCount(compiler); } assert(!"Unexpected multi-reg node"); diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index eca20fc7794cd1..fece4ad43f4176 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -3743,9 +3743,6 @@ struct GenTreeLclVar : public GenTreeLclVarCommon private: regNumberSmall gtOtherReg[MAX_MULTIREG_COUNT - 1]; 
MultiRegSpillFlags gtSpillFlags; -#ifdef TARGET_ARM64 - bool isMultiRegUse; -#endif public: INDEBUG(IL_OFFSET gtLclILoffs;) // instr offset of ref (only for JIT dumps) @@ -3767,20 +3764,6 @@ struct GenTreeLclVar : public GenTreeLclVarCommon ClearOtherRegFlags(); } -#ifdef TARGET_ARM64 - bool IsMultiRegUse() const - { - assert(!isMultiRegUse || ((gtFlags & GTF_VAR_MULTIREG) != 0)); - return isMultiRegUse; - } - - void SetMultiRegUse() - { - isMultiRegUse = true; - SetMultiReg(); - } -#endif - regNumber GetRegNumByIdx(int regIndex) const { assert(regIndex < MAX_MULTIREG_COUNT); diff --git a/src/coreclr/jit/hwintrinsic.h b/src/coreclr/jit/hwintrinsic.h index 78e24bb0b05b8f..b1299df1c1f1cf 100644 --- a/src/coreclr/jit/hwintrinsic.h +++ b/src/coreclr/jit/hwintrinsic.h @@ -773,26 +773,6 @@ struct HWIntrinsicInfo return (flags & HW_Flag_MultiReg) != 0; } - static int GetMultiRegUseCount(NamedIntrinsic id) - { - switch (id) - { -#ifdef TARGET_ARM64 - case NI_AdvSimd_VectorTableLookup_2: - case NI_AdvSimd_Arm64_VectorTableLookup_2: - return 2; - case NI_AdvSimd_VectorTableLookup_3: - case NI_AdvSimd_Arm64_VectorTableLookup_3: - return 3; - case NI_AdvSimd_VectorTableLookup_4: - case NI_AdvSimd_Arm64_VectorTableLookup_4: - return 4; -#endif - default: - unreached(); - } - } - static int GetMultiRegCount(NamedIntrinsic id) { assert(IsMultiReg(id)); diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 0a708002294af0..93df4f3c690bfc 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -6661,11 +6661,7 @@ void Lowering::CheckNode(Compiler* compiler, GenTree* node) #endif // FEATURE_SIMD && TARGET_64BIT if (varDsc->lvPromoted) { -#ifdef TARGET_ARM64 - assert(varDsc->lvDoNotEnregister || varDsc->lvIsMultiRegRet || node->AsLclVar()->IsMultiRegUse()); -#else assert(varDsc->lvDoNotEnregister || varDsc->lvIsMultiRegRet); -#endif // TARGET_ARM64 } } break; diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 90950af8cf5e9c..115dbaf75efb98 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -2849,7 +2849,7 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, refPosition->registerAssignment = foundRegBit; #ifdef TARGET_ARM64 - if (refPosition->needsConsecutive && (refPosition->regCount != 0)) + if (refPosition->isFirstRefPositionOfConsecutiveRegisters()) { setNextConsecutiveRegisterAssignment(refPosition, foundRegBit); } diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 0b3736b4f0d9b0..180599ce02d113 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1200,7 +1200,7 @@ class LinearScan : public LinearScanInterface #endif // TARGET_ARM return result; } -#endif +#endif // !TARGET_ARM64 #ifdef DEBUG class RegisterSelection; @@ -2533,6 +2533,19 @@ class RefPosition return (isFixedRefOfRegMask(genRegMask(regNum))); } +#ifdef TARGET_ARM64 + /// For consecutive registers, returns true if this RefPosition is + /// the first of the series. 
+ FORCEINLINE bool isFirstRefPositionOfConsecutiveRegisters() + { + if (needsConsecutive) + { + return regCount != 0; + } + return false; + } +#endif // TARGET_ARM64 + #ifdef DEBUG // operator= copies everything except 'rpNum', which must remain unique RefPosition& operator=(const RefPosition& rp) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index c0780ce91ea8ad..ec7006c6e06064 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -29,7 +29,7 @@ regMaskTP LinearScan::getFreeCandidates(regMaskTP candidates, RefPosition* refPo { regMaskTP result = candidates & m_AvailableRegs; - if (!refPosition->needsConsecutive || (refPosition->regCount == 0)) + if (!refPosition->isFirstRefPositionOfConsecutiveRegisters()) { return result; } @@ -74,7 +74,7 @@ RefPosition* LinearScan::getNextConsecutiveRefPosition(RefPosition* refPosition) void LinearScan::setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosition, regMaskTP firstRegAssigned) { assert(isSingleRegister(firstRegAssigned)); - assert(firstRefPosition->needsConsecutive && firstRefPosition->regCount > 0); + assert(firstRefPosition->isFirstRefPositionOfConsecutiveRegisters()); RefPosition* consecutiveRefPosition = getNextConsecutiveRefPosition(firstRefPosition); diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index cf072f7fc1008f..21d87671a41438 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -12075,16 +12075,12 @@ GenTree* Compiler::fgMorphMultiOp(GenTreeMultiOp* multiOp) // Promoted structs after morph must be in one of two states: // a) Fully eliminated from the IR (independent promotion) OR only be - // used by "special" nodes (e. g. LHS of ASGs for multi-reg structs or RHS of ASGs for multi-use). + // used by "special" nodes (e. g. LHS of ASGs for multi-reg structs). // b) Marked as do-not-enregister (dependent promotion). // // So here we preserve this invariant and mark any promoted structs as do-not-enreg. // - if (operand->OperIs(GT_LCL_VAR) && lvaGetDesc(operand->AsLclVar())->lvPromoted -#ifdef TARGET_ARM64 - && !operand->AsLclVar()->IsMultiRegUse() -#endif - ) + if (operand->OperIs(GT_LCL_VAR) && lvaGetDesc(operand->AsLclVar())->lvPromoted) { lvaSetVarDoNotEnregister(operand->AsLclVar()->GetLclNum() DEBUGARG(DoNotEnregisterReason::SimdUserForcesDep)); From 723477b0bbfbb0287fa1aba3444fe4d842ea7f47 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 25 Jan 2023 01:43:44 -0800 Subject: [PATCH 031/125] Handle case for reg mod 32 --- src/coreclr/jit/lsraarm64.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index ec7006c6e06064..221b2fbd446f80 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -81,17 +81,21 @@ void LinearScan::setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosit // should have at least one consecutive register requirement assert(consecutiveRefPosition != nullptr); - regMaskTP registerToAssign = firstRegAssigned; - int refPosCount = 1; + regNumber firstReg = genRegNumFromMask(firstRegAssigned); + regNumber regToAssign = firstReg == REG_FP_LAST ? 
REG_FP_FIRST : REG_NEXT(firstReg); + +#ifdef DEBUG + int refPosCount = 1; +#endif // DEBUG while (consecutiveRefPosition != nullptr) { - registerToAssign <<= 1; - consecutiveRefPosition->registerAssignment = registerToAssign; + consecutiveRefPosition->registerAssignment = genRegMask(regToAssign); consecutiveRefPosition = getNextConsecutiveRefPosition(consecutiveRefPosition); + regToAssign = regToAssign == REG_FP_LAST ? REG_FP_FIRST : REG_NEXT(regToAssign); #ifdef DEBUG refPosCount++; -#endif +#endif // DEBUG } assert(refPosCount == firstRefPosition->regCount); From c6e77e443621247d655e95e2b4dd2f0c4861460a Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 25 Jan 2023 10:07:41 -0800 Subject: [PATCH 032/125] Remove references from ref until API is approved --- .../ref/System.Runtime.Intrinsics.cs | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index 5cf8d0dbe53d16..fec48c2c5c1bed 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -2856,12 +2856,6 @@ public unsafe static void StoreSelectedScalar(ulong* address, System.Runtime.Int public static System.Runtime.Intrinsics.Vector128 SubtractWideningUpper(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } public static System.Runtime.Intrinsics.Vector64 VectorTableLookup(System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector64 VectorTableLookup(System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector64 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector64 defaultValues, System.Runtime.Intrinsics.Vector128 table, 
System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector64 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector64 defaultValues, System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 Xor(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } @@ -3499,12 +3493,6 @@ public unsafe static void StorePairScalarNonTemporal(uint* address, System.Runti public static System.Runtime.Intrinsics.Vector64 UnzipOdd(System.Runtime.Intrinsics.Vector64 left, System.Runtime.Intrinsics.Vector64 right) { throw null; } public static System.Runtime.Intrinsics.Vector128 VectorTableLookup(System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 VectorTableLookup(System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector128 defaultValues, System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector128 defaultValues, System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 ZipHigh(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } From 5696a6e7adf578d54f74be1d5057fb6b5e0d27ad Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 25 Jan 2023 10:07:54 -0800 Subject: [PATCH 033/125] Use generic getFreeCandidates() --- src/coreclr/jit/lsra.cpp | 4 ---- src/coreclr/jit/lsra.h | 5 ++--- src/coreclr/jit/lsraarm64.cpp | 17 ----------------- 3 files changed, 2 
insertions(+), 24 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 115dbaf75efb98..5bdabf9ee5778e 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -12008,11 +12008,7 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval, reverseSelect = linearScan->doReverseSelect(); #endif // DEBUG -#if defined(TARGET_ARM64) - freeCandidates = linearScan->getFreeCandidates(candidates, refPosition); -#else freeCandidates = linearScan->getFreeCandidates(candidates ARM_ARG(regType)); -#endif // TARGET_ARM // If no free candidates, then double check if refPosition is an actual ref. if (freeCandidates == RBM_NONE) diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 180599ce02d113..74b07309dcf078 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1184,9 +1184,9 @@ class LinearScan : public LinearScanInterface ****************************************************************************/ #if defined(TARGET_ARM64) - regMaskTP getFreeCandidates(regMaskTP candidates, RefPosition* refPosition); void setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosition, regMaskTP firstRegAssigned); -#else +#endif // TARGET_ARM64 + regMaskTP getFreeCandidates(regMaskTP candidates ARM_ARG(var_types regType)) { regMaskTP result = candidates & m_AvailableRegs; @@ -1200,7 +1200,6 @@ class LinearScan : public LinearScanInterface #endif // TARGET_ARM return result; } -#endif // !TARGET_ARM64 #ifdef DEBUG class RegisterSelection; diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 221b2fbd446f80..624f26910736cc 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -25,23 +25,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "sideeffects.h" #include "lower.h" -regMaskTP LinearScan::getFreeCandidates(regMaskTP candidates, RefPosition* refPosition) -{ - regMaskTP result = candidates & m_AvailableRegs; - - if (!refPosition->isFirstRefPositionOfConsecutiveRegisters()) - { - return result; - } - - // If refPosition->regCount != 0, we need to make sure we check for all the - // `regCount` available regs. - - result &= (m_AvailableRegs >> (refPosition->regCount - 1)); - - return result; -} - //------------------------------------------------------------------------ // getNextConsecutiveRefPosition: Get the next subsequent refPosition. 
// From a9e1a7a7ef3015921b93a4cc6e7c57c00289a4d7 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 25 Jan 2023 12:15:10 -0800 Subject: [PATCH 034/125] Add entries in ExtraAPis --- .../ref/System.Private.CoreLib.ExtraApis.cs | 22 +++++++++++++++++++ .../ref/System.Private.CoreLib.ExtraApis.txt | 12 ++++++++++ 2 files changed, 34 insertions(+) diff --git a/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.cs b/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.cs index 84e99da5aa050f..4b9ab328a2754d 100644 --- a/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.cs +++ b/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.cs @@ -37,3 +37,25 @@ public static partial class Debug public static System.Diagnostics.DebugProvider SetProvider(System.Diagnostics.DebugProvider provider) { throw null; } } } +namespace System.Runtime.Intrinsics.Arm +{ + public abstract partial class AdvSimd : System.Runtime.Intrinsics.Arm.ArmBase + { + public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + } + + public new abstract partial class Arm64 : System.Runtime.Intrinsics.Arm.ArmBase.Arm64 + { + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 
byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + } +} diff --git a/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.txt b/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.txt index 0babd819e25d04..5a861db7629d75 100644 --- a/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.txt +++ b/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.txt @@ -5,3 +5,15 @@ T:System.Runtime.Serialization.DeserializationToken M:System.Runtime.Serialization.SerializationInfo.StartDeserialization T:System.Diagnostics.DebugProvider M:System.Diagnostics.Debug.SetProvider(System.Diagnostics.DebugProvider) +M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector128) +M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector128) +M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector128) +M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector128) +M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector128) +M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector128) +M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector64) +M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector64) +M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector64) +M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector64) +M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector64) 
+M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector64) \ No newline at end of file From 2617b774db206a612171bee8d12c2d5bb4bf9a70 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 25 Jan 2023 12:27:56 -0800 Subject: [PATCH 035/125] Set CLSCompliant=false --- .../ref/System.Private.CoreLib.ExtraApis.cs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.cs b/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.cs index 4b9ab328a2754d..1472ff20651cc9 100644 --- a/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.cs +++ b/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.cs @@ -39,7 +39,7 @@ public static partial class Debug } namespace System.Runtime.Intrinsics.Arm { - public abstract partial class AdvSimd : System.Runtime.Intrinsics.Arm.ArmBase + public abstract partial class AdvSimd : ArmBase { public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } @@ -49,7 +49,8 @@ public abstract partial class AdvSimd : System.Runtime.Intrinsics.Arm.ArmBase public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } } - public new abstract partial class Arm64 : System.Runtime.Intrinsics.Arm.ArmBase.Arm64 + [CLSCompliant(false)] + public abstract partial class Arm64 : ArmBase.Arm64 { public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } From 2d4fd5ce3ed69817f8060269c780a4b429cc3171 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 25 Jan 2023 14:45:20 -0800 Subject: [PATCH 036/125] Move in inner class --- .../ref/System.Private.CoreLib.ExtraApis.cs | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.cs b/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.cs index 1472ff20651cc9..4a05ff380a3c96 100644 --- a/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.cs +++ b/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.cs @@ -47,16 +47,16 @@ public abstract partial class AdvSimd : ArmBase public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { 
throw null; } public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - } - [CLSCompliant(false)] - public abstract partial class Arm64 : ArmBase.Arm64 - { - public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + [CLSCompliant(false)] + public abstract partial class Arm64 : ArmBase.Arm64 + { + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + public static 
System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + } } From c6d338f720ba08686dab905497304c182ddb817d Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 25 Jan 2023 22:56:51 -0800 Subject: [PATCH 037/125] Remove CLSCompliant flag --- .../ref/System.Private.CoreLib.ExtraApis.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.cs b/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.cs index 4a05ff380a3c96..97ff2a1d64f1e3 100644 --- a/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.cs +++ b/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.cs @@ -48,7 +48,6 @@ public abstract partial class AdvSimd : ArmBase public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - [CLSCompliant(false)] public abstract partial class Arm64 : ArmBase.Arm64 { public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } From db4f846d24f120162ffe35ea5a38e8d4a50b33e2 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Thu, 26 Jan 2023 15:22:05 -0800 Subject: [PATCH 038/125] Add a suppression file for System.Runtime.Intrinsics on the new APIs until they go through API review --- .../src/CompatibilitySuppressions.xml | 76 +++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 src/libraries/System.Runtime.Intrinsics/src/CompatibilitySuppressions.xml diff --git a/src/libraries/System.Runtime.Intrinsics/src/CompatibilitySuppressions.xml b/src/libraries/System.Runtime.Intrinsics/src/CompatibilitySuppressions.xml new file mode 100644 index 00000000000000..3d3a055c0f9332 --- /dev/null +++ b/src/libraries/System.Runtime.Intrinsics/src/CompatibilitySuppressions.xml @@ -0,0 +1,76 @@ + + + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector128{System.Byte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector128{System.Byte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + 
M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector128{System.Byte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector128{System.SByte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector128{System.SByte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector128{System.SByte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector64{System.Byte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector64{System.Byte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector64{System.Byte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector64{System.SByte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector64{System.SByte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + 
M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector64{System.SByte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + \ No newline at end of file From bce8c5a53527cdda8dd9dd5da095c8a453127437 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 31 Jan 2023 22:20:23 -0800 Subject: [PATCH 039/125] Review feedback --- src/coreclr/jit/hwintrinsic.cpp | 68 +----------------- src/coreclr/jit/hwintrinsicarm64.cpp | 79 +++++++++++++++++++++ src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 21 +++--- src/coreclr/jit/hwintrinsiclistarm64.h | 16 ++--- src/coreclr/jit/lsra.h | 5 +- src/coreclr/jit/lsraarm64.cpp | 4 +- src/coreclr/jit/lsrabuild.cpp | 11 +-- 7 files changed, 109 insertions(+), 95 deletions(-) diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 02e21031f67175..341f14a460b423 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -1148,72 +1148,10 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, case 2: op2 = getArgForHWIntrinsic(sigReader.GetOp2Type(), sigReader.op2ClsHnd); op2 = addRangeCheckIfNeeded(intrinsic, op2, mustExpand, immLowerBound, immUpperBound); + op1 = getArgForHWIntrinsic(sigReader.GetOp1Type(), sigReader.op1ClsHnd); -#ifdef TARGET_ARM64 - if ((intrinsic == NI_AdvSimd_VectorTableLookup) || (intrinsic == NI_AdvSimd_Arm64_VectorTableLookup)) - { - op1 = impPopStack().val; - - if (op1->TypeGet() == TYP_STRUCT) - { - assert(op1->OperIsLocal()); - - LclVarDsc* op1VarDsc = lvaGetDesc(op1->AsLclVar()); - unsigned lclNum = lvaGetLclNum(op1VarDsc); - unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(sigReader.op1ClsHnd); - - GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList(); - int offset = 0; - for (unsigned fieldId = 0; fieldId < fieldCount; fieldId++) - { - GenTreeLclFld* fldNode = gtNewLclFldNode(lclNum, TYP_SIMD16, offset); - fieldList->AddField(this, fldNode, offset, TYP_SIMD16); - - offset += op1VarDsc->lvSize() / fieldCount; - } - op1 = fieldList; - - switch (fieldCount) - { - case 1: - // NI_AdvSimd_VectorTableLookup - // NI_AdvSimd_Arm64_VectorTableLookup - break; - case 2: - // NI_AdvSimd_VectorTableLookup_2 - // NI_AdvSimd_Arm64_VectorTableLookup_2 - intrinsic = (NamedIntrinsic)(intrinsic + 1); - break; - case 3: - // NI_AdvSimd_VectorTableLookup_3 - // NI_AdvSimd_Arm64_VectorTableLookup_3 - intrinsic = (NamedIntrinsic)(intrinsic + 2); - break; - case 4: - // NI_AdvSimd_VectorTableLookup_4 - // NI_AdvSimd_Arm64_VectorTableLookup_4 - intrinsic = (NamedIntrinsic)(intrinsic + 3); - break; - default: - noway_assert("Unknown field count"); - } - - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); - } - else - { - assert(op1->TypeGet() == TYP_SIMD16); - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); - } - } - else -#endif - { - op1 = getArgForHWIntrinsic(sigReader.GetOp1Type(), sigReader.op1ClsHnd); - retNode = isScalar - ? gtNewScalarHWIntrinsicNode(retType, op1, op2, intrinsic) - : gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); - } + retNode = isScalar ? 
gtNewScalarHWIntrinsicNode(retType, op1, op2, intrinsic) + : gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); #ifdef TARGET_XARCH if ((intrinsic == NI_SSE42_Crc32) || (intrinsic == NI_SSE42_X64_Crc32)) diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 93e36b710e065b..7e5934b94a206a 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -1876,6 +1876,85 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, retNode = impAssignMultiRegTypeToVar(op1, sig->retTypeSigClass DEBUGARG(CorInfoCallConvExtension::Managed)); break; } + case NI_AdvSimd_VectorTableLookup: + case NI_AdvSimd_Arm64_VectorTableLookup: + { + assert(sig->numArgs == 2); + + CORINFO_ARG_LIST_HANDLE arg1 = sig->args; + CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1); + var_types argType = TYP_UNKNOWN; + CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE; + + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); + op2 = getArgForHWIntrinsic(argType, argClass); + + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass))); + op1 = impPopStack().val; + + if (!op1->OperIsLocal()) + { + unsigned tmp = lvaGrabTemp(true DEBUGARG("VectorTableLookup temp tree")); + + impAssignTempGen(tmp, op1, CHECK_SPILL_NONE); + op1 = gtNewLclvNode(tmp, argType); + } + + if (argType == TYP_STRUCT) + { + assert(op1->OperIsLocal()); + + LclVarDsc* op1VarDsc = lvaGetDesc(op1->AsLclVar()); + unsigned lclNum = lvaGetLclNum(op1VarDsc); + unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(argClass); + unsigned fieldSize = op1VarDsc->lvSize() / fieldCount; + var_types fieldType = TYP_SIMD16; + + GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList(); + int offset = 0; + for (unsigned fieldId = 0; fieldId < fieldCount; fieldId++) + { + GenTreeLclFld* fldNode = gtNewLclFldNode(lclNum, fieldType, offset); + fieldList->AddField(this, fldNode, offset, fieldType); + + offset += fieldSize; + } + op1 = fieldList; + + switch (fieldCount) + { + case 1: + // NI_AdvSimd_VectorTableLookup + // NI_AdvSimd_Arm64_VectorTableLookup + break; + case 2: + // NI_AdvSimd_VectorTableLookup_2 + // NI_AdvSimd_Arm64_VectorTableLookup_2 + intrinsic = (NamedIntrinsic)(intrinsic + 1); + break; + case 3: + // NI_AdvSimd_VectorTableLookup_3 + // NI_AdvSimd_Arm64_VectorTableLookup_3 + intrinsic = (NamedIntrinsic)(intrinsic + 2); + break; + case 4: + // NI_AdvSimd_VectorTableLookup_4 + // NI_AdvSimd_Arm64_VectorTableLookup_4 + intrinsic = (NamedIntrinsic)(intrinsic + 3); + break; + default: + noway_assert(!"Unknown field count"); + } + + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); + } + else + { + assert(argType == TYP_SIMD16); + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); + } + break; + } default: { diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 9f586380f58479..d80f7844dca751 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -519,21 +519,16 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) GenTreeFieldList* fieldList = intrin.op1->AsFieldList(); GenTree* firstField = fieldList->Uses().GetHead()->GetNode(); - - if (firstField->IsCopyOrReload()) + op1Reg = firstField->GetRegNum(); +#ifdef DEBUG + regNumber argReg = op1Reg; + for 
(GenTreeFieldList::Use& use : fieldList->Uses()) { - // If value is copied in a register to satisfy the consecutive-register - // requirement, make sure to get the source's register because these - // instruction encoding takes only the 1st register and infer the rest - // from that. - GenTree* op1 = firstField->AsCopyOrReload()->gtGetOp1(); - assert(!op1->IsCopyOrReload()); - op1Reg = op1->GetRegNum(); - } - else - { - op1Reg = firstField->GetRegNum(); + GenTree* argNode = use.GetNode(); + assert(argReg == argNode->GetRegNum()); + argReg = (regNumber)(argReg + 1); } +#endif GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt); break; } diff --git a/src/coreclr/jit/hwintrinsiclistarm64.h b/src/coreclr/jit/hwintrinsiclistarm64.h index 5e6b6aaf434733..374982e337785b 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64.h +++ b/src/coreclr/jit/hwintrinsiclistarm64.h @@ -475,10 +475,10 @@ HARDWARE_INTRINSIC(AdvSimd, SubtractSaturateScalar, HARDWARE_INTRINSIC(AdvSimd, SubtractScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sub, INS_sub, INS_fsub, INS_fsub}, HW_Category_SIMD, HW_Flag_SIMDScalar) HARDWARE_INTRINSIC(AdvSimd, SubtractWideningLower, 8, 2, {INS_ssubl, INS_usubl, INS_ssubl, INS_usubl, INS_ssubl, INS_usubl, INS_ssubw, INS_usubw, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd, SubtractWideningUpper, 16, 2, {INS_ssubl2, INS_usubl2, INS_ssubl2, INS_usubl2, INS_ssubl2, INS_usubl2, INS_ssubw2, INS_usubw2, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd, VectorTableLookup, 8, 2, {INS_tbl, INS_tbl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd, VectorTableLookup_2, 8, 2, {INS_tbl_2regs, INS_tbl_2regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd, VectorTableLookup_3, 8, 2, {INS_tbl_3regs, INS_tbl_3regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd, VectorTableLookup_4, 8, 2, {INS_tbl_4regs, INS_tbl_4regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd, VectorTableLookup, 8, 2, {INS_tbl, INS_tbl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AdvSimd, VectorTableLookup_2, 8, 2, {INS_tbl_2regs, INS_tbl_2regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd, VectorTableLookup_3, 8, 2, {INS_tbl_3regs, INS_tbl_3regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd, VectorTableLookup_4, 8, 2, {INS_tbl_4regs, INS_tbl_4regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, 
HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd, VectorTableLookupExtension, 8, 3, {INS_tbx, INS_tbx, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(AdvSimd, Xor, -1, 2, {INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor}, HW_Category_SIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(AdvSimd, ZeroExtendWideningLower, 8, 1, {INS_uxtl, INS_uxtl, INS_uxtl, INS_uxtl, INS_uxtl, INS_uxtl, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) @@ -652,10 +652,10 @@ HARDWARE_INTRINSIC(AdvSimd_Arm64, TransposeEven, HARDWARE_INTRINSIC(AdvSimd_Arm64, TransposeOdd, -1, 2, {INS_trn2, INS_trn2, INS_trn2, INS_trn2, INS_trn2, INS_trn2, INS_trn2, INS_trn2, INS_trn2, INS_trn2}, HW_Category_SIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AdvSimd_Arm64, UnzipEven, -1, 2, {INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1}, HW_Category_SIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AdvSimd_Arm64, UnzipOdd, -1, 2, {INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup, 16, 2, {INS_tbl, INS_tbl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup_2, 16, 2, {INS_tbl_2regs, INS_tbl_2regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup_3, 16, 2, {INS_tbl_3regs, INS_tbl_3regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup_4, 16, 2, {INS_tbl_4regs, INS_tbl_4regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup, 16, 2, {INS_tbl, INS_tbl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup_2, 16, 2, {INS_tbl_2regs, INS_tbl_2regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup_3, 16, 2, {INS_tbl_3regs, INS_tbl_3regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup_4, 16, 2, {INS_tbl_4regs, INS_tbl_4regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookupExtension, 16, 3, {INS_tbx, INS_tbx, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(AdvSimd_Arm64, ZipHigh, -1, 2, 
{INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2}, HW_Category_SIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AdvSimd_Arm64, ZipLow, -1, 2, {INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1}, HW_Category_SIMD, HW_Flag_NoFlag) diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 74b07309dcf078..c50c82b60c665e 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1860,9 +1860,8 @@ class LinearScan : public LinearScanInterface bool checkContainedOrCandidateLclVar(GenTreeLclVar* lclNode); RefPosition* BuildUse(GenTree* operand, - regMaskTP candidates = RBM_NONE, - int multiRegIdx = 0, - bool needsConsecutive = false); + regMaskTP candidates = RBM_NONE, + int multiRegIdx = 0 ARM64_ARG(bool needsConsecutive = false)); void setDelayFree(RefPosition* use); int BuildBinaryUses(GenTreeOp* node, regMaskTP candidates = RBM_NONE); diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 624f26910736cc..d8d9c756c6f5c7 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -64,7 +64,7 @@ void LinearScan::setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosit // should have at least one consecutive register requirement assert(consecutiveRefPosition != nullptr); - regNumber firstReg = genRegNumFromMask(firstRegAssigned); + regNumber firstReg = genRegNumFromMask(firstRegAssigned); regNumber regToAssign = firstReg == REG_FP_LAST ? REG_FP_FIRST : REG_NEXT(firstReg); #ifdef DEBUG @@ -1080,7 +1080,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou RefPosition* currRefPos = nullptr; for (GenTreeFieldList::Use& use : intrin.op1->AsFieldList()->Uses()) { - currRefPos = BuildUse(use.GetNode(), RBM_NONE, 0, /* needsConsecutive */ true); + currRefPos = BuildUse(use.GetNode(), RBM_NONE, 0 ARM64_ARG(/* needsConsecutive */ true)); if (lastRefPos == nullptr) { currRefPos->regCount = regCount; diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index a7610e0d433539..0fed5376188041 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -3025,9 +3025,10 @@ void LinearScan::UpdatePreferencesOfDyingLocal(Interval* interval) // the defList, and build a use RefPosition for the associated Interval. // // Arguments: -// operand - The node of interest -// candidates - The register candidates for the use -// multiRegIdx - The index of the multireg def/use +// operand - The node of interest +// candidates - The register candidates for the use +// multiRegIdx - The index of the multireg def/use +// needsConsecutive - If the operand needs consecutive registers. // // Return Value: // The newly created use RefPosition @@ -3035,7 +3036,9 @@ void LinearScan::UpdatePreferencesOfDyingLocal(Interval* interval) // Notes: // The node must not be contained, and must have been processed by buildRefPositionsForNode(). 
// -RefPosition* LinearScan::BuildUse(GenTree* operand, regMaskTP candidates, int multiRegIdx, bool needsConsecutive) +RefPosition* LinearScan::BuildUse(GenTree* operand, + regMaskTP candidates, + int multiRegIdx ARM64_ARG(bool needsConsecutive)) { assert(!operand->isContained()); Interval* interval; From 3d15fcbd53ab7293eb710a2081ad684f5a3fd406 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 31 Jan 2023 22:29:53 -0800 Subject: [PATCH 040/125] Add workaround for building tests --- .../src/System/Runtime/Intrinsics/Arm/AdvSimd.cs | 2 ++ .../src/System/Runtime/Intrinsics/Arm/ArmBase.cs | 2 ++ src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/AdvSimd_r.csproj | 1 + src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/AdvSimd_ro.csproj | 2 ++ src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/Program.AdvSimd.cs | 2 ++ 5 files changed, 9 insertions(+) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs index 5a900a69f2de2c..7a09be5a0820cf 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs @@ -10,7 +10,9 @@ namespace System.Runtime.Intrinsics.Arm /// This class provides access to the ARM AdvSIMD hardware instructions via intrinsics /// [Intrinsic] +#if SYSTEM_PRIVATE_CORELIB [CLSCompliant(false)] +#endif public abstract class AdvSimd : ArmBase { internal AdvSimd() { } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/ArmBase.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/ArmBase.cs index dd378377f5c506..d6c60878ecc37c 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/ArmBase.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/ArmBase.cs @@ -9,7 +9,9 @@ namespace System.Runtime.Intrinsics.Arm /// This class provides access to the ARM base hardware instructions via intrinsics /// [Intrinsic] +#if SYSTEM_PRIVATE_CORELIB [CLSCompliant(false)] +#endif public abstract class ArmBase { internal ArmBase() { } diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/AdvSimd_r.csproj b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/AdvSimd_r.csproj index 2687de5191369f..03cdf582f33d65 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/AdvSimd_r.csproj +++ b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/AdvSimd_r.csproj @@ -11,5 +11,6 @@ + diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/AdvSimd_ro.csproj b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/AdvSimd_ro.csproj index d7a4cd3f91a416..4423a589686ffc 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/AdvSimd_ro.csproj +++ b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/AdvSimd_ro.csproj @@ -7,9 +7,11 @@ Embedded True + + diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/Program.AdvSimd.cs b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/Program.AdvSimd.cs index af869e6e80f7fc..48c6003e615bd7 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/Program.AdvSimd.cs +++ b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/Program.AdvSimd.cs @@ -1,6 +1,8 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+extern alias CoreLib; +global using AdvSimd = CoreLib::System.Runtime.Intrinsics.Arm.AdvSimd; using System; using System.Collections.Generic; From 75f142bf5323283ad1d12290c6f67cbff946a5e9 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 31 Jan 2023 22:33:49 -0800 Subject: [PATCH 041/125] review feedback --- src/coreclr/jit/codegenlinear.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index d86afbe28f7668..b1df3b823ea44f 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -1622,7 +1622,7 @@ void CodeGen::genConsumeRegs(GenTree* tree) for (GenTreeFieldList::Use& use : tree->AsFieldList()->Uses()) { GenTree* fieldNode = use.GetNode(); - genConsumeReg(fieldNode); + genConsumeRegs(fieldNode); } } #endif From e4cbad998f78511b07248b9abaec601b31ff6f9d Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 1 Feb 2023 00:23:45 -0800 Subject: [PATCH 042/125] TP: remove needsConsecutive parameter from BuildUse() --- src/coreclr/jit/lsra.h | 4 +--- src/coreclr/jit/lsrabuild.cpp | 14 ++++---------- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index c50c82b60c665e..727885a914174a 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1859,9 +1859,7 @@ class LinearScan : public LinearScanInterface bool isCandidateMultiRegLclVar(GenTreeLclVar* lclNode); bool checkContainedOrCandidateLclVar(GenTreeLclVar* lclNode); - RefPosition* BuildUse(GenTree* operand, - regMaskTP candidates = RBM_NONE, - int multiRegIdx = 0 ARM64_ARG(bool needsConsecutive = false)); + RefPosition* BuildUse(GenTree* operand, regMaskTP candidates = RBM_NONE, int multiRegIdx = 0); void setDelayFree(RefPosition* use); int BuildBinaryUses(GenTreeOp* node, regMaskTP candidates = RBM_NONE); diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 0fed5376188041..1ef61660e80ec6 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -3025,10 +3025,9 @@ void LinearScan::UpdatePreferencesOfDyingLocal(Interval* interval) // the defList, and build a use RefPosition for the associated Interval. // // Arguments: -// operand - The node of interest -// candidates - The register candidates for the use -// multiRegIdx - The index of the multireg def/use -// needsConsecutive - If the operand needs consecutive registers. +// operand - The node of interest +// candidates - The register candidates for the use +// multiRegIdx - The index of the multireg def/use // // Return Value: // The newly created use RefPosition @@ -3036,9 +3035,7 @@ void LinearScan::UpdatePreferencesOfDyingLocal(Interval* interval) // Notes: // The node must not be contained, and must have been processed by buildRefPositionsForNode(). 
// -RefPosition* LinearScan::BuildUse(GenTree* operand, - regMaskTP candidates, - int multiRegIdx ARM64_ARG(bool needsConsecutive)) +RefPosition* LinearScan::BuildUse(GenTree* operand, regMaskTP candidates, int multiRegIdx) { assert(!operand->isContained()); Interval* interval; @@ -3092,9 +3089,6 @@ RefPosition* LinearScan::BuildUse(GenTree* operand, operand = nullptr; } RefPosition* useRefPos = newRefPosition(interval, currentLoc, RefTypeUse, operand, candidates, multiRegIdx); -#ifdef TARGET_ARM64 - useRefPos->needsConsecutive = needsConsecutive; -#endif useRefPos->setRegOptional(regOptional); return useRefPos; } From 96de0247b38edc94e2b33bf769ae6d210001c8c8 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 1 Feb 2023 00:37:27 -0800 Subject: [PATCH 043/125] TP: Remove pseudo intrinsic entries --- src/coreclr/jit/gentree.h | 2 - src/coreclr/jit/hwintrinsicarm64.cpp | 25 ------- src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 49 ++++++++------ src/coreclr/jit/hwintrinsiclistarm64.h | 10 +-- src/coreclr/jit/lsraarm64.cpp | 74 +++++++++------------ 5 files changed, 63 insertions(+), 97 deletions(-) diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index fece4ad43f4176..ea8731460bf7c9 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -3752,12 +3752,10 @@ struct GenTreeLclVar : public GenTreeLclVarCommon { return ((gtFlags & GTF_VAR_MULTIREG) != 0); } - void ClearMultiReg() { gtFlags &= ~GTF_VAR_MULTIREG; } - void SetMultiReg() { gtFlags |= GTF_VAR_MULTIREG; diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 7e5934b94a206a..497b3b424a85dc 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -1921,31 +1921,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, } op1 = fieldList; - switch (fieldCount) - { - case 1: - // NI_AdvSimd_VectorTableLookup - // NI_AdvSimd_Arm64_VectorTableLookup - break; - case 2: - // NI_AdvSimd_VectorTableLookup_2 - // NI_AdvSimd_Arm64_VectorTableLookup_2 - intrinsic = (NamedIntrinsic)(intrinsic + 1); - break; - case 3: - // NI_AdvSimd_VectorTableLookup_3 - // NI_AdvSimd_Arm64_VectorTableLookup_3 - intrinsic = (NamedIntrinsic)(intrinsic + 2); - break; - case 4: - // NI_AdvSimd_VectorTableLookup_4 - // NI_AdvSimd_Arm64_VectorTableLookup_4 - intrinsic = (NamedIntrinsic)(intrinsic + 3); - break; - default: - noway_assert(!"Unknown field count"); - } - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); } else diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index d80f7844dca751..2fafc229f5708b 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -424,19 +424,9 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) instruction ins = INS_invalid; switch (intrin.id) { - case NI_AdvSimd_VectorTableLookup_2: - case NI_AdvSimd_Arm64_VectorTableLookup_2: - ins = INS_tbl_2regs; - break; - - case NI_AdvSimd_VectorTableLookup_3: - case NI_AdvSimd_Arm64_VectorTableLookup_3: - ins = INS_tbl_3regs; - break; - - case NI_AdvSimd_VectorTableLookup_4: - case NI_AdvSimd_Arm64_VectorTableLookup_4: - ins = INS_tbl_4regs; + case NI_AdvSimd_VectorTableLookup: + case NI_AdvSimd_Arm64_VectorTableLookup: + ins = INS_tbl; break; case NI_AdvSimd_AddWideningLower: @@ -504,12 +494,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) switch (intrin.id) { - case NI_AdvSimd_VectorTableLookup_2: - case 
NI_AdvSimd_Arm64_VectorTableLookup_2: - case NI_AdvSimd_VectorTableLookup_3: - case NI_AdvSimd_Arm64_VectorTableLookup_3: - case NI_AdvSimd_VectorTableLookup_4: - case NI_AdvSimd_Arm64_VectorTableLookup_4: + case NI_AdvSimd_VectorTableLookup: + case NI_AdvSimd_Arm64_VectorTableLookup: { if (!intrin.op1->OperIsFieldList()) @@ -522,13 +508,36 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) op1Reg = firstField->GetRegNum(); #ifdef DEBUG regNumber argReg = op1Reg; +#endif + unsigned regCount = 0; for (GenTreeFieldList::Use& use : fieldList->Uses()) { + regCount++; +#ifdef DEBUG + GenTree* argNode = use.GetNode(); assert(argReg == argNode->GetRegNum()); argReg = (regNumber)(argReg + 1); - } #endif + } + + switch (regCount) + { + case 2: + ins = INS_tbl_2regs; + break; + case 3: + ins = INS_tbl_3regs; + break; + case 4: + ins = INS_tbl_4regs; + break; + default: + assert(regCount == 1); + assert(ins == INS_tbl); + break; + } + GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt); break; } diff --git a/src/coreclr/jit/hwintrinsiclistarm64.h b/src/coreclr/jit/hwintrinsiclistarm64.h index 374982e337785b..d253267e28cea9 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64.h +++ b/src/coreclr/jit/hwintrinsiclistarm64.h @@ -475,10 +475,7 @@ HARDWARE_INTRINSIC(AdvSimd, SubtractSaturateScalar, HARDWARE_INTRINSIC(AdvSimd, SubtractScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sub, INS_sub, INS_fsub, INS_fsub}, HW_Category_SIMD, HW_Flag_SIMDScalar) HARDWARE_INTRINSIC(AdvSimd, SubtractWideningLower, 8, 2, {INS_ssubl, INS_usubl, INS_ssubl, INS_usubl, INS_ssubl, INS_usubl, INS_ssubw, INS_usubw, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd, SubtractWideningUpper, 16, 2, {INS_ssubl2, INS_usubl2, INS_ssubl2, INS_usubl2, INS_ssubl2, INS_usubl2, INS_ssubw2, INS_usubw2, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd, VectorTableLookup, 8, 2, {INS_tbl, INS_tbl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AdvSimd, VectorTableLookup_2, 8, 2, {INS_tbl_2regs, INS_tbl_2regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd, VectorTableLookup_3, 8, 2, {INS_tbl_3regs, INS_tbl_3regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd, VectorTableLookup_4, 8, 2, {INS_tbl_4regs, INS_tbl_4regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd, VectorTableLookup, 8, 2, {INS_tbl, INS_tbl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd, VectorTableLookupExtension, 8, 3, {INS_tbx, INS_tbx, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(AdvSimd, Xor, -1, 2, {INS_eor, INS_eor, 
INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor}, HW_Category_SIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(AdvSimd, ZeroExtendWideningLower, 8, 1, {INS_uxtl, INS_uxtl, INS_uxtl, INS_uxtl, INS_uxtl, INS_uxtl, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) @@ -652,10 +649,7 @@ HARDWARE_INTRINSIC(AdvSimd_Arm64, TransposeEven, HARDWARE_INTRINSIC(AdvSimd_Arm64, TransposeOdd, -1, 2, {INS_trn2, INS_trn2, INS_trn2, INS_trn2, INS_trn2, INS_trn2, INS_trn2, INS_trn2, INS_trn2, INS_trn2}, HW_Category_SIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AdvSimd_Arm64, UnzipEven, -1, 2, {INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1}, HW_Category_SIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AdvSimd_Arm64, UnzipOdd, -1, 2, {INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup, 16, 2, {INS_tbl, INS_tbl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup_2, 16, 2, {INS_tbl_2regs, INS_tbl_2regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup_3, 16, 2, {INS_tbl_3regs, INS_tbl_3regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup_4, 16, 2, {INS_tbl_4regs, INS_tbl_4regs, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup, 16, 2, {INS_tbl, INS_tbl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookupExtension, 16, 3, {INS_tbx, INS_tbx, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(AdvSimd_Arm64, ZipHigh, -1, 2, {INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2}, HW_Category_SIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AdvSimd_Arm64, ZipLow, -1, 2, {INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1}, HW_Category_SIMD, HW_Flag_NoFlag) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index d8d9c756c6f5c7..4d50282aae9bf1 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1046,57 +1046,47 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } else { - unsigned regCount = 0; - RefPosition* useRefPos1 = nullptr; - RefPosition* nextUseRefPos = nullptr; - switch (intrin.id) + if ((intrin.id == NI_AdvSimd_VectorTableLookup) || (intrin.id == NI_AdvSimd_Arm64_VectorTableLookup)) { - case NI_AdvSimd_VectorTableLookup_2: - case NI_AdvSimd_Arm64_VectorTableLookup_2: - regCount = 2; - break; - case NI_AdvSimd_VectorTableLookup_3: - case 
NI_AdvSimd_Arm64_VectorTableLookup_3: - regCount = 3; - break; - case NI_AdvSimd_VectorTableLookup_4: - case NI_AdvSimd_Arm64_VectorTableLookup_4: - regCount = 4; - break; - default: - regCount = 1; - break; - } - - if (regCount == 1) - { - srcCount += BuildOperandUses(intrin.op1); - } - else - { - RefPosition* lastRefPos = nullptr; - // consecutive registers - - RefPosition* currRefPos = nullptr; - for (GenTreeFieldList::Use& use : intrin.op1->AsFieldList()->Uses()) + if (intrin.op1->OperIsFieldList()) { - currRefPos = BuildUse(use.GetNode(), RBM_NONE, 0 ARM64_ARG(/* needsConsecutive */ true)); - if (lastRefPos == nullptr) - { - currRefPos->regCount = regCount; - } - else + unsigned regCount = 0; + RefPosition* currRefPos = nullptr; + RefPosition* firstRefPos = nullptr; + RefPosition* lastRefPos = nullptr; + + for (GenTreeFieldList::Use& use : intrin.op1->AsFieldList()->Uses()) { - // Explicitely set regCount=0 so we can identify that this is non-first refposition. - currRefPos->regCount = 0; + currRefPos = BuildUse(use.GetNode()); + currRefPos->needsConsecutive = true; + currRefPos->regCount = 0; + + if (firstRefPos == nullptr) + { + firstRefPos = currRefPos; + } getNextConsecutiveRefPositionsMap()->Set(lastRefPos, currRefPos, LinearScan::NextConsecutiveRefPositionsMap::Overwrite); getNextConsecutiveRefPositionsMap()->Set(currRefPos, nullptr); + + lastRefPos = currRefPos; + regCount++; } - lastRefPos = currRefPos; + + // Just `regCount` to actual registers count for first ref-position. + // For others, set 0 so we can identify that this is non-first refposition. + firstRefPos->regCount = regCount; + srcCount += regCount; } - srcCount += regCount; + else + { + srcCount += BuildOperandUses(intrin.op1); + } + } + else + { + srcCount += BuildOperandUses(intrin.op1); } } } From 11b345af5486d26bee1f2f3e78c5418b709e3601 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 1 Feb 2023 22:23:30 -0800 Subject: [PATCH 044/125] More fixes --- src/coreclr/jit/hwintrinsicarm64.cpp | 18 +++++------ src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 36 ++++++++++----------- src/coreclr/jit/lsra.h | 2 +- src/coreclr/jit/lsraarm64.cpp | 26 +++++++-------- 4 files changed, 39 insertions(+), 43 deletions(-) diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 497b3b424a85dc..424689ff13dade 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -1892,17 +1892,15 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass))); op1 = impPopStack().val; - if (!op1->OperIsLocal()) + if (op1->TypeGet() == TYP_STRUCT) { - unsigned tmp = lvaGrabTemp(true DEBUGARG("VectorTableLookup temp tree")); - - impAssignTempGen(tmp, op1, CHECK_SPILL_NONE); - op1 = gtNewLclvNode(tmp, argType); - } + if (!op1->OperIs(GT_LCL_VAR)) + { + unsigned tmp = lvaGrabTemp(true DEBUGARG("VectorTableLookup temp tree")); - if (argType == TYP_STRUCT) - { - assert(op1->OperIsLocal()); + impAssignTempGen(tmp, op1, CHECK_SPILL_NONE); + op1 = gtNewLclvNode(tmp, argType); + } LclVarDsc* op1VarDsc = lvaGetDesc(op1->AsLclVar()); unsigned lclNum = lvaGetLclNum(op1VarDsc); @@ -1925,7 +1923,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, } else { - assert(argType == TYP_SIMD16); + assert(op1->TypeGet() == TYP_SIMD16); retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); } break; diff --git 
a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 2fafc229f5708b..9843cd01babb20 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -497,28 +497,28 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) case NI_AdvSimd_VectorTableLookup: case NI_AdvSimd_Arm64_VectorTableLookup: { - - if (!intrin.op1->OperIsFieldList()) - { - assert(!"Expect the first operand of VectorTableLookup to be FIELD_LIST"); - } - - GenTreeFieldList* fieldList = intrin.op1->AsFieldList(); - GenTree* firstField = fieldList->Uses().GetHead()->GetNode(); - op1Reg = firstField->GetRegNum(); -#ifdef DEBUG - regNumber argReg = op1Reg; -#endif - unsigned regCount = 0; - for (GenTreeFieldList::Use& use : fieldList->Uses()) + unsigned regCount = 1; + if (intrin.op1->OperIsFieldList()) { - regCount++; + GenTreeFieldList* fieldList = intrin.op1->AsFieldList(); + GenTree* firstField = fieldList->Uses().GetHead()->GetNode(); + op1Reg = firstField->GetRegNum(); + INDEBUG(regNumber argReg = op1Reg); + unsigned regCount = 0; + for (GenTreeFieldList::Use& use : fieldList->Uses()) + { + regCount++; #ifdef DEBUG - GenTree* argNode = use.GetNode(); - assert(argReg == argNode->GetRegNum()); - argReg = (regNumber)(argReg + 1); + GenTree* argNode = use.GetNode(); + assert(argReg == argNode->GetRegNum()); + argReg = REG_NEXT(argReg); #endif + } + } + else + { + op1Reg = intrin.op1->GetRegNum(); } switch (regCount) diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 727885a914174a..c440ec56d57d4c 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -2281,7 +2281,7 @@ class RefPosition #ifdef TARGET_ARM64 // If this refposition needs consecutive register assignment - bool needsConsecutive; + unsigned char needsConsecutive : 1; // How many consecutive registers does this and subsequent refPositions need unsigned char regCount : 3; diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 4d50282aae9bf1..27a56ff8586f3d 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -44,32 +44,29 @@ RefPosition* LinearScan::getNextConsecutiveRefPosition(RefPosition* refPosition) } //------------------------------------------------------------------------ -// setNextConsecutiveRegisterAssignment: Set the consecutive register mask to the -// subsequent refpositions +// setNextConsecutiveRegisterAssignment: For subsequent refPositions, set the register +// requirement to be the consecutive register(s) of the register that is assigned to +// the firstRefPosition. // // Arguments: -// tree - The GT_HWINTRINSIC node of interest -// pDstCount - OUT parameter - the number of registers defined for the given node -// -// Return Value: -// The number of sources consumed by this node. +// firstRefPosition - First refPosition of the series of consecutive registers. +// firstRegAssigned - Register assigned to the first refposition. 
// void LinearScan::setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosition, regMaskTP firstRegAssigned) { + regNumber firstReg = genRegNumFromMask(firstRegAssigned); assert(isSingleRegister(firstRegAssigned)); assert(firstRefPosition->isFirstRefPositionOfConsecutiveRegisters()); + assert(emitter::isVectorRegister(firstReg)); RefPosition* consecutiveRefPosition = getNextConsecutiveRefPosition(firstRefPosition); // should have at least one consecutive register requirement assert(consecutiveRefPosition != nullptr); - regNumber firstReg = genRegNumFromMask(firstRegAssigned); regNumber regToAssign = firstReg == REG_FP_LAST ? REG_FP_FIRST : REG_NEXT(firstReg); -#ifdef DEBUG - int refPosCount = 1; -#endif // DEBUG + INDEBUG(int refPosCount = 1); while (consecutiveRefPosition != nullptr) { consecutiveRefPosition->registerAssignment = genRegMask(regToAssign); @@ -1055,6 +1052,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou RefPosition* firstRefPos = nullptr; RefPosition* lastRefPos = nullptr; + NextConsecutiveRefPositionsMap* refPositionMap = getNextConsecutiveRefPositionsMap(); for (GenTreeFieldList::Use& use : intrin.op1->AsFieldList()->Uses()) { currRefPos = BuildUse(use.GetNode()); @@ -1066,9 +1064,9 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou firstRefPos = currRefPos; } - getNextConsecutiveRefPositionsMap()->Set(lastRefPos, currRefPos, - LinearScan::NextConsecutiveRefPositionsMap::Overwrite); - getNextConsecutiveRefPositionsMap()->Set(currRefPos, nullptr); + refPositionMap->Set(lastRefPos, currRefPos, + LinearScan::NextConsecutiveRefPositionsMap::Overwrite); + refPositionMap->Set(currRefPos, nullptr); lastRefPos = currRefPos; regCount++; From 4526b41c8bb3d4d7249071ef74bc407d1a3ce4e8 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 2 Feb 2023 10:07:31 -0800 Subject: [PATCH 045/125] Add the missing csproj --- .../Arm/AdvSimd/VectorTableLookup.csproj | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/VectorTableLookup.csproj diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/VectorTableLookup.csproj b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/VectorTableLookup.csproj new file mode 100644 index 00000000000000..5fdae5e5d7b9f3 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/VectorTableLookup.csproj @@ -0,0 +1,22 @@ + + + + true + Library + SharedLibrary + System.Private.CoreLib + 436 + 436 + + + + + + + + \ No newline at end of file From 46f0abd86d6e4cd2e0f89042b9cbea7e912e9acd Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 2 Feb 2023 21:48:05 -0800 Subject: [PATCH 046/125] Fix test cases --- .../JIT/HardwareIntrinsics/Arm/AdvSimd/Program.AdvSimd.cs | 2 -- .../HardwareIntrinsics/Arm/Shared/VectorLookup_2Test.template | 3 ++- .../HardwareIntrinsics/Arm/Shared/VectorLookup_3Test.template | 3 ++- .../HardwareIntrinsics/Arm/Shared/VectorLookup_4Test.template | 3 ++- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/Program.AdvSimd.cs b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/Program.AdvSimd.cs index 48c6003e615bd7..af869e6e80f7fc 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/Program.AdvSimd.cs +++ b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/Program.AdvSimd.cs @@ -1,8 +1,6 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
-extern alias CoreLib; -global using AdvSimd = CoreLib::System.Runtime.Intrinsics.Arm.AdvSimd; using System; using System.Collections.Generic; diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_2Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_2Test.template index 4b28a7c1e5f5cc..460cbe3340e0a1 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_2Test.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_2Test.template @@ -7,7 +7,8 @@ * changes, please update the corresponding template and run according to the * * directions listed in the file. * ******************************************************************************/ - +extern alias CoreLib; +using AdvSimd = CoreLib::System.Runtime.Intrinsics.Arm.AdvSimd; using System; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_3Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_3Test.template index c2472b39191bbf..7b60606c5dab56 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_3Test.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_3Test.template @@ -7,7 +7,8 @@ * changes, please update the corresponding template and run according to the * * directions listed in the file. * ******************************************************************************/ - +extern alias CoreLib; +using AdvSimd = CoreLib::System.Runtime.Intrinsics.Arm.AdvSimd; using System; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_4Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_4Test.template index c6f1f941764a5d..c196615b324d20 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_4Test.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_4Test.template @@ -7,7 +7,8 @@ * changes, please update the corresponding template and run according to the * * directions listed in the file. 
* ******************************************************************************/ - +extern alias CoreLib; +using AdvSimd = CoreLib::System.Runtime.Intrinsics.Arm.AdvSimd; using System; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; From 6ef7c6825a8bc157498ee85ea8b79875e493bc11 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 3 Feb 2023 10:32:02 -0800 Subject: [PATCH 047/125] Add fake lib for AdvSimd.Arm64* as well --- .../HardwareIntrinsics/Arm/AdvSimd.Arm64/AdvSimd.Arm64_r.csproj | 1 + .../HardwareIntrinsics/Arm/AdvSimd.Arm64/AdvSimd.Arm64_ro.csproj | 1 + 2 files changed, 2 insertions(+) diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd.Arm64/AdvSimd.Arm64_r.csproj b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd.Arm64/AdvSimd.Arm64_r.csproj index 5ca6505a8b662f..eb6eb02de71ab7 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd.Arm64/AdvSimd.Arm64_r.csproj +++ b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd.Arm64/AdvSimd.Arm64_r.csproj @@ -11,5 +11,6 @@ + diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd.Arm64/AdvSimd.Arm64_ro.csproj b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd.Arm64/AdvSimd.Arm64_ro.csproj index d3a58db15324ee..30338e7b5e2f32 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd.Arm64/AdvSimd.Arm64_ro.csproj +++ b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd.Arm64/AdvSimd.Arm64_ro.csproj @@ -11,5 +11,6 @@ + From b0b6a5e9abea195bd6dc35c5a638d280f20d6ed2 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 3 Feb 2023 19:21:03 -0800 Subject: [PATCH 048/125] Remove the workaround --- .../ref/System.Private.CoreLib.ExtraApis.cs | 21 ----- .../ref/System.Private.CoreLib.ExtraApis.txt | 12 --- .../src/CompatibilitySuppressions.xml | 72 ++++++++++++++++++ .../src/CompatibilitySuppressions.xml | 76 ------------------- 4 files changed, 72 insertions(+), 109 deletions(-) delete mode 100644 src/libraries/System.Runtime.Intrinsics/src/CompatibilitySuppressions.xml diff --git a/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.cs b/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.cs index 97ff2a1d64f1e3..a54004d00733f1 100644 --- a/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.cs +++ b/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.cs @@ -37,25 +37,4 @@ public static partial class Debug public static System.Diagnostics.DebugProvider SetProvider(System.Diagnostics.DebugProvider provider) { throw null; } } } -namespace System.Runtime.Intrinsics.Arm -{ - public abstract partial class AdvSimd : ArmBase - { - public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } 
- public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - public abstract partial class Arm64 : ArmBase.Arm64 - { - public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - } - } -} diff --git a/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.txt b/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.txt index 5a861db7629d75..0babd819e25d04 100644 --- a/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.txt +++ b/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.txt @@ -5,15 +5,3 @@ T:System.Runtime.Serialization.DeserializationToken M:System.Runtime.Serialization.SerializationInfo.StartDeserialization T:System.Diagnostics.DebugProvider M:System.Diagnostics.Debug.SetProvider(System.Diagnostics.DebugProvider) -M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector128) -M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector128) -M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector128) -M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), 
System.Runtime.Intrinsics.Vector128) -M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector128) -M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector128) -M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector64) -M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector64) -M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector64) -M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector64) -M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector64) -M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector64) \ No newline at end of file diff --git a/src/libraries/System.Private.CoreLib/src/CompatibilitySuppressions.xml b/src/libraries/System.Private.CoreLib/src/CompatibilitySuppressions.xml index 3f7209cd1a5d5f..547a6a499f1bf3 100644 --- a/src/libraries/System.Private.CoreLib/src/CompatibilitySuppressions.xml +++ b/src/libraries/System.Private.CoreLib/src/CompatibilitySuppressions.xml @@ -21,4 +21,76 @@ CP0014 M:System.Runtime.InteropServices.Marshal.CreateWrapperOfType(System.Object,System.Type)->object?:[T:System.Diagnostics.CodeAnalysis.NotNullIfNotNullAttribute] + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector128{System.Byte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector128{System.Byte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector128{System.Byte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + 
M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector128{System.SByte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector128{System.SByte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector128{System.SByte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector64{System.Byte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector64{System.Byte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector64{System.Byte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector64{System.SByte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector64{System.SByte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector64{System.SByte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + \ No newline at end of file diff --git a/src/libraries/System.Runtime.Intrinsics/src/CompatibilitySuppressions.xml 
b/src/libraries/System.Runtime.Intrinsics/src/CompatibilitySuppressions.xml deleted file mode 100644 index 3d3a055c0f9332..00000000000000 --- a/src/libraries/System.Runtime.Intrinsics/src/CompatibilitySuppressions.xml +++ /dev/null @@ -1,76 +0,0 @@ - - - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector128{System.Byte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector128{System.Byte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector128{System.Byte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector128{System.SByte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector128{System.SByte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector128{System.SByte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector64{System.Byte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector64{System.Byte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - 
M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector64{System.Byte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector64{System.SByte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector64{System.SByte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector64{System.SByte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - \ No newline at end of file From 0197b7330ecd1b041e2dced99df911bd7c85d05a Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 3 Feb 2023 19:23:45 -0800 Subject: [PATCH 049/125] Use template to control if consecutive registers is needed or not --- src/coreclr/jit/compiler.cpp | 4 ++++ src/coreclr/jit/compiler.h | 4 ++++ src/coreclr/jit/hwintrinsicarm64.cpp | 1 + src/coreclr/jit/lsra.cpp | 36 ++++++++++++++++++++-------- src/coreclr/jit/lsra.h | 5 +++- src/coreclr/jit/lsraarm64.cpp | 11 ++++----- 6 files changed, 44 insertions(+), 17 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index efea87842f7766..c7bbab4977abc2 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -6448,6 +6448,10 @@ int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr, compBasicBlockID = 0; #endif +#ifdef TARGET_ARM64 + info.needsConsecutiveRegisters = false; +#endif + /* Initialize emitter */ if (!compIsForInlining()) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 1299b34bb431e8..8177652b0b469f 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -9815,6 +9815,10 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX // Number of class profile probes in this method unsigned compHandleHistogramProbeCount; +#ifdef TARGET_ARM64 + bool needsConsecutiveRegisters; +#endif + } info; ReturnTypeDesc compRetTypeDesc; // ABI return type descriptor for the method diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 424689ff13dade..15fca4846dd2e8 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -1879,6 +1879,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_AdvSimd_VectorTableLookup: case NI_AdvSimd_Arm64_VectorTableLookup: { + info.needsConsecutiveRegisters = true; assert(sig->numArgs == 2); CORINFO_ARG_LIST_HANDLE arg1 = sig->args; diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 5bdabf9ee5778e..35addebebba186 100644 
--- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -1261,7 +1261,16 @@ PhaseStatus LinearScan::doLinearScan() DBEXEC(VERBOSE, lsraDumpIntervals("after buildIntervals")); initVarRegMaps(); + +#ifdef TARGET_ARM64 + if (compiler->info.needsConsecutiveRegisters) + { + allocateRegisters(); + } + else +#endif // TARGET_ARM64 allocateRegisters(); + allocationPassComplete = true; compiler->EndPhase(PHASE_LINEAR_SCAN_ALLOC); resolveRegisters(); @@ -2848,13 +2857,6 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, assignPhysReg(availablePhysRegRecord, currentInterval); refPosition->registerAssignment = foundRegBit; -#ifdef TARGET_ARM64 - if (refPosition->isFirstRefPositionOfConsecutiveRegisters()) - { - setNextConsecutiveRegisterAssignment(refPosition, foundRegBit); - } -#endif // TARGET_ARM64 - return foundReg; } @@ -4536,6 +4538,9 @@ void LinearScan::freeRegisters(regMaskTP regsToFree) // LinearScan::allocateRegisters: Perform the actual register allocation by iterating over // all of the previously constructed Intervals // +#ifdef TARGET_ARM64 +template +#endif void LinearScan::allocateRegisters() { JITDUMP("*************** In LinearScan::allocateRegisters()\n"); @@ -5315,8 +5320,13 @@ void LinearScan::allocateRegisters() regMaskTP assignedRegMask = getRegMask(assignedRegister, currentInterval->registerType); #ifdef TARGET_ARM64 - if (currentRefPosition.needsConsecutive) + if (hasConsecutiveRegister && currentRefPosition.needsConsecutive) { + if (currentRefPosition.regCount != 0) + { + setNextConsecutiveRegisterAssignment(¤tRefPosition, copyReg); + } + // For consecutive register, it doesn't matter what the assigned register was. // We have just assigned it `copyRegMask` and that's the one in-use, and not the // one that was assigned previously. @@ -5381,7 +5391,7 @@ void LinearScan::allocateRegisters() } #ifdef TARGET_ARM64 - if (currentRefPosition.needsConsecutive) + if (hasConsecutiveRegister && currentRefPosition.needsConsecutive) { // For consecutive register, we would like to assign a register (if not already assigned) // to the 1st refPosition and the subsequent refPositions will just get the consecutive register. @@ -5394,7 +5404,7 @@ void LinearScan::allocateRegisters() // subsequent registers to the remaining position and skip the allocation for the // 1st refPosition altogether. - setNextConsecutiveRegisterAssignment(¤tRefPosition, assignedRegBit); + setNextConsecutiveRegisterAssignment(¤tRefPosition, assignedRegister); } } else @@ -5468,6 +5478,12 @@ void LinearScan::allocateRegisters() unassignPhysReg(currentInterval->assignedReg, nullptr); } assignedRegister = allocateReg(currentInterval, ¤tRefPosition DEBUG_ARG(®isterScore)); +#ifdef TARGET_ARM64 + if (hasConsecutiveRegister && currentRefPosition.isFirstRefPositionOfConsecutiveRegisters()) + { + setNextConsecutiveRegisterAssignment(¤tRefPosition, assignedRegister); + } +#endif // TARGET_ARM64 } // If no register was found, this RefPosition must not require a register. 
diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index c440ec56d57d4c..38dbd096da57ee 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -639,6 +639,9 @@ class LinearScan : public LinearScanInterface void buildIntervals(); // This is where the actual assignment is done +#ifdef TARGET_ARM64 + template +#endif void allocateRegisters(); // This is the resolution phase, where cross-block mismatches are fixed up @@ -1184,7 +1187,7 @@ class LinearScan : public LinearScanInterface ****************************************************************************/ #if defined(TARGET_ARM64) - void setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosition, regMaskTP firstRegAssigned); + void setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosition, regNumber firstRegAssigned); #endif // TARGET_ARM64 regMaskTP getFreeCandidates(regMaskTP candidates ARM_ARG(var_types regType)) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 27a56ff8586f3d..c8a0bfd1e97978 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -50,21 +50,20 @@ RefPosition* LinearScan::getNextConsecutiveRefPosition(RefPosition* refPosition) // // Arguments: // firstRefPosition - First refPosition of the series of consecutive registers. -// firstRegAssigned - Register assigned to the first refposition. +// firstReg - Register assigned to the first refposition. // -void LinearScan::setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosition, regMaskTP firstRegAssigned) +void LinearScan::setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosition, regNumber firstRegAssigned) { - regNumber firstReg = genRegNumFromMask(firstRegAssigned); - assert(isSingleRegister(firstRegAssigned)); + assert(isSingleRegister(genRegMask(firstRegAssigned))); assert(firstRefPosition->isFirstRefPositionOfConsecutiveRegisters()); - assert(emitter::isVectorRegister(firstReg)); + assert(emitter::isVectorRegister(firstRegAssigned)); RefPosition* consecutiveRefPosition = getNextConsecutiveRefPosition(firstRefPosition); // should have at least one consecutive register requirement assert(consecutiveRefPosition != nullptr); - regNumber regToAssign = firstReg == REG_FP_LAST ? REG_FP_FIRST : REG_NEXT(firstReg); + regNumber regToAssign = firstRegAssigned == REG_FP_LAST ? REG_FP_FIRST : REG_NEXT(firstRegAssigned); INDEBUG(int refPosCount = 1); while (consecutiveRefPosition != nullptr) From 273402380bad3101c62458ee7e1e5365eb490c42 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 3 Feb 2023 19:24:23 -0800 Subject: [PATCH 050/125] jit format --- src/coreclr/jit/lsra.cpp | 4 ++-- src/coreclr/jit/lsra.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 35addebebba186..9f292ef415094e 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -1269,7 +1269,7 @@ PhaseStatus LinearScan::doLinearScan() } else #endif // TARGET_ARM64 - allocateRegisters(); + allocateRegisters(); allocationPassComplete = true; compiler->EndPhase(PHASE_LINEAR_SCAN_ALLOC); @@ -5325,7 +5325,7 @@ void LinearScan::allocateRegisters() if (currentRefPosition.regCount != 0) { setNextConsecutiveRegisterAssignment(¤tRefPosition, copyReg); - } + } // For consecutive register, it doesn't matter what the assigned register was. 
// We have just assigned it `copyRegMask` and that's the one in-use, and not the diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 38dbd096da57ee..85e82f396d83b5 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -638,7 +638,7 @@ class LinearScan : public LinearScanInterface // This does the dataflow analysis and builds the intervals void buildIntervals(); - // This is where the actual assignment is done +// This is where the actual assignment is done #ifdef TARGET_ARM64 template #endif From 1cb22d00d7159f40d33538ea29008e6764d52e45 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Sat, 4 Feb 2023 09:07:57 -0800 Subject: [PATCH 051/125] fix the workaround --- .../src/CompatibilitySuppressions.xml | 72 ------------------ .../src/CompatibilitySuppressions.xml | 76 +++++++++++++++++++ 2 files changed, 76 insertions(+), 72 deletions(-) create mode 100644 src/libraries/System.Runtime.Intrinsics/src/CompatibilitySuppressions.xml diff --git a/src/libraries/System.Private.CoreLib/src/CompatibilitySuppressions.xml b/src/libraries/System.Private.CoreLib/src/CompatibilitySuppressions.xml index 547a6a499f1bf3..3f7209cd1a5d5f 100644 --- a/src/libraries/System.Private.CoreLib/src/CompatibilitySuppressions.xml +++ b/src/libraries/System.Private.CoreLib/src/CompatibilitySuppressions.xml @@ -21,76 +21,4 @@ CP0014 M:System.Runtime.InteropServices.Marshal.CreateWrapperOfType(System.Object,System.Type)->object?:[T:System.Diagnostics.CodeAnalysis.NotNullIfNotNullAttribute] - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector128{System.Byte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector128{System.Byte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector128{System.Byte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector128{System.SByte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector128{System.SByte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - 
M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector128{System.SByte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector64{System.Byte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector64{System.Byte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector64{System.Byte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector64{System.SByte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector64{System.SByte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector64{System.SByte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - \ No newline at end of file diff --git a/src/libraries/System.Runtime.Intrinsics/src/CompatibilitySuppressions.xml b/src/libraries/System.Runtime.Intrinsics/src/CompatibilitySuppressions.xml new file mode 100644 index 00000000000000..3d3a055c0f9332 --- /dev/null +++ b/src/libraries/System.Runtime.Intrinsics/src/CompatibilitySuppressions.xml @@ -0,0 +1,76 @@ + + + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector128{System.Byte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + 
M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector128{System.Byte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector128{System.Byte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector128{System.SByte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector128{System.SByte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector128{System.SByte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector64{System.Byte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector64{System.Byte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector64{System.Byte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector64{System.SByte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + 
M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector64{System.SByte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector64{System.SByte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + \ No newline at end of file From 6e30b3ad99cf37076287c0f719fcf9b799f2255a Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Sat, 4 Feb 2023 09:31:06 -0800 Subject: [PATCH 052/125] Revert "fix the workaround" This reverts commit 1cb22d00d7159f40d33538ea29008e6764d52e45. --- .../src/CompatibilitySuppressions.xml | 72 ++++++++++++++++++ .../src/CompatibilitySuppressions.xml | 76 ------------------- 2 files changed, 72 insertions(+), 76 deletions(-) delete mode 100644 src/libraries/System.Runtime.Intrinsics/src/CompatibilitySuppressions.xml diff --git a/src/libraries/System.Private.CoreLib/src/CompatibilitySuppressions.xml b/src/libraries/System.Private.CoreLib/src/CompatibilitySuppressions.xml index 3f7209cd1a5d5f..547a6a499f1bf3 100644 --- a/src/libraries/System.Private.CoreLib/src/CompatibilitySuppressions.xml +++ b/src/libraries/System.Private.CoreLib/src/CompatibilitySuppressions.xml @@ -21,4 +21,76 @@ CP0014 M:System.Runtime.InteropServices.Marshal.CreateWrapperOfType(System.Object,System.Type)->object?:[T:System.Diagnostics.CodeAnalysis.NotNullIfNotNullAttribute] + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector128{System.Byte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector128{System.Byte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector128{System.Byte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector128{System.SByte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + 
M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector128{System.SByte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector128{System.SByte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector64{System.Byte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector64{System.Byte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector64{System.Byte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector64{System.SByte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector64{System.SByte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector64{System.SByte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + \ No newline at end of file diff --git a/src/libraries/System.Runtime.Intrinsics/src/CompatibilitySuppressions.xml b/src/libraries/System.Runtime.Intrinsics/src/CompatibilitySuppressions.xml deleted file mode 100644 index 3d3a055c0f9332..00000000000000 --- a/src/libraries/System.Runtime.Intrinsics/src/CompatibilitySuppressions.xml +++ /dev/null @@ -1,76 +0,0 @@ - - - - - CP0002 - 
M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector128{System.Byte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector128{System.Byte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector128{System.Byte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector128{System.SByte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector128{System.SByte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector128{System.SByte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector64{System.Byte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector64{System.Byte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector64{System.Byte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - 
M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector64{System.SByte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector64{System.SByte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector64{System.SByte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - \ No newline at end of file From 721823bb2df5da37abe163a2ea78295d1350c162 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Sat, 4 Feb 2023 09:31:17 -0800 Subject: [PATCH 053/125] Revert "Remove the workaround" This reverts commit b0b6a5e9abea195bd6dc35c5a638d280f20d6ed2. --- .../ref/System.Private.CoreLib.ExtraApis.cs | 21 +++++ .../ref/System.Private.CoreLib.ExtraApis.txt | 12 +++ .../src/CompatibilitySuppressions.xml | 72 ------------------ .../src/CompatibilitySuppressions.xml | 76 +++++++++++++++++++ 4 files changed, 109 insertions(+), 72 deletions(-) create mode 100644 src/libraries/System.Runtime.Intrinsics/src/CompatibilitySuppressions.xml diff --git a/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.cs b/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.cs index a54004d00733f1..97ff2a1d64f1e3 100644 --- a/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.cs +++ b/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.cs @@ -37,4 +37,25 @@ public static partial class Debug public static System.Diagnostics.DebugProvider SetProvider(System.Diagnostics.DebugProvider provider) { throw null; } } } +namespace System.Runtime.Intrinsics.Arm +{ + public abstract partial class AdvSimd : ArmBase + { + public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, 
System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + public abstract partial class Arm64 : ArmBase.Arm64 + { + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + } + } +} diff --git a/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.txt b/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.txt index 0babd819e25d04..5a861db7629d75 100644 --- a/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.txt +++ b/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.txt @@ -5,3 +5,15 @@ T:System.Runtime.Serialization.DeserializationToken M:System.Runtime.Serialization.SerializationInfo.StartDeserialization T:System.Diagnostics.DebugProvider M:System.Diagnostics.Debug.SetProvider(System.Diagnostics.DebugProvider) +M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector128) +M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector128) +M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector128) +M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector128) +M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, 
System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector128) +M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector128) +M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector64) +M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector64) +M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector64) +M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector64) +M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector64) +M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector64) \ No newline at end of file diff --git a/src/libraries/System.Private.CoreLib/src/CompatibilitySuppressions.xml b/src/libraries/System.Private.CoreLib/src/CompatibilitySuppressions.xml index 547a6a499f1bf3..3f7209cd1a5d5f 100644 --- a/src/libraries/System.Private.CoreLib/src/CompatibilitySuppressions.xml +++ b/src/libraries/System.Private.CoreLib/src/CompatibilitySuppressions.xml @@ -21,76 +21,4 @@ CP0014 M:System.Runtime.InteropServices.Marshal.CreateWrapperOfType(System.Object,System.Type)->object?:[T:System.Diagnostics.CodeAnalysis.NotNullIfNotNullAttribute] - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector128{System.Byte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector128{System.Byte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector128{System.Byte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - 
M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector128{System.SByte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector128{System.SByte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector128{System.SByte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector64{System.Byte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector64{System.Byte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector64{System.Byte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector64{System.SByte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector64{System.SByte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector64{System.SByte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - \ No newline at end of file diff --git a/src/libraries/System.Runtime.Intrinsics/src/CompatibilitySuppressions.xml 
b/src/libraries/System.Runtime.Intrinsics/src/CompatibilitySuppressions.xml new file mode 100644 index 00000000000000..3d3a055c0f9332 --- /dev/null +++ b/src/libraries/System.Runtime.Intrinsics/src/CompatibilitySuppressions.xml @@ -0,0 +1,76 @@ + + + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector128{System.Byte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector128{System.Byte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector128{System.Byte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector128{System.SByte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector128{System.SByte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector128{System.SByte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector64{System.Byte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector64{System.Byte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + 
M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector64{System.Byte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector64{System.SByte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector64{System.SByte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + + CP0002 + M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector64{System.SByte}) + ref/net8.0/System.Runtime.Intrinsics.dll + lib/net8.0/System.Runtime.Intrinsics.dll + + \ No newline at end of file From 5b9fac51c2c57980867fa77d364d7205a4d07efa Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 6 Feb 2023 11:24:53 -0800 Subject: [PATCH 054/125] Add VectorTableLookupExtensions in libraries --- .../Arm/AdvSimd.PlatformNotSupported.cs | 73 +++++++++++++++++++ .../System/Runtime/Intrinsics/Arm/AdvSimd.cs | 72 ++++++++++++++++++ .../ref/System.Runtime.Intrinsics.cs | 24 ++++++ 3 files changed, 169 insertions(+) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.PlatformNotSupported.cs index eaf512e42e7763..a945aecc79d096 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.PlatformNotSupported.cs @@ -3710,6 +3710,43 @@ internal Arm64() { } /// public static Vector128 VectorTableLookupExtension(Vector128 defaultValues, Vector128 table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + /// + /// uint8x16_t vqtbx2q_u8(uint8x16x2_t t, uint8x16_t idx) + /// A64: TBX Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + /// + + public static Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + + /// + /// int8x16_t vqtbx2q_s8(int8x16x2_t t, uint8x16_t idx) + /// A64: TBX Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + /// + public static Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + + /// + /// uint8x16_t vqtbx3q_u8(uint8x16x3_t t, uint8x16_t idx) + /// A64: TBX Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B}, Vm.16B + /// + public static Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + + /// + /// int8x16_t 
vqtbx3q_s8(int8x16x3_t t, uint8x16_t idx) + /// A64: TBX Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B}, Vm.16B + /// + public static Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + + /// + /// uint8x16_t vqtbx4q_u8(uint8x16x4_t t, uint8x16_t idx) + /// A64: TBX Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B, Vn+3.16B}, Vm.16B + /// + public static Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + + /// + /// int8x16_t vqtbx4q_s8(int8x16x4_t t, uint8x16_t idx) + /// A64: TBX Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B, Vn+3.16B}, Vm.16B + /// + public static Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw new PlatformNotSupportedException(); } + /// /// uint8x8_t vzip2_u8(uint8x8_t a, uint8x8_t b) /// A64: ZIP2 Vd.8B, Vn.8B, Vm.8B @@ -15053,6 +15090,42 @@ internal Arm64() { } /// public static Vector64 VectorTableLookupExtension(Vector64 defaultValues, Vector128 table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + /// + /// uint8x8_t vqtbx2q_u8(uint8x16x2_t t, uint8x8_t idx) + /// A64: TBX Vd.8B, {Vn.16B, Vn+1.16B}, Vm.8B + /// + public static Vector64 VectorTableLookupExtension(Vector64 defaultValues, (Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + + /// + /// int8x8_t vqtbx2q_u8(int8x16x2_t t, uint8x8_t idx) + /// A64: TBX Vd.8B, {Vn.16B, Vn+1.16B}, Vm.8B + /// + public static Vector64 VectorTableLookupExtension(Vector64 defaultValues, (Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + + /// + /// uint8x8_t vqtbx3q_u8(uint8x16x3_t t, uint8x8_t idx) + /// A64: TBX Vd.8B, {Vn.16B, Vn+1.16B, Vn+2.16B}, Vm.8B + /// + public static Vector64 VectorTableLookupExtension(Vector64 defaultValues, (Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + + /// + /// int8x8_t vqtbx3q_u8(int8x16x3_t t, uint8x8_t idx) + /// A64: TBX Vd.8B, {Vn.16B, Vn+1.16B, Vn+2.16B}, Vm.8B + /// + public static Vector64 VectorTableLookupExtension(Vector64 defaultValues, (Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + + /// + /// uint8x8_t vqtbx4q_u8(uint8x16x4_t t, uint8x8_t idx) + /// A64: TBX Vd.8B, {Vn.16B, Vn+1.16B, Vn+2.16B, Vn+3.16B}, Vm.8B + /// + public static Vector64 VectorTableLookupExtension(Vector64 defaultValues, (Vector128, Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + + /// + /// int8x8_t vqtbx4q_u8(int8x16x4_t t, uint8x8_t idx) + /// A64: TBX Vd.8B, {Vn.16B, Vn+1.16B, Vn+2.16B, Vn+3.16B}, Vm.8B + /// + public static Vector64 VectorTableLookupExtension(Vector64 defaultValues, (Vector128, Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw new PlatformNotSupportedException(); } + /// /// uint8x8_t veor_u8 (uint8x8_t a, uint8x8_t b) /// A32: VEOR Dd, Dn, Dm diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs index 7a09be5a0820cf..dbe760cadee5f8 100644 --- 
a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs @@ -3710,6 +3710,42 @@ internal Arm64() { } /// public static Vector128 VectorTableLookupExtension(Vector128 defaultValues, Vector128 table, Vector128 byteIndexes) => VectorTableLookupExtension(defaultValues, table, byteIndexes); + /// + /// uint8x16_t vqtbx2q_u8(uint8x16x2_t t, uint8x16_t idx) + /// A64: TBX Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + /// + public static Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookupExtension(table, byteIndexes); + + /// + /// int8x16_t vqtbx2q_s8(int8x16x2_t t, uint8x16_t idx) + /// A64: TBX Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B + /// + public static Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookupExtension(table, byteIndexes); + + /// + /// uint8x16_t vqtbx3q_u8(uint8x16x3_t t, uint8x16_t idx) + /// A64: TBX Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B}, Vm.16B + /// + public static Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookupExtension(table, byteIndexes); + + /// + /// int8x16_t vqtbx3q_s8(int8x16x3_t t, uint8x16_t idx) + /// A64: TBX Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B}, Vm.16B + /// + public static Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookupExtension(table, byteIndexes); + + /// + /// uint8x16_t vqtbx4q_u8(uint8x16x4_t t, uint8x16_t idx) + /// A64: TBX Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B, Vn+3.16B}, Vm.16B + /// + public static Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookupExtension(table, byteIndexes); + + /// + /// int8x16_t vqtbx4q_s8(int8x16x4_t t, uint8x16_t idx) + /// A64: TBX Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B, Vn+3.16B}, Vm.16B + /// + public static Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookupExtension(table, byteIndexes); + /// /// uint8x8_t vzip2_u8(uint8x8_t a, uint8x8_t b) /// A64: ZIP2 Vd.8B, Vn.8B, Vm.8B @@ -15053,6 +15089,42 @@ internal Arm64() { } /// public static Vector64 VectorTableLookupExtension(Vector64 defaultValues, Vector128 table, Vector64 byteIndexes) => VectorTableLookupExtension(defaultValues, table, byteIndexes); + /// + /// uint8x8_t vqtbx2q_u8(uint8x16x2_t t, uint8x8_t idx) + /// A64: TBX Vd.8B, {Vn.16B, Vn+1.16B}, Vm.8B + /// + public static Vector64 VectorTableLookupExtension(Vector64 defaultValues, (Vector128, Vector128) table, Vector64 byteIndexes) => VectorTableLookupExtension(table, byteIndexes); + + /// + /// int8x8_t vqtbx2q_u8(int8x16x2_t t, uint8x8_t idx) + /// A64: TBX Vd.8B, {Vn.16B, Vn+1.16B}, Vm.8B + /// + public static Vector64 VectorTableLookupExtension(Vector64 defaultValues, (Vector128, Vector128) table, Vector64 byteIndexes) => VectorTableLookupExtension(table, byteIndexes); + + /// + /// uint8x8_t vqtbx3q_u8(uint8x16x3_t t, uint8x8_t idx) + /// A64: TBX Vd.8B, {Vn.16B, Vn+1.16B, Vn+2.16B}, Vm.8B + /// + public static Vector64 VectorTableLookupExtension(Vector64 defaultValues, (Vector128, Vector128, Vector128) table, Vector64 byteIndexes) => 
VectorTableLookupExtension(table, byteIndexes); + + /// + /// int8x8_t vqtbx3q_u8(int8x16x3_t t, uint8x8_t idx) + /// A64: TBX Vd.8B, {Vn.16B, Vn+1.16B, Vn+2.16B}, Vm.8B + /// + public static Vector64 VectorTableLookupExtension(Vector64 defaultValues, (Vector128, Vector128, Vector128) table, Vector64 byteIndexes) => VectorTableLookupExtension(table, byteIndexes); + + /// + /// uint8x8_t vqtbx4q_u8(uint8x16x4_t t, uint8x8_t idx) + /// A64: TBX Vd.8B, {Vn.16B, Vn+1.16B, Vn+2.16B, Vn+3.16B}, Vm.8B + /// + public static Vector64 VectorTableLookupExtension(Vector64 defaultValues, (Vector128, Vector128, Vector128, Vector128) table, Vector64 byteIndexes) => VectorTableLookupExtension(table, byteIndexes); + + /// + /// int8x8_t vqtbx4q_u8(int8x16x4_t t, uint8x8_t idx) + /// A64: TBX Vd.8B, {Vn.16B, Vn+1.16B, Vn+2.16B, Vn+3.16B}, Vm.8B + /// + public static Vector64 VectorTableLookupExtension(Vector64 defaultValues, (Vector128, Vector128, Vector128, Vector128) table, Vector64 byteIndexes) => VectorTableLookupExtension(table, byteIndexes); + /// /// uint8x8_t veor_u8 (uint8x8_t a, uint8x8_t b) /// A32: VEOR Dd, Dn, Dm diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index fec48c2c5c1bed..ba7f49e1ae780b 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -2856,8 +2856,20 @@ public unsafe static void StoreSelectedScalar(ulong* address, System.Runtime.Int public static System.Runtime.Intrinsics.Vector128 SubtractWideningUpper(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } public static System.Runtime.Intrinsics.Vector64 VectorTableLookup(System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector64 VectorTableLookup(System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((Vector128, Vector128) table, Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((Vector128, Vector128) table, Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector64 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector64 defaultValues, System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector64 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector64 defaultValues, System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 
VectorTableLookupExtension(System.Runtime.Intrinsics.Vector64 defaultValues, (Vector128, Vector128) table, Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector64 defaultValues, (Vector128, Vector128) table, Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector64 defaultValues, (Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector64 defaultValues, (Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector64 defaultValues, (Vector128, Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector64 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector64 defaultValues, (Vector128, Vector128, Vector128, Vector128) table, Vector64 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 Xor(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } public static System.Runtime.Intrinsics.Vector128 Xor(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } public static System.Runtime.Intrinsics.Vector128 Xor(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } @@ -3493,8 +3505,20 @@ public unsafe static void StorePairScalarNonTemporal(uint* address, System.Runti public static System.Runtime.Intrinsics.Vector64 UnzipOdd(System.Runtime.Intrinsics.Vector64 left, System.Runtime.Intrinsics.Vector64 right) { throw null; } public static System.Runtime.Intrinsics.Vector128 VectorTableLookup(System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 VectorTableLookup(System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((Vector128, Vector128) table, Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((Vector128, Vector128) table, Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector128 defaultValues, System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 VectorTableLookupExtension(System.Runtime.Intrinsics.Vector128 defaultValues, 
System.Runtime.Intrinsics.Vector128 table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128) table, Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128) table, Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw null; } + public static System.Runtime.Intrinsics.Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) { throw null; } public static System.Runtime.Intrinsics.Vector128 ZipHigh(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } public static System.Runtime.Intrinsics.Vector128 ZipHigh(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } public static System.Runtime.Intrinsics.Vector128 ZipHigh(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } From cb29aee3bab1d35dc0cb5bbab48393ef460ec065 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 7 Feb 2023 23:21:30 -0800 Subject: [PATCH 055/125] Add support for VectorTableLookupExtension --- src/coreclr/jit/hwintrinsicarm64.cpp | 54 ++++++ src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 63 ++++++- src/coreclr/jit/hwintrinsiclistarm64.h | 4 +- src/coreclr/jit/lsra.cpp | 18 +- src/coreclr/jit/lsra.h | 20 ++- src/coreclr/jit/lsraarm64.cpp | 189 +++++++++++++++----- src/coreclr/jit/lsrabuild.cpp | 44 +++-- 7 files changed, 324 insertions(+), 68 deletions(-) diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 15fca4846dd2e8..83cd5b2b096da1 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -1929,7 +1929,61 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, } break; } + case NI_AdvSimd_VectorTableLookupExtension: + case NI_AdvSimd_Arm64_VectorTableLookupExtension: + { + info.needsConsecutiveRegisters = true; + assert(sig->numArgs == 3); + + CORINFO_ARG_LIST_HANDLE arg1 = sig->args; + CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1); + CORINFO_ARG_LIST_HANDLE arg3 = info.compCompHnd->getArgNext(arg2); + var_types argType = TYP_UNKNOWN; + CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE; + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg3, &argClass))); + op3 = getArgForHWIntrinsic(argType, argClass); + + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); + op2 = impPopStack().val; + op1 = impPopStack().val; + + if (op2->TypeGet() == TYP_STRUCT) + { + if (!op2->OperIs(GT_LCL_VAR)) + { + unsigned tmp = lvaGrabTemp(true DEBUGARG("VectorTableLookup temp tree")); + + impAssignTempGen(tmp, op2, CHECK_SPILL_NONE); + op2 = gtNewLclvNode(tmp, argType); + } + + 
LclVarDsc* op2VarDsc = lvaGetDesc(op2->AsLclVar()); + unsigned lclNum = lvaGetLclNum(op2VarDsc); + unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(argClass); + unsigned fieldSize = op2VarDsc->lvSize() / fieldCount; + var_types fieldType = TYP_SIMD16; + + GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList(); + int offset = 0; + for (unsigned fieldId = 0; fieldId < fieldCount; fieldId++) + { + GenTreeLclFld* fldNode = gtNewLclFldNode(lclNum, fieldType, offset); + fieldList->AddField(this, fldNode, offset, fieldType); + + offset += fieldSize; + } + op2 = fieldList; + + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); + } + else + { + assert(op2->TypeGet() == TYP_SIMD16); + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); + } + break; + } default: { return nullptr; diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 9843cd01babb20..a08f90f40e433c 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -428,7 +428,10 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) case NI_AdvSimd_Arm64_VectorTableLookup: ins = INS_tbl; break; - + case NI_AdvSimd_VectorTableLookupExtension: + case NI_AdvSimd_Arm64_VectorTableLookupExtension: + ins = INS_tbx; + break; case NI_AdvSimd_AddWideningLower: assert(varTypeIsIntegral(intrin.baseType)); if (intrin.op1->TypeGet() == TYP_SIMD8) @@ -497,14 +500,13 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) case NI_AdvSimd_VectorTableLookup: case NI_AdvSimd_Arm64_VectorTableLookup: { - unsigned regCount = 1; + unsigned regCount = 0; if (intrin.op1->OperIsFieldList()) { GenTreeFieldList* fieldList = intrin.op1->AsFieldList(); GenTree* firstField = fieldList->Uses().GetHead()->GetNode(); op1Reg = firstField->GetRegNum(); INDEBUG(regNumber argReg = op1Reg); - unsigned regCount = 0; for (GenTreeFieldList::Use& use : fieldList->Uses()) { regCount++; @@ -518,7 +520,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) } else { - op1Reg = intrin.op1->GetRegNum(); + regCount = 1; + op1Reg = intrin.op1->GetRegNum(); } switch (regCount) @@ -541,7 +544,59 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt); break; } + case NI_AdvSimd_VectorTableLookupExtension: + case NI_AdvSimd_Arm64_VectorTableLookupExtension: + { + assert(isRMW); + assert(targetReg != op2Reg); + assert(targetReg != op3Reg); + unsigned regCount = 0; + op1Reg = intrin.op1->GetRegNum(); + op3Reg = intrin.op3->GetRegNum(); + if (intrin.op2->OperIsFieldList()) + { + GenTreeFieldList* fieldList = intrin.op2->AsFieldList(); + GenTree* firstField = fieldList->Uses().GetHead()->GetNode(); + op2Reg = firstField->GetRegNum(); + INDEBUG(regNumber argReg = op2Reg); + for (GenTreeFieldList::Use& use : fieldList->Uses()) + { + regCount++; +#ifdef DEBUG + GenTree* argNode = use.GetNode(); + assert(argReg == argNode->GetRegNum()); + argReg = REG_NEXT(argReg); +#endif + } + } + else + { + regCount = 1; + op2Reg = intrin.op2->GetRegNum(); + } + + switch (regCount) + { + case 2: + ins = INS_tbx_2regs; + break; + case 3: + ins = INS_tbx_3regs; + break; + case 4: + ins = INS_tbx_4regs; + break; + default: + assert(regCount == 1); + assert(ins == INS_tbx); + break; + } + + GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg, /* canSkip */ true); + 
GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op2Reg, op3Reg, opt); + break; + } case NI_AdvSimd_BitwiseSelect: // Even though BitwiseSelect is an RMW intrinsic per se, we don't want to mark it as such // since we can handle all possible allocation decisions for targetReg. diff --git a/src/coreclr/jit/hwintrinsiclistarm64.h b/src/coreclr/jit/hwintrinsiclistarm64.h index d253267e28cea9..1e5b44c351bd88 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64.h +++ b/src/coreclr/jit/hwintrinsiclistarm64.h @@ -476,7 +476,7 @@ HARDWARE_INTRINSIC(AdvSimd, SubtractScalar, HARDWARE_INTRINSIC(AdvSimd, SubtractWideningLower, 8, 2, {INS_ssubl, INS_usubl, INS_ssubl, INS_usubl, INS_ssubl, INS_usubl, INS_ssubw, INS_usubw, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd, SubtractWideningUpper, 16, 2, {INS_ssubl2, INS_usubl2, INS_ssubl2, INS_usubl2, INS_ssubl2, INS_usubl2, INS_ssubw2, INS_usubw2, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd, VectorTableLookup, 8, 2, {INS_tbl, INS_tbl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd, VectorTableLookupExtension, 8, 3, {INS_tbx, INS_tbx, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, VectorTableLookupExtension, 8, 3, {INS_tbx, INS_tbx, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(AdvSimd, Xor, -1, 2, {INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor}, HW_Category_SIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(AdvSimd, ZeroExtendWideningLower, 8, 1, {INS_uxtl, INS_uxtl, INS_uxtl, INS_uxtl, INS_uxtl, INS_uxtl, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(AdvSimd, ZeroExtendWideningUpper, 16, 1, {INS_uxtl2, INS_uxtl2, INS_uxtl2, INS_uxtl2, INS_uxtl2, INS_uxtl2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) @@ -650,7 +650,7 @@ HARDWARE_INTRINSIC(AdvSimd_Arm64, TransposeOdd, HARDWARE_INTRINSIC(AdvSimd_Arm64, UnzipEven, -1, 2, {INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1}, HW_Category_SIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AdvSimd_Arm64, UnzipOdd, -1, 2, {INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2}, HW_Category_SIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup, 16, 2, {INS_tbl, INS_tbl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookupExtension, 16, 3, {INS_tbx, INS_tbx, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookupExtension, 16, 3, {INS_tbx, INS_tbx, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(AdvSimd_Arm64, ZipHigh, -1, 2, {INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2}, HW_Category_SIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AdvSimd_Arm64, ZipLow, -1, 2, {INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1}, HW_Category_SIMD, HW_Flag_NoFlag) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 9f292ef415094e..22d650c3742673 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -5324,7 +5324,9 @@ void LinearScan::allocateRegisters() { if (currentRefPosition.regCount != 0) { - setNextConsecutiveRegisterAssignment(¤tRefPosition, copyReg); + bool consecutiveAssigned = + setNextConsecutiveRegisterAssignment(¤tRefPosition, copyReg); + assert(consecutiveAssigned); } // For consecutive register, it doesn't matter what the assigned register was. @@ -5404,7 +5406,15 @@ void LinearScan::allocateRegisters() // subsequent registers to the remaining position and skip the allocation for the // 1st refPosition altogether. - setNextConsecutiveRegisterAssignment(¤tRefPosition, assignedRegister); + if (!setNextConsecutiveRegisterAssignment(¤tRefPosition, assignedRegister)) + { + // The consecutive registers are busy. Force to allocate even for the 1st + // refPosition + assignedRegister = REG_NA; + RegRecord* physRegRecord = getRegisterRecord(currentInterval->physReg); + currentRefPosition.registerAssignment = allRegs(currentInterval->registerType); + unassignPhysRegNoSpill(physRegRecord); + } } } else @@ -5481,7 +5491,9 @@ void LinearScan::allocateRegisters() #ifdef TARGET_ARM64 if (hasConsecutiveRegister && currentRefPosition.isFirstRefPositionOfConsecutiveRegisters()) { - setNextConsecutiveRegisterAssignment(¤tRefPosition, assignedRegister); + bool consecutiveAssigned = + setNextConsecutiveRegisterAssignment(¤tRefPosition, assignedRegister); + assert(consecutiveAssigned); } #endif // TARGET_ARM64 } diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 85e82f396d83b5..26fea0f2c47ec5 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1009,10 +1009,10 @@ class LinearScan : public LinearScanInterface #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE void buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc, regMaskTP fpCalleeKillSet); - void buildUpperVectorRestoreRefPosition(Interval* lclVarInterval, - LsraLocation currentLoc, - GenTree* node, - bool isUse); + RefPosition* buildUpperVectorRestoreRefPosition(Interval* lclVarInterval, + LsraLocation currentLoc, + GenTree* node, + bool isUse); #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE #if defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) @@ -1187,7 +1187,7 @@ class LinearScan : public LinearScanInterface ****************************************************************************/ #if defined(TARGET_ARM64) - void setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosition, regNumber firstRegAssigned); + bool setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosition, regNumber firstRegAssigned); #endif // TARGET_ARM64 regMaskTP getFreeCandidates(regMaskTP candidates ARM_ARG(var_types regType)) @@ -1862,7 +1862,14 @@ class LinearScan : public LinearScanInterface bool isCandidateMultiRegLclVar(GenTreeLclVar* lclNode); bool checkContainedOrCandidateLclVar(GenTreeLclVar* lclNode); +#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE + RefPosition* 
BuildUse(GenTree* operand, + regMaskTP candidates = RBM_NONE, + int multiRegIdx = 0, + RefPosition** restoreRefPosition = nullptr); +#else RefPosition* BuildUse(GenTree* operand, regMaskTP candidates = RBM_NONE, int multiRegIdx = 0); +#endif void setDelayFree(RefPosition* use); int BuildBinaryUses(GenTreeOp* node, regMaskTP candidates = RBM_NONE); @@ -1942,6 +1949,9 @@ class LinearScan : public LinearScanInterface #ifdef FEATURE_HW_INTRINSICS int BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCount); +#ifdef TARGET_ARM64 + int BuildConsecutiveRegisters(GenTree* treeNode, GenTree* rmwNode = nullptr); +#endif #endif // FEATURE_HW_INTRINSICS int BuildPutArgStk(GenTreePutArgStk* argNode); diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index c8a0bfd1e97978..a2f2482b126d54 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -52,25 +52,53 @@ RefPosition* LinearScan::getNextConsecutiveRefPosition(RefPosition* refPosition) // firstRefPosition - First refPosition of the series of consecutive registers. // firstReg - Register assigned to the first refposition. // -void LinearScan::setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosition, regNumber firstRegAssigned) +// Returns: +// True if all the consecutive registers starting from `firstRegAssigned` were free. Even if one +// of them is busy, returns false and does not change the registerAssignment of a subsequent +// refPosition. +// +bool LinearScan::setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosition, regNumber firstRegAssigned) { assert(isSingleRegister(genRegMask(firstRegAssigned))); assert(firstRefPosition->isFirstRefPositionOfConsecutiveRegisters()); assert(emitter::isVectorRegister(firstRegAssigned)); - RefPosition* consecutiveRefPosition = getNextConsecutiveRefPosition(firstRefPosition); + // Verify that all the consecutive registers needed are free, if not, return false. + // Need to do this before we set registerAssignment of any of the refPositions that + // are part of the range. + RefPosition* consecutiveRefPosition = firstRefPosition; + regNumber regToAssign = firstRegAssigned; + while (consecutiveRefPosition != nullptr) + { + if (isRegInUse(regToAssign, consecutiveRefPosition->getInterval()->registerType)) + { + return false; + } + consecutiveRefPosition = getNextConsecutiveRefPosition(consecutiveRefPosition); + regToAssign = regToAssign == REG_FP_LAST ? REG_FP_FIRST : REG_NEXT(regToAssign); + } + + consecutiveRefPosition = getNextConsecutiveRefPosition(firstRefPosition); // should have at least one consecutive register requirement assert(consecutiveRefPosition != nullptr); - regNumber regToAssign = firstRegAssigned == REG_FP_LAST ? REG_FP_FIRST : REG_NEXT(firstRegAssigned); + regToAssign = firstRegAssigned == REG_FP_LAST ? REG_FP_FIRST : REG_NEXT(firstRegAssigned); INDEBUG(int refPosCount = 1); while (consecutiveRefPosition != nullptr) { consecutiveRefPosition->registerAssignment = genRegMask(regToAssign); - consecutiveRefPosition = getNextConsecutiveRefPosition(consecutiveRefPosition); - regToAssign = regToAssign == REG_FP_LAST ? REG_FP_FIRST : REG_NEXT(regToAssign); + if (consecutiveRefPosition->refType == RefTypeUpperVectorRestore) + { + // For restore refPosition, make sure to have same assignment for it and the next one + // which is the use of the variable. 
+ consecutiveRefPosition = getNextConsecutiveRefPosition(consecutiveRefPosition); + assert(consecutiveRefPosition->refType == RefTypeUse); + consecutiveRefPosition->registerAssignment = genRegMask(regToAssign); + } + consecutiveRefPosition = getNextConsecutiveRefPosition(consecutiveRefPosition); + regToAssign = regToAssign == REG_FP_LAST ? REG_FP_FIRST : REG_NEXT(regToAssign); #ifdef DEBUG refPosCount++; @@ -78,6 +106,7 @@ void LinearScan::setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosit } assert(refPosCount == firstRefPosition->regCount); + return true; } //------------------------------------------------------------------------ @@ -1044,42 +1073,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou { if ((intrin.id == NI_AdvSimd_VectorTableLookup) || (intrin.id == NI_AdvSimd_Arm64_VectorTableLookup)) { - if (intrin.op1->OperIsFieldList()) - { - unsigned regCount = 0; - RefPosition* currRefPos = nullptr; - RefPosition* firstRefPos = nullptr; - RefPosition* lastRefPos = nullptr; - - NextConsecutiveRefPositionsMap* refPositionMap = getNextConsecutiveRefPositionsMap(); - for (GenTreeFieldList::Use& use : intrin.op1->AsFieldList()->Uses()) - { - currRefPos = BuildUse(use.GetNode()); - currRefPos->needsConsecutive = true; - currRefPos->regCount = 0; - - if (firstRefPos == nullptr) - { - firstRefPos = currRefPos; - } - - refPositionMap->Set(lastRefPos, currRefPos, - LinearScan::NextConsecutiveRefPositionsMap::Overwrite); - refPositionMap->Set(currRefPos, nullptr); - - lastRefPos = currRefPos; - regCount++; - } - - // Just `regCount` to actual registers count for first ref-position. - // For others, set 0 so we can identify that this is non-first refposition. - firstRefPos->regCount = regCount; - srcCount += regCount; - } - else - { - srcCount += BuildOperandUses(intrin.op1); - } + srcCount += BuildConsecutiveRegisters(intrin.op1); } else { @@ -1088,7 +1082,30 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } } - if ((intrin.category == HW_Category_SIMDByIndexedElement) && (genTypeSize(intrin.baseType) == 2)) + if ((intrin.id == NI_AdvSimd_VectorTableLookup) || (intrin.id == NI_AdvSimd_Arm64_VectorTableLookup) || + (intrin.id == NI_AdvSimd_VectorTableLookupExtension) || + (intrin.id == NI_AdvSimd_Arm64_VectorTableLookupExtension)) + { + if ((intrin.id == NI_AdvSimd_VectorTableLookup) || (intrin.id == NI_AdvSimd_Arm64_VectorTableLookup)) + { + assert(intrin.op2 != nullptr); + srcCount += BuildOperandUses(intrin.op2); + } + else + { + assert(intrin.op2 != nullptr); + assert(intrin.op3 != nullptr); + srcCount += BuildConsecutiveRegisters(intrin.op2, intrin.op1); + srcCount += isRMW ? BuildDelayFreeUses(intrin.op3, intrin.op1) : BuildOperandUses(intrin.op3); + } + assert(dstCount == 1); + buildInternalRegisterUses(); + BuildDef(intrinsicTree); + *pDstCount = 1; + + return srcCount; + } + else if ((intrin.category == HW_Category_SIMDByIndexedElement) && (genTypeSize(intrin.baseType) == 2)) { // Some "Advanced SIMD scalar x indexed element" and "Advanced SIMD vector x indexed element" instructions (e.g. 
// "MLA (by element)") have encoding that restricts what registers that can be used for the indexed element when @@ -1200,6 +1217,94 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou *pDstCount = dstCount; return srcCount; } + +int LinearScan::BuildConsecutiveRegisters(GenTree* treeNode, GenTree* rmwNode) +{ + int srcCount = 0; + Interval* rmwInterval = nullptr; + bool rmwIsLastUse = false; + if ((rmwNode != nullptr)) + { + if (isCandidateLocalRef(rmwNode)) + { + rmwInterval = getIntervalForLocalVarNode(rmwNode->AsLclVar()); + rmwIsLastUse = rmwNode->AsLclVar()->IsLastUse(0); + } + } + if (treeNode->OperIsFieldList()) + { + unsigned regCount = 0; + RefPosition* firstRefPos = nullptr; + RefPosition* currRefPos = nullptr; + RefPosition* lastRefPos = nullptr; + + NextConsecutiveRefPositionsMap* refPositionMap = getNextConsecutiveRefPositionsMap(); + for (GenTreeFieldList::Use& use : treeNode->AsFieldList()->Uses()) + { +#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE + RefPosition* restoreRefPos = nullptr; + currRefPos = BuildUse(use.GetNode(), RBM_NONE, 0, &restoreRefPos); +#else + currRefPos = BuildUse(use.GetNode()); +#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE + currRefPos->needsConsecutive = true; + currRefPos->regCount = 0; +#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE + if (restoreRefPos != nullptr) + { + // If there was a restoreRefPosition created, make sure + // to link it as well so it gets same registerAssignment + restoreRefPos->needsConsecutive = true; + restoreRefPos->regCount = 0; + if (firstRefPos == nullptr) + { + firstRefPos = restoreRefPos; + } + refPositionMap->Set(lastRefPos, restoreRefPos, LinearScan::NextConsecutiveRefPositionsMap::Overwrite); + refPositionMap->Set(restoreRefPos, currRefPos, LinearScan::NextConsecutiveRefPositionsMap::Overwrite); + } + else +#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE + { + if (firstRefPos == nullptr) + { + firstRefPos = currRefPos; + } + refPositionMap->Set(lastRefPos, currRefPos, LinearScan::NextConsecutiveRefPositionsMap::Overwrite); + } + + refPositionMap->Set(currRefPos, nullptr); + + lastRefPos = currRefPos; + regCount++; + if (rmwNode != nullptr) + { + // If we have rmwNode, determine if the currRefPos should be set to delay-free. + if ((currRefPos->getInterval() != rmwInterval) || (!rmwIsLastUse && !currRefPos->lastUse)) + { + setDelayFree(currRefPos); +#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE + if (restoreRefPos != nullptr) + { + setDelayFree(restoreRefPos); + } +#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE + } + } + } + + // Just `regCount` to actual registers count for first ref-position. + // For others, set 0 so we can identify that this is non-first refposition. + firstRefPos->regCount = regCount; + srcCount += regCount; + } + else + { + srcCount += BuildOperandUses(treeNode); + } + + return srcCount; +} #endif #endif // TARGET_ARM64 diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 1ef61660e80ec6..ddd376e6721049 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -1589,18 +1589,21 @@ void LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation cu // isUse - If the refPosition that is about to be created represents a use or not. // - If not, it would be the one at the end of the block. 
// -void LinearScan::buildUpperVectorRestoreRefPosition(Interval* lclVarInterval, - LsraLocation currentLoc, - GenTree* node, - bool isUse) +// Returns: +// The refposition created for VectorRestore +// +RefPosition* LinearScan::buildUpperVectorRestoreRefPosition(Interval* lclVarInterval, + LsraLocation currentLoc, + GenTree* node, + bool isUse) { + RefPosition* restorePos = nullptr; if (lclVarInterval->isPartiallySpilled) { unsigned varIndex = lclVarInterval->getVarIndex(compiler); Interval* upperVectorInterval = getUpperVectorInterval(varIndex); RefPosition* savePos = upperVectorInterval->recentRefPosition; - RefPosition* restorePos = - newRefPosition(upperVectorInterval, currentLoc, RefTypeUpperVectorRestore, node, RBM_NONE); + restorePos = newRefPosition(upperVectorInterval, currentLoc, RefTypeUpperVectorRestore, node, RBM_NONE); lclVarInterval->isPartiallySpilled = false; if (isUse) @@ -1619,6 +1622,7 @@ void LinearScan::buildUpperVectorRestoreRefPosition(Interval* lclVarInterval, restorePos->regOptional = true; #endif } + return restorePos; } #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE @@ -3025,9 +3029,10 @@ void LinearScan::UpdatePreferencesOfDyingLocal(Interval* interval) // the defList, and build a use RefPosition for the associated Interval. // // Arguments: -// operand - The node of interest -// candidates - The register candidates for the use -// multiRegIdx - The index of the multireg def/use +// operand - The node of interest +// candidates - The register candidates for the use +// multiRegIdx - The index of the multireg def/use +// restoreRefPosition - If there was any upperVector restore refposition created, return it. // // Return Value: // The newly created use RefPosition @@ -3035,7 +3040,14 @@ void LinearScan::UpdatePreferencesOfDyingLocal(Interval* interval) // Notes: // The node must not be contained, and must have been processed by buildRefPositionsForNode(). 
// +#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE +RefPosition* LinearScan::BuildUse(GenTree* operand, + regMaskTP candidates, + int multiRegIdx, + RefPosition** restoreRefPosition) +#else RefPosition* LinearScan::BuildUse(GenTree* operand, regMaskTP candidates, int multiRegIdx) +#endif { assert(!operand->isContained()); Interval* interval; @@ -3062,7 +3074,11 @@ RefPosition* LinearScan::BuildUse(GenTree* operand, regMaskTP candidates, int mu UpdatePreferencesOfDyingLocal(interval); } #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE - buildUpperVectorRestoreRefPosition(interval, currentLoc, operand, true); + RefPosition* upperVectorRefPos = buildUpperVectorRestoreRefPosition(interval, currentLoc, operand, true); + if (restoreRefPosition != nullptr) + { + *restoreRefPosition = upperVectorRefPos; + } #endif } else if (operand->IsMultiRegLclVar()) @@ -3076,7 +3092,11 @@ RefPosition* LinearScan::BuildUse(GenTree* operand, regMaskTP candidates, int mu VarSetOps::RemoveElemD(compiler, currentLiveVars, fieldVarDsc->lvVarIndex); } #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE - buildUpperVectorRestoreRefPosition(interval, currentLoc, operand, true); + RefPosition* upperVectorRefPos = buildUpperVectorRestoreRefPosition(interval, currentLoc, operand, true); + if (restoreRefPosition != nullptr) + { + *restoreRefPosition = upperVectorRefPos; + } #endif } else @@ -3483,7 +3503,7 @@ void LinearScan::BuildStoreLocDef(GenTreeLclVarCommon* storeLoc, defCandidates = allRegs(type); } #else - defCandidates = allRegs(type); + defCandidates = allRegs(type); #endif // TARGET_X86 RefPosition* def = newRefPosition(varDefInterval, currentLoc + 1, RefTypeDef, storeLoc, defCandidates, index); From 53b07b51c7d4baa3df20580f65d7b1793d0cf481 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 8 Feb 2023 12:33:02 -0800 Subject: [PATCH 056/125] WIP: available regs --- src/coreclr/jit/lsra.cpp | 4 ++++ src/coreclr/jit/lsra.h | 5 +++-- src/coreclr/jit/lsraarm64.cpp | 31 +++++++++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 22d650c3742673..4dcfa12a4cef2c 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -12036,7 +12036,11 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval, reverseSelect = linearScan->doReverseSelect(); #endif // DEBUG +#ifdef TARGET_ARM64 + freeCandidates = linearScan->getFreeCandidates(candidates, refPosition); +#else freeCandidates = linearScan->getFreeCandidates(candidates ARM_ARG(regType)); +#endif // TARGET_ARM64 // If no free candidates, then double check if refPosition is an actual ref. 
if (freeCandidates == RBM_NONE) diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 26fea0f2c47ec5..73639bbf6e1354 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1188,8 +1188,8 @@ class LinearScan : public LinearScanInterface #if defined(TARGET_ARM64) bool setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosition, regNumber firstRegAssigned); -#endif // TARGET_ARM64 - + regMaskTP getFreeCandidates(regMaskTP candidates, RefPosition* refPosition); +#else regMaskTP getFreeCandidates(regMaskTP candidates ARM_ARG(var_types regType)) { regMaskTP result = candidates & m_AvailableRegs; @@ -1203,6 +1203,7 @@ class LinearScan : public LinearScanInterface #endif // TARGET_ARM return result; } +#endif // TARGET_ARM64 #ifdef DEBUG class RegisterSelection; diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index a2f2482b126d54..94eaf61c000df9 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -109,6 +109,37 @@ bool LinearScan::setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosit return true; } + +regMaskTP LinearScan::getFreeCandidates(regMaskTP candidates, RefPosition* refPosition) +{ + regMaskTP result = candidates & m_AvailableRegs; + if (!refPosition->isFirstRefPositionOfConsecutiveRegisters()) + { + return result; + } + + + regMaskTP availbleRegs = m_AvailableRegs; + /* + 1. Find first `1` from LSB : a = bsf(input) + 2. Set everything until that point to `1` : temp |= ((1 << a) - 1) + 3. Find first `0` from LSB -> BitScanForward <-- c = bsf(~temp) + 4. if ((c - a) > regCount), then accumulate = (((1 << c) - 1) & (1 << a)) + 5. input = input & ~((1 << c) - 1) + 6. Repeat + */ + + uint32_t index = BitOperations::BitScanForward(static_cast(availableRegs)); + + + //regMaskTP availbleRegs = m_AvailableRegs >> 1; + for (int i = 0; i < refPosition->regCount; i++) + { + result &= availbleRegs; + availbleRegs >>= 1; + } + return result; +} //------------------------------------------------------------------------ // BuildNode: Build the RefPositions for a node // From 302d3baf8b72a1e8216c8a5e8f624d8d4b289a35 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 8 Feb 2023 12:33:11 -0800 Subject: [PATCH 057/125] WIP: Remove test hacks --- .../ref/System.Private.CoreLib.ExtraApis.cs | 42 +++++----- .../ref/System.Private.CoreLib.ExtraApis.txt | 14 +--- .../src/CompatibilitySuppressions.xml | 76 ------------------- 3 files changed, 22 insertions(+), 110 deletions(-) delete mode 100644 src/libraries/System.Runtime.Intrinsics/src/CompatibilitySuppressions.xml diff --git a/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.cs b/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.cs index 97ff2a1d64f1e3..4437ec11b30330 100644 --- a/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.cs +++ b/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.cs @@ -37,25 +37,25 @@ public static partial class Debug public static System.Diagnostics.DebugProvider SetProvider(System.Diagnostics.DebugProvider provider) { throw null; } } } -namespace System.Runtime.Intrinsics.Arm -{ - public abstract partial class AdvSimd : ArmBase - { - public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector64 
VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } +// namespace System.Runtime.Intrinsics.Arm +// { + // public abstract partial class AdvSimd : ArmBase + // { + // public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + // public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + // public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + // public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + // public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } + // public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - public abstract partial class Arm64 : ArmBase.Arm64 - { - public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, 
System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - } - } -} + // public abstract partial class Arm64 : ArmBase.Arm64 + // { + // public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + // public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + // public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + // public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + // public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + // public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } + // } + // } +// } diff --git a/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.txt b/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.txt index 5a861db7629d75..d2a2cb7c190336 100644 --- a/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.txt +++ b/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.txt @@ -4,16 +4,4 @@ T:System.Runtime.Serialization.DeserializationToken M:System.Runtime.Serialization.SerializationInfo.StartDeserialization T:System.Diagnostics.DebugProvider -M:System.Diagnostics.Debug.SetProvider(System.Diagnostics.DebugProvider) -M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector128) -M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), 
System.Runtime.Intrinsics.Vector128) -M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector128) -M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector128) -M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector128) -M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector128) -M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector64) -M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector64) -M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector64) -M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector64) -M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector64) -M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128), System.Runtime.Intrinsics.Vector64) \ No newline at end of file +M:System.Diagnostics.Debug.SetProvider(System.Diagnostics.DebugProvider) \ No newline at end of file diff --git a/src/libraries/System.Runtime.Intrinsics/src/CompatibilitySuppressions.xml b/src/libraries/System.Runtime.Intrinsics/src/CompatibilitySuppressions.xml deleted file mode 100644 index 3d3a055c0f9332..00000000000000 --- a/src/libraries/System.Runtime.Intrinsics/src/CompatibilitySuppressions.xml +++ /dev/null @@ -1,76 +0,0 @@ - - - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector128{System.Byte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector128{System.Byte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - 
M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector128{System.Byte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector128{System.SByte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector128{System.SByte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.Arm64.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector128{System.SByte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector64{System.Byte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector64{System.Byte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.Byte},System.Runtime.Intrinsics.Vector128{System.Byte}},System.Runtime.Intrinsics.Vector64{System.Byte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector64{System.SByte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector64{System.SByte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - - CP0002 - 
M:System.Runtime.Intrinsics.Arm.AdvSimd.VectorTableLookup(System.ValueTuple{System.Runtime.Intrinsics.Vector128{System.SByte},System.Runtime.Intrinsics.Vector128{System.SByte}},System.Runtime.Intrinsics.Vector64{System.SByte}) - ref/net8.0/System.Runtime.Intrinsics.dll - lib/net8.0/System.Runtime.Intrinsics.dll - - \ No newline at end of file From fc93cc222714688f590f876ff22d177e7d2e4cee Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 9 Feb 2023 22:33:17 -0800 Subject: [PATCH 058/125] Update getFreeCandidates() for consecutive registers --- src/coreclr/jit/lsraarm64.cpp | 123 +++++++++++++++++++++++++++++----- 1 file changed, 108 insertions(+), 15 deletions(-) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 4280dfd79b4b97..ac7c859a8e3f28 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -118,27 +118,120 @@ regMaskTP LinearScan::getFreeCandidates(regMaskTP candidates, RefPosition* refPo return result; } + unsigned int registersNeeded = refPosition->regCount; + regMaskTP currAvailableRegs = result; + if (BitOperations::PopCount(currAvailableRegs) < registersNeeded) + { + // If number of free registers are less than what we need, no point in scanning + // for them. + return RBM_NONE; + } - regMaskTP availbleRegs = m_AvailableRegs; - /* - 1. Find first `1` from LSB : a = bsf(input) - 2. Set everything until that point to `1` : temp |= ((1 << a) - 1) - 3. Find first `0` from LSB -> BitScanForward <-- c = bsf(~temp) - 4. if ((c - a) > regCount), then accumulate = (((1 << c) - 1) & (1 << a)) - 5. input = input & ~((1 << c) - 1) - 6. Repeat - */ +// At this point, for 'n' registers requirement, if Rm+1, Rm+2, Rm+3, ..., Rm+k are +// available, create the mask only for Rm+1, Rm+2, ..., Rm+(k-n+1) to convey that it +// is safe to assign any of those registers, but not beyond that. +#define AppendConsecutiveMask(startIndex, endIndex, availableRegistersMask) \ + regMaskTP selectionStartMask = (1ULL << regAvailableStartIndex) - 1; \ + regMaskTP selectionEndMask = (1ULL << (regAvailableEndIndex - registersNeeded + 1)) - 1; \ + consecutiveResult |= availableRegistersMask & (selectionEndMask & ~selectionStartMask); \ + overallResult |= availableRegistersMask; + + regMaskTP overallResult = RBM_NONE; + regMaskTP consecutiveResult = RBM_NONE; + uint32_t regAvailableStartIndex = 0, regAvailableEndIndex = 0; + do + { + // From LSB, find the first available register (bit `1`) + regAvailableStartIndex = BitOperations::_BitScanForward(currAvailableRegs); + regMaskTP startMask = (1ULL << regAvailableStartIndex) - 1; - uint32_t index = BitOperations::BitScanForward(static_cast(availableRegs)); + // Mask all the bits that are processed from LSB thru regAvailableStart until the last `1`. + regMaskTP maskProcessed = ~(currAvailableRegs | startMask); + // From regAvailableStart, find the first unavailable register (bit `0`). 
+ if (maskProcessed == 0) + { + regAvailableEndIndex = 64; + if ((regAvailableEndIndex - regAvailableStartIndex) >= registersNeeded) + { + AppendConsecutiveMask(regAvailableStartIndex, regAvailableEndIndex, currAvailableRegs); + } + break; + } + else + { + regAvailableEndIndex = BitOperations::_BitScanForward(maskProcessed); + } + regMaskTP endMask = (1ULL << regAvailableEndIndex) - 1; - //regMaskTP availbleRegs = m_AvailableRegs >> 1; - for (int i = 0; i < refPosition->regCount; i++) + // Anything between regAvailableStart and regAvailableEnd is the range of consecutive registers available + // If they are equal to or greater than our register requirements, then add all of them to the result. + if ((regAvailableEndIndex - regAvailableStartIndex) >= registersNeeded) + { + AppendConsecutiveMask(regAvailableStartIndex, regAvailableEndIndex, (endMask & ~startMask)); + } + currAvailableRegs &= ~endMask; + } while (currAvailableRegs != RBM_NONE); + + if (compiler->opts.OptimizationEnabled()) { - result &= availbleRegs; - availbleRegs >>= 1; + // One last time, check if subsequent refpositions already have consecutive registers assigned + // and if yes, and if one of the register out of consecutiveResult is available for the first + // refposition, then just use that. This will avoid unnecessary copies. + + regNumber firstRegNum = REG_NA; + regNumber prevRegNum = REG_NA; + int foundCount = 0; + regMaskTP foundRegMask = RBM_NONE; + + RefPosition* consecutiveRefPosition = getNextConsecutiveRefPosition(refPosition); + assert(consecutiveRefPosition != nullptr); + + for (unsigned int i = 1; i < registersNeeded; i++) + { + Interval* interval = consecutiveRefPosition->getInterval(); + consecutiveRefPosition = getNextConsecutiveRefPosition(consecutiveRefPosition); + + if (!interval->isActive) + { + foundRegMask = RBM_NONE; + foundCount = 0; + continue; + } + + regNumber currRegNum = interval->assignedReg->regNum; + if ((prevRegNum == REG_NA) || (prevRegNum == REG_PREV(currRegNum)) || + ((prevRegNum == REG_FP_LAST) && (currRegNum == REG_FP_FIRST))) + { + foundRegMask |= genRegMask(currRegNum); + if (prevRegNum == REG_NA) + { + firstRegNum = currRegNum; + } + prevRegNum = currRegNum; + foundCount++; + continue; + } + + foundRegMask = RBM_NONE; + foundCount = 0; + break; + } + + if (foundCount != 0) + { + assert(firstRegNum != REG_NA); + regMaskTP remainingRegsMask = ((1ULL << (registersNeeded - foundCount)) - 1) << (firstRegNum - 1); + + if ((overallResult & remainingRegsMask) != RBM_NONE) + { + // If remaining registers are available, then just set the firstRegister mask + consecutiveResult = 1ULL << (firstRegNum - 1); + } + } } - return result; + + return consecutiveResult; } //------------------------------------------------------------------------ // BuildNode: Build the RefPositions for a node From 78e87cd8d449e68bf2faba08cf85336f4308a8a3 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 9 Feb 2023 22:33:42 -0800 Subject: [PATCH 059/125] Add missing resetRegState() --- src/coreclr/jit/lsrabuild.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index ddd376e6721049..433ca73c07bad9 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -2129,6 +2129,8 @@ void LinearScan::buildIntervals() } #endif // DEBUG + resetRegState(); + #if DOUBLE_ALIGN // We will determine whether we should double align the frame during // identifyCandidates(), but we initially assume that we will not. 
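The getFreeCandidates() change in PATCH 058 above scans the mask of currently free vector registers for runs long enough to hold a 2/3/4-register table. Purely as an illustration of that idea (this is not code from these patches, the helper name is made up, and it ignores the REG_FP_LAST to REG_FP_FIRST wrap-around that the JIT handles), a standalone C# sketch of "where can a run of `count` consecutive free registers start?" could look like this:

using System;

static class ConsecutiveRegMaskSketch
{
    // Bit i of the result is set only when bits i .. i+count-1 are all set in freeRegs,
    // i.e. a run of `count` consecutive free registers can start at register i.
    // (Illustrative only; no wrap-around handling.)
    static ulong RunStartMask(ulong freeRegs, int count)
    {
        ulong result = freeRegs;
        for (int i = 1; i < count; i++)
        {
            result &= freeRegs >> i;
        }
        return result;
    }

    static void Main()
    {
        // Registers 2,3,4,5 and 9,10 are free; we need a run of 3.
        ulong free = 0b0110_0011_1100;
        ulong starts = RunStartMask(free, 3);
        Console.WriteLine(Convert.ToString((long)starts, 2)); // 1100: runs of 3 can start at bits 2 and 3
    }
}

Each cleared bit in freeRegs knocks out every candidate start position whose run would overlap it, which is the same pruning the bit-scan loop in the patch aims for over m_AvailableRegs.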
From 60d383e414ba551c910875346bd0ad3672f4fe38 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 9 Feb 2023 22:34:41 -0800 Subject: [PATCH 060/125] Do not assume the current assigned register for consecutiveRegisters refposition is good. If a refposition is marked as needConsecutive, then do not just assume that the existing register assigned is good. We still go through the allocation for it to make sure that we allocate it a register such that the consecutive registers are also free. --- src/coreclr/jit/lsra.cpp | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 303bcf2835b1d1..69a9a53a4e4cf6 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -5289,7 +5289,11 @@ void LinearScan::allocateRegisters() setIntervalAsSplit(currentInterval); INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_MOVE_REG, currentInterval, assignedRegister)); } - else if ((genRegMask(assignedRegister) & currentRefPosition.registerAssignment) != 0) + else if (((genRegMask(assignedRegister) & currentRefPosition.registerAssignment) != 0) +#ifdef TARGET_ARM64 + && !(hasConsecutiveRegister && currentRefPosition.needsConsecutive) +#endif + ) { currentRefPosition.registerAssignment = assignedRegBit; if (!currentInterval->isActive) @@ -12046,11 +12050,18 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval, prevRegBit = genRegMask(prevRegRec->regNum); if ((prevRegRec->assignedInterval == currentInterval) && ((candidates & prevRegBit) != RBM_NONE)) { - candidates = prevRegBit; - found = true; +#ifdef TARGET_ARM64 + // If this is allocating for consecutive register, we need to make sure that + // we allocate register, whose consecutive registers are also free. + if (!refPosition->needsConsecutive) +#endif + { + candidates = prevRegBit; + found = true; #ifdef DEBUG - *registerScore = THIS_ASSIGNED; + *registerScore = THIS_ASSIGNED; #endif + } } } else From 05f9fc69f589107bfec6cd092490bd3801b1004a Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 9 Feb 2023 22:36:34 -0800 Subject: [PATCH 061/125] Handle case for copyReg For copyReg, if we assigned a different register, do not forget to free the existing register it was holding --- src/coreclr/jit/lsra.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 69a9a53a4e4cf6..c473187de9e315 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -5337,9 +5337,6 @@ void LinearScan::allocateRegisters() // We have just assigned it `copyRegMask` and that's the one in-use, and not the // one that was assigned previously. assignedRegMask = REG_NA; - - // This should never be the first refposition of the series. - assert(currentRefPosition.regCount == 0); } #endif regsInUseThisLocation |= copyRegMask | assignedRegMask; @@ -5433,9 +5430,13 @@ void LinearScan::allocateRegisters() else { // If the subsequent refPosition is not assigned to the consecutive register, then reassign the - // right - // consecutive register. + // right consecutive register. 
assignedRegister = REG_NA; + if (assignedRegBit != RBM_NONE) + { + RegRecord* physRegRecord = getRegisterRecord(currentInterval->physReg); + unassignPhysRegNoSpill(physRegRecord); + } } } }
From b52059e41c047dddefe9d2cb1d8400936e0e918b Mon Sep 17 00:00:00 2001
From: Kunal Pathak
Date: Thu, 9 Feb 2023 22:40:05 -0800
Subject: [PATCH 062/125] Update setNextConsecutiveRegister() with UPPER_VECTOR_RESTORE

---
 src/coreclr/jit/lsraarm64.cpp | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp
index ac7c859a8e3f28..3c72ca0686b72c 100644
--- a/src/coreclr/jit/lsraarm64.cpp
+++ b/src/coreclr/jit/lsraarm64.cpp
@@ -59,6 +59,7 @@ RefPosition* LinearScan::getNextConsecutiveRefPosition(RefPosition* refPosition) // bool LinearScan::setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosition, regNumber firstRegAssigned) { + assert(firstRefPosition->assignedReg() == firstRegAssigned); assert(isSingleRegister(genRegMask(firstRegAssigned))); assert(firstRefPosition->isFirstRefPositionOfConsecutiveRegisters()); assert(emitter::isVectorRegister(firstRegAssigned)); @@ -74,21 +75,28 @@ bool LinearScan::setNextConsecutiveRegisterAssignment(RefPosit { return false; } + +#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE + if (consecutiveRefPosition->refType == RefTypeUpperVectorRestore) + { + consecutiveRefPosition = getNextConsecutiveRefPosition(consecutiveRefPosition); + assert(!isRegInUse(regToAssign, consecutiveRefPosition->getInterval()->registerType)); + } +#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE + consecutiveRefPosition = getNextConsecutiveRefPosition(consecutiveRefPosition); regToAssign = regToAssign == REG_FP_LAST ? REG_FP_FIRST : REG_NEXT(regToAssign); } - consecutiveRefPosition = getNextConsecutiveRefPosition(firstRefPosition); - - // should have at least one consecutive register requirement - assert(consecutiveRefPosition != nullptr); + //// + consecutiveRefPosition = firstRefPosition; + regToAssign = firstRegAssigned; + INDEBUG(int refPosCount = 0); - regToAssign = firstRegAssigned == REG_FP_LAST ? REG_FP_FIRST : REG_NEXT(firstRegAssigned); - - INDEBUG(int refPosCount = 1); while (consecutiveRefPosition != nullptr) { consecutiveRefPosition->registerAssignment = genRegMask(regToAssign); +#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE if (consecutiveRefPosition->refType == RefTypeUpperVectorRestore) { // For restore refPosition, make sure to have same assignment for it and the next one @@ -97,15 +105,15 @@ bool LinearScan::setNextConsecutiveRegisterAssignment(RefPosit assert(consecutiveRefPosition->refType == RefTypeUse); consecutiveRefPosition->registerAssignment = genRegMask(regToAssign); } +#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE consecutiveRefPosition = getNextConsecutiveRefPosition(consecutiveRefPosition); regToAssign = regToAssign == REG_FP_LAST ? REG_FP_FIRST : REG_NEXT(regToAssign); -#ifdef DEBUG - refPosCount++; -#endif // DEBUG + INDEBUG(refPosCount++); } assert(refPosCount == firstRefPosition->regCount); + return true; }
From 0721ad4158e03200a9cf13420f9b9c7c41878462 Mon Sep 17 00:00:00 2001
From: Kunal Pathak
Date: Thu, 9 Feb 2023 22:40:17 -0800
Subject: [PATCH 063/125] Update code around copyReg

Updated code such that if the refPosition is already assigned a register, then check if assignedRegister satisfies our needs (for first / non-first refposition). If not, perform copyReg.

TODO: Extract the code surrounding and including copyReg until where we `continue`.
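One reason for all of this copyReg and assignment juggling: the underlying TBL/TBX instructions take their lookup table as a list of two, three, or four V registers that must be consecutive, so the table operands of the new tuple overloads have to land in consecutive registers. Purely as a usage sketch, assuming the API shape shown in the ref changes earlier in this series (the scalar fallback below is illustrative only and not part of the patches):

using System;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;

static class TblSketch
{
    // Looks up 16 bytes from a 32-byte table held in two vectors.
    // Out-of-range indexes produce 0, matching TBL semantics.
    static Vector128<byte> Lookup32(Vector128<byte> t0, Vector128<byte> t1, Vector128<byte> indexes)
    {
        if (AdvSimd.Arm64.IsSupported)
        {
            // Single TBL over {t0, t1}; the JIT must place t0/t1 in consecutive V registers.
            return AdvSimd.Arm64.VectorTableLookup((t0, t1), indexes);
        }

        // Scalar fallback, for illustration only.
        Span<byte> table = stackalloc byte[32];
        t0.CopyTo(table);
        t1.CopyTo(table.Slice(16));

        Span<byte> result = stackalloc byte[16];
        for (int i = 0; i < 16; i++)
        {
            byte idx = indexes.GetElement(i);
            result[i] = idx < 32 ? table[idx] : (byte)0;
        }
        return Vector128.Create<byte>(result);
    }
}

On hardware without AdvSimd the fallback path reproduces the TBL rule that any index outside the table yields zero in the corresponding result element.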
--- src/coreclr/jit/lsra.cpp | 114 +++++++++++++++++++++++++++++----- src/coreclr/jit/lsra.h | 3 +- src/coreclr/jit/lsraarm64.cpp | 114 +++++++++++++++++++++++++--------- 3 files changed, 186 insertions(+), 45 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index c473187de9e315..598e48a0093846 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -5289,29 +5289,99 @@ void LinearScan::allocateRegisters() setIntervalAsSplit(currentInterval); INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_MOVE_REG, currentInterval, assignedRegister)); } - else if (((genRegMask(assignedRegister) & currentRefPosition.registerAssignment) != 0) -#ifdef TARGET_ARM64 - && !(hasConsecutiveRegister && currentRefPosition.needsConsecutive) -#endif - ) + else if (((genRegMask(assignedRegister) & currentRefPosition.registerAssignment) != 0)) { - currentRefPosition.registerAssignment = assignedRegBit; - if (!currentInterval->isActive) +#ifdef TARGET_ARM64 + if (hasConsecutiveRegister && currentRefPosition.isFirstRefPositionOfConsecutiveRegisters()) { - // If we've got an exposed use at the top of a block, the - // interval might not have been active. Otherwise if it's a use, - // the interval must be active. - if (refType == RefTypeDummyDef) + if (areNextConsecutiveRegistersFree(assignedRegister, currentRefPosition.regCount, + currentRefPosition.getInterval()->registerType)) { - currentInterval->isActive = true; - assert(getRegisterRecord(assignedRegister)->assignedInterval == currentInterval); + // Current assignedRegister satisfies the consecutive registers requirements + currentRefPosition.registerAssignment = assignedRegBit; + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, currentInterval, assignedRegister)); } else { - currentRefPosition.reload = true; + // It doesn't satisfy, so do a copyReg followed by assigning consecutive registers + // to remaining refPosition. + assert((currentRefPosition.refType == RefTypeUse) || + (currentRefPosition.refType == RefTypeUpperVectorRestore)); + regNumber copyReg = assignCopyReg(¤tRefPosition); + lastAllocatedRefPosition = ¤tRefPosition; + regMaskTP copyRegMask = getRegMask(copyReg, currentInterval->registerType); + regMaskTP assignedRegMask = getRegMask(assignedRegister, currentInterval->registerType); + bool consecutiveAssigned = setNextConsecutiveRegisterAssignment(¤tRefPosition, copyReg); + assert(consecutiveAssigned); + + // For consecutive register, it doesn't matter what the assigned register was. + // We have just assigned it `copyRegMask` and that's the one in-use, and not the + // one that was assigned previously. + assignedRegMask = REG_NA; + + regsInUseThisLocation |= copyRegMask | assignedRegMask; + if (currentRefPosition.lastUse) + { + if (currentRefPosition.delayRegFree) + { + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE_DELAYED, currentInterval, + assignedRegister)); + delayRegsToFree |= copyRegMask | assignedRegMask; + regsInUseNextLocation |= copyRegMask | assignedRegMask; + } + else + { + INDEBUG( + dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE, currentInterval, assignedRegister)); + regsToFree |= copyRegMask | assignedRegMask; + } + } + else + { + copyRegsToFree |= copyRegMask; + if (currentRefPosition.delayRegFree) + { + regsInUseNextLocation |= copyRegMask | assignedRegMask; + } + } + + // If this is a tree temp (non-localVar) interval, we will need an explicit move. + // Note: In theory a moveReg should cause the Interval to now have the new reg as its + // assigned register. 
However, that's not currently how this works. + // If we ever actually move lclVar intervals instead of copying, this will need to change. + if (!currentInterval->isLocalVar) + { + currentRefPosition.moveReg = true; + currentRefPosition.copyReg = false; + } + clearNextIntervalRef(copyReg, currentInterval->registerType); + clearSpillCost(copyReg, currentInterval->registerType); + updateNextIntervalRef(assignedRegister, currentInterval); + updateSpillCost(assignedRegister, currentInterval); + continue; + } + } + else +#endif + { + currentRefPosition.registerAssignment = assignedRegBit; + if (!currentInterval->isActive) + { + // If we've got an exposed use at the top of a block, the + // interval might not have been active. Otherwise if it's a use, + // the interval must be active. + if (refType == RefTypeDummyDef) + { + currentInterval->isActive = true; + assert(getRegisterRecord(assignedRegister)->assignedInterval == currentInterval); + } + else + { + currentRefPosition.reload = true; + } } + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, currentInterval, assignedRegister)); } - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, currentInterval, assignedRegister)); } else { @@ -5336,6 +5406,7 @@ void LinearScan::allocateRegisters() // For consecutive register, it doesn't matter what the assigned register was. // We have just assigned it `copyRegMask` and that's the one in-use, and not the // one that was assigned previously. + assignedRegMask = REG_NA; } #endif @@ -5427,6 +5498,15 @@ void LinearScan::allocateRegisters() // no need to find register to assign. allocate = false; } + //else if (lastAllocatedRefPosition->needsConsecutive && + // lastAllocatedRefPosition->refType == RefTypeUpperVectorRestore) + //{ + // // If previous refposition was part of the series and it was UpperVectorRestore, + // // we have already assigned the same register to this refposition as well. + // // No need to allocate. + // assert(lastAllocatedRefPosition->registerAssignment == currentRefPosition.registerAssignment); + // allocate = false; + //} else { // If the subsequent refPosition is not assigned to the consecutive register, then reassign the @@ -12018,6 +12098,10 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval, // Eliminate candidates that are in-use or busy. if (!found) { + /* + * we assign same registerAssignment to UPPER_RESTORE and the next USE. When we allocate for + * USE, we see that the same register is now busy and so don't have candidates left. 
+ */ regMaskTP busyRegs = linearScan->regsBusyUntilKill | linearScan->regsInUseThisLocation; candidates &= ~busyRegs; diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 5cc45548030dc9..7dc755ec57ddea 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1191,7 +1191,8 @@ class LinearScan : public LinearScanInterface ****************************************************************************/ #if defined(TARGET_ARM64) - bool setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosition, regNumber firstRegAssigned); + bool areNextConsecutiveRegistersFree(regNumber regToAssign, int registersToCheck, var_types registerType); + bool setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosition, regNumber firstRegAssigned); regMaskTP getFreeCandidates(regMaskTP candidates, RefPosition* refPosition); #else regMaskTP getFreeCandidates(regMaskTP candidates ARM_ARG(var_types regType)) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 3c72ca0686b72c..7851602829fa30 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -67,56 +67,112 @@ bool LinearScan::setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosit // Verify that all the consecutive registers needed are free, if not, return false. // Need to do this before we set registerAssignment of any of the refPositions that // are part of the range. - RefPosition* consecutiveRefPosition = firstRefPosition; - regNumber regToAssign = firstRegAssigned; - while (consecutiveRefPosition != nullptr) - { - if (isRegInUse(regToAssign, consecutiveRefPosition->getInterval()->registerType)) - { - return false; - } - -#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE - if (consecutiveRefPosition->refType == RefTypeUpperVectorRestore) - { - consecutiveRefPosition = getNextConsecutiveRefPosition(consecutiveRefPosition); - assert(!isRegInUse(regToAssign, consecutiveRefPosition->getInterval()->registerType)); - } -#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE - consecutiveRefPosition = getNextConsecutiveRefPosition(consecutiveRefPosition); - regToAssign = regToAssign == REG_FP_LAST ? REG_FP_FIRST : REG_NEXT(regToAssign); + if (!areNextConsecutiveRegistersFree(firstRegAssigned, firstRefPosition->regCount, + firstRefPosition->getInterval()->registerType)) + { + return false; } + +// RefPosition* consecutiveRefPosition = firstRefPosition; +// regNumber regToAssign = firstRegAssigned; +// while (consecutiveRefPosition != nullptr) +// { +// if (isRegInUse(regToAssign, consecutiveRefPosition->getInterval()->registerType)) +// { +// return false; +// } +// +//#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE +// if (consecutiveRefPosition->refType == RefTypeUpperVectorRestore) +// { +// consecutiveRefPosition = getNextConsecutiveRefPosition(consecutiveRefPosition); +// +// assert(consecutiveRefPosition->refType == RefTypeUse); +// assert(!isRegInUse(regToAssign, consecutiveRefPosition->getInterval()->registerType)); +// } +//#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE +// +// consecutiveRefPosition = getNextConsecutiveRefPosition(consecutiveRefPosition); +// regToAssign = regToAssign == REG_FP_LAST ? REG_FP_FIRST : REG_NEXT(regToAssign); +// } - //// - consecutiveRefPosition = firstRefPosition; - regToAssign = firstRegAssigned; + RefPosition* consecutiveRefPosition = getNextConsecutiveRefPosition(firstRefPosition); + regNumber regToAssign = firstRegAssigned == REG_FP_LAST ? 
REG_FP_FIRST : REG_NEXT(firstRegAssigned); INDEBUG(int refPosCount = 0); + regMaskTP busyConsecutiveRegMask = ~(((1ULL << firstRefPosition->regCount) - 1) << firstRegAssigned); while (consecutiveRefPosition != nullptr) { - consecutiveRefPosition->registerAssignment = genRegMask(regToAssign); #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE - if (consecutiveRefPosition->refType == RefTypeUpperVectorRestore) + if ((consecutiveRefPosition->refType == RefTypeUpperVectorRestore)) { - // For restore refPosition, make sure to have same assignment for it and the next one - // which is the use of the variable. + if (consecutiveRefPosition->getInterval()->isPartiallySpilled) + { + // Make sure that restore doesn't get one of the registers that are part of series we are trying to set + // currently. + consecutiveRefPosition->registerAssignment &= ~busyConsecutiveRegMask; + } consecutiveRefPosition = getNextConsecutiveRefPosition(consecutiveRefPosition); - assert(consecutiveRefPosition->refType == RefTypeUse); - consecutiveRefPosition->registerAssignment = genRegMask(regToAssign); } #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE - consecutiveRefPosition = getNextConsecutiveRefPosition(consecutiveRefPosition); - regToAssign = regToAssign == REG_FP_LAST ? REG_FP_FIRST : REG_NEXT(regToAssign); + consecutiveRefPosition->registerAssignment = genRegMask(regToAssign); + consecutiveRefPosition = getNextConsecutiveRefPosition(consecutiveRefPosition); + regToAssign = regToAssign == REG_FP_LAST ? REG_FP_FIRST : REG_NEXT(regToAssign); INDEBUG(refPosCount++); } +// while (consecutiveRefPosition != nullptr) +// { +// consecutiveRefPosition->registerAssignment = genRegMask(regToAssign); +//#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE +// if (consecutiveRefPosition->refType == RefTypeUpperVectorRestore) +// { +// // For restore refPosition, make sure to have same assignment for it and the next one +// // which is the use of the variable. +// consecutiveRefPosition = getNextConsecutiveRefPosition(consecutiveRefPosition); +// consecutiveRefPosition->registerAssignment = genRegMask(regToAssign); +// } +//#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE +// consecutiveRefPosition = getNextConsecutiveRefPosition(consecutiveRefPosition); +// regToAssign = regToAssign == REG_FP_LAST ? REG_FP_FIRST : REG_NEXT(regToAssign); +// +// INDEBUG(refPosCount++); +// } + assert(refPosCount == firstRefPosition->regCount); return true; } +//------------------------------------------------------------------------ +// areNextConsecutiveRegistersBusy: Starting with `regToAssign`, check if next +// `registersToCheck` are free or not. +// +// Arguments: +// - First refPosition of the series of consecutive registers. +// regToAssign - Register assigned to the first refposition. +// registersCount - Number of registers to check. +// registerType - Type of register. +// +// Returns: +// True if all the consecutive registers starting from `regToAssign` were free. Even if one +// of them is busy, returns false. +// +bool LinearScan::areNextConsecutiveRegistersFree(regNumber regToAssign, int registersCount, var_types registerType) +{ + for (int i = 0; i < registersCount; i++) + { + if (isRegInUse(regToAssign, registerType)) + { + return false; + } + regToAssign = regToAssign == REG_FP_LAST ? 
REG_FP_FIRST : REG_NEXT(regToAssign); + } + + return true; +} regMaskTP LinearScan::getFreeCandidates(regMaskTP candidates, RefPosition* refPosition) { From 8a5c696423ab1ca027203ae24a0abd87bb16c947 Mon Sep 17 00:00:00 2001 From: Aleksey Kliger Date: Tue, 14 Feb 2023 16:32:24 -0500 Subject: [PATCH 064/125] Create the VectorTableLookup fake CoreLib as a reference assembly Make the AdvSimd.Arm64 tests reference the VectorTableLookup fake CoreLib as reference assembly; and ensure that it is not included as a ProjectReference by the toplevel HardwareIntrinsics merged test runners. The upshot is that the AdvSimd.Arm64 tests can call the extra APIs via a direct reference to CoreLib (instead of through System.Runtime), but the fake library is not copied into any test artifact directories, and the Mono AOT compiler never sees it. That said, after applying this, the test fails during AOT compilation of the *real* CoreLib ``` Mono Ahead of Time compiler - compiling assembly /Users/alklig/work/dotnet-runtime/runtime-bugs2/artifacts/tests/coreclr/osx.arm64.Release/Tests/Core_Root/System.Private.CoreLib.dll AOTID EA8D702E-9736-3BD5-435B-A9D5EEADCC78 %"System.ValueTuple`2, System.Runtime.Intrinsics.Vector128`1>"* %arg_table <16 x i8> * Assertion: should not be reached at /Users/alklig/work/dotnet-runtime/runtime-bugs2/src/mono/mono/mini/mini-llvm.c:1455 ``` --- .../Arm/AdvSimd.Arm64/AdvSimd.Arm64_r.csproj | 2 +- .../Arm/AdvSimd.Arm64/AdvSimd.Arm64_ro.csproj | 2 +- src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/AdvSimd_r.csproj | 2 +- src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/AdvSimd_ro.csproj | 2 +- .../HardwareIntrinsics/Arm/AdvSimd/VectorTableLookup.csproj | 3 ++- src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_r.csproj | 3 +++ src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_ro.csproj | 3 +++ 7 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd.Arm64/AdvSimd.Arm64_r.csproj b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd.Arm64/AdvSimd.Arm64_r.csproj index eb6eb02de71ab7..9252a12f163c57 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd.Arm64/AdvSimd.Arm64_r.csproj +++ b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd.Arm64/AdvSimd.Arm64_r.csproj @@ -11,6 +11,6 @@ - + diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd.Arm64/AdvSimd.Arm64_ro.csproj b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd.Arm64/AdvSimd.Arm64_ro.csproj index 30338e7b5e2f32..e57833e3b2b3e3 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd.Arm64/AdvSimd.Arm64_ro.csproj +++ b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd.Arm64/AdvSimd.Arm64_ro.csproj @@ -11,6 +11,6 @@ - + diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/AdvSimd_r.csproj b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/AdvSimd_r.csproj index 03cdf582f33d65..3dc1b39cdaf9b8 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/AdvSimd_r.csproj +++ b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/AdvSimd_r.csproj @@ -11,6 +11,6 @@ - + diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/AdvSimd_ro.csproj b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/AdvSimd_ro.csproj index 4423a589686ffc..d1d7e894662692 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/AdvSimd_ro.csproj +++ b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/AdvSimd_ro.csproj @@ -12,6 +12,6 @@ - + diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/VectorTableLookup.csproj b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/VectorTableLookup.csproj index 5fdae5e5d7b9f3..4c04a139f2ee1b 100644 --- 
a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/VectorTableLookup.csproj +++ b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/VectorTableLookup.csproj @@ -12,6 +12,7 @@ System.Private.CoreLib 436 436 + true @@ -19,4 +20,4 @@ - \ No newline at end of file + diff --git a/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_r.csproj b/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_r.csproj index d81af3a381450c..04ef01d82bc43d 100644 --- a/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_r.csproj +++ b/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_r.csproj @@ -26,6 +26,9 @@ + + + diff --git a/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_ro.csproj b/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_ro.csproj index cec6dbb86c4810..99c020a7f9a253 100644 --- a/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_ro.csproj +++ b/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_ro.csproj @@ -27,6 +27,9 @@ + + + From e64527b4ba812329e45a61bb058a423c0d499035 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 14 Feb 2023 14:37:39 -0800 Subject: [PATCH 065/125] Rename VectorTableLookup to VectorTableLookup.RefOnly --- .../Arm/AdvSimd.Arm64/AdvSimd.Arm64_r.csproj | 2 +- .../Arm/AdvSimd.Arm64/AdvSimd.Arm64_ro.csproj | 2 +- .../JIT/HardwareIntrinsics/Arm/AdvSimd/AdvSimd_r.csproj | 2 +- .../JIT/HardwareIntrinsics/Arm/AdvSimd/AdvSimd_ro.csproj | 2 +- ...rTableLookup.csproj => VectorTableLookup.RefOnly.csproj} | 0 .../JIT/HardwareIntrinsics/HardwareIntrinsics_r.csproj | 6 ++---- .../JIT/HardwareIntrinsics/HardwareIntrinsics_ro.csproj | 6 ++---- 7 files changed, 8 insertions(+), 12 deletions(-) rename src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/{VectorTableLookup.csproj => VectorTableLookup.RefOnly.csproj} (100%) diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd.Arm64/AdvSimd.Arm64_r.csproj b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd.Arm64/AdvSimd.Arm64_r.csproj index 9252a12f163c57..0b9e2196d889ac 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd.Arm64/AdvSimd.Arm64_r.csproj +++ b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd.Arm64/AdvSimd.Arm64_r.csproj @@ -11,6 +11,6 @@ - + diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd.Arm64/AdvSimd.Arm64_ro.csproj b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd.Arm64/AdvSimd.Arm64_ro.csproj index e57833e3b2b3e3..14faab744963bf 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd.Arm64/AdvSimd.Arm64_ro.csproj +++ b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd.Arm64/AdvSimd.Arm64_ro.csproj @@ -11,6 +11,6 @@ - + diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/AdvSimd_r.csproj b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/AdvSimd_r.csproj index 3dc1b39cdaf9b8..4f461394cef01c 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/AdvSimd_r.csproj +++ b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/AdvSimd_r.csproj @@ -11,6 +11,6 @@ - + diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/AdvSimd_ro.csproj b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/AdvSimd_ro.csproj index d1d7e894662692..e0e606a719c553 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/AdvSimd_ro.csproj +++ b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/AdvSimd_ro.csproj @@ -12,6 +12,6 @@ - + diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/VectorTableLookup.csproj b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/VectorTableLookup.RefOnly.csproj similarity index 100% rename from src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/VectorTableLookup.csproj rename to 
src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/VectorTableLookup.RefOnly.csproj diff --git a/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_r.csproj b/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_r.csproj index 04ef01d82bc43d..e07f4dccdc9840 100644 --- a/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_r.csproj +++ b/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_r.csproj @@ -15,7 +15,8 @@ - + + @@ -26,9 +27,6 @@ - - - diff --git a/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_ro.csproj b/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_ro.csproj index 99c020a7f9a253..d12fd6c5566294 100644 --- a/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_ro.csproj +++ b/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_ro.csproj @@ -16,7 +16,8 @@ - + + @@ -27,9 +28,6 @@ - - - From 22270c50c9adefe0e921790451e1404e75e090b8 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 15 Feb 2023 19:58:09 -0800 Subject: [PATCH 066/125] Start consecutive refpositions with RefTypeUse and never with RefTypeUpperVectorSave --- src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 2 - src/coreclr/jit/lsra.cpp | 11 +-- src/coreclr/jit/lsra.h | 4 +- src/coreclr/jit/lsraarm64.cpp | 94 ++++++--------------- 4 files changed, 29 insertions(+), 82 deletions(-) diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index a08f90f40e433c..7bff9e3a4448aa 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -548,8 +548,6 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) case NI_AdvSimd_Arm64_VectorTableLookupExtension: { assert(isRMW); - assert(targetReg != op2Reg); - assert(targetReg != op3Reg); unsigned regCount = 0; op1Reg = intrin.op1->GetRegNum(); op3Reg = intrin.op3->GetRegNum(); diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 598e48a0093846..6af34134a03bcd 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -5498,15 +5498,6 @@ void LinearScan::allocateRegisters() // no need to find register to assign. allocate = false; } - //else if (lastAllocatedRefPosition->needsConsecutive && - // lastAllocatedRefPosition->refType == RefTypeUpperVectorRestore) - //{ - // // If previous refposition was part of the series and it was UpperVectorRestore, - // // we have already assigned the same register to this refposition as well. - // // No need to allocate. - // assert(lastAllocatedRefPosition->registerAssignment == currentRefPosition.registerAssignment); - // allocate = false; - //} else { // If the subsequent refPosition is not assigned to the consecutive register, then reassign the @@ -12172,7 +12163,7 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval, #ifdef TARGET_ARM64 freeCandidates = linearScan->getFreeCandidates(candidates, refPosition); #else - freeCandidates = linearScan->getFreeCandidates(candidates ARM_ARG(regType)); + freeCandidates = linearScan->getFreeCandidates(candidates ARM_ARG(regType)); #endif // TARGET_ARM64 // If no free candidates, then double check if refPosition is an actual ref. 
diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 7dc755ec57ddea..b99870ec3ec0f7 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1191,8 +1191,8 @@ class LinearScan : public LinearScanInterface ****************************************************************************/ #if defined(TARGET_ARM64) - bool areNextConsecutiveRegistersFree(regNumber regToAssign, int registersToCheck, var_types registerType); - bool setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosition, regNumber firstRegAssigned); + bool areNextConsecutiveRegistersFree(regNumber regToAssign, int registersToCheck, var_types registerType); + bool setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosition, regNumber firstRegAssigned); regMaskTP getFreeCandidates(regMaskTP candidates, RefPosition* refPosition); #else regMaskTP getFreeCandidates(regMaskTP candidates ARM_ARG(var_types regType)) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 7851602829fa30..ad91b0db8f99a0 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -69,41 +69,23 @@ bool LinearScan::setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosit // are part of the range. if (!areNextConsecutiveRegistersFree(firstRegAssigned, firstRefPosition->regCount, - firstRefPosition->getInterval()->registerType)) + firstRefPosition->getInterval()->registerType)) { return false; } - -// RefPosition* consecutiveRefPosition = firstRefPosition; -// regNumber regToAssign = firstRegAssigned; -// while (consecutiveRefPosition != nullptr) -// { -// if (isRegInUse(regToAssign, consecutiveRefPosition->getInterval()->registerType)) -// { -// return false; -// } -// -//#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE -// if (consecutiveRefPosition->refType == RefTypeUpperVectorRestore) -// { -// consecutiveRefPosition = getNextConsecutiveRefPosition(consecutiveRefPosition); -// -// assert(consecutiveRefPosition->refType == RefTypeUse); -// assert(!isRegInUse(regToAssign, consecutiveRefPosition->getInterval()->registerType)); -// } -//#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE -// -// consecutiveRefPosition = getNextConsecutiveRefPosition(consecutiveRefPosition); -// regToAssign = regToAssign == REG_FP_LAST ? REG_FP_FIRST : REG_NEXT(regToAssign); -// } RefPosition* consecutiveRefPosition = getNextConsecutiveRefPosition(firstRefPosition); regNumber regToAssign = firstRegAssigned == REG_FP_LAST ? REG_FP_FIRST : REG_NEXT(firstRegAssigned); - INDEBUG(int refPosCount = 0); + + // First refposition should always start with RefTypeUse + assert(firstRefPosition->refType != RefTypeUpperVectorRestore); + + INDEBUG(int refPosCount = 1); regMaskTP busyConsecutiveRegMask = ~(((1ULL << firstRefPosition->regCount) - 1) << firstRegAssigned); while (consecutiveRefPosition != nullptr) { + assert(consecutiveRefPosition->regCount == 0); #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE if ((consecutiveRefPosition->refType == RefTypeUpperVectorRestore)) { @@ -111,38 +93,25 @@ bool LinearScan::setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosit { // Make sure that restore doesn't get one of the registers that are part of series we are trying to set // currently. 
+ // TODO-CQ: We could technically assign RefTypeUpperVectorRestore and its RefTypeUse same register, but + // during register selection, it might get tricky to know which of the busy registers are assigned to + // RefTypeUpperVectorRestore positions of corresponding variables for which (another criteria) + // we are trying to find consecutive registers. + consecutiveRefPosition->registerAssignment &= ~busyConsecutiveRegMask; } consecutiveRefPosition = getNextConsecutiveRefPosition(consecutiveRefPosition); } #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE + INDEBUG(refPosCount++); + assert(consecutiveRefPosition->refType == RefTypeUse); consecutiveRefPosition->registerAssignment = genRegMask(regToAssign); consecutiveRefPosition = getNextConsecutiveRefPosition(consecutiveRefPosition); regToAssign = regToAssign == REG_FP_LAST ? REG_FP_FIRST : REG_NEXT(regToAssign); - - INDEBUG(refPosCount++); } -// while (consecutiveRefPosition != nullptr) -// { -// consecutiveRefPosition->registerAssignment = genRegMask(regToAssign); -//#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE -// if (consecutiveRefPosition->refType == RefTypeUpperVectorRestore) -// { -// // For restore refPosition, make sure to have same assignment for it and the next one -// // which is the use of the variable. -// consecutiveRefPosition = getNextConsecutiveRefPosition(consecutiveRefPosition); -// consecutiveRefPosition->registerAssignment = genRegMask(regToAssign); -// } -//#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE -// consecutiveRefPosition = getNextConsecutiveRefPosition(consecutiveRefPosition); -// regToAssign = regToAssign == REG_FP_LAST ? REG_FP_FIRST : REG_NEXT(regToAssign); -// -// INDEBUG(refPosCount++); -// } - assert(refPosCount == firstRefPosition->regCount); - + return true; } @@ -182,7 +151,7 @@ regMaskTP LinearScan::getFreeCandidates(regMaskTP candidates, RefPosition* refPo return result; } - unsigned int registersNeeded = refPosition->regCount; + unsigned int registersNeeded = refPosition->regCount; regMaskTP currAvailableRegs = result; if (BitOperations::PopCount(currAvailableRegs) < registersNeeded) { @@ -194,19 +163,19 @@ regMaskTP LinearScan::getFreeCandidates(regMaskTP candidates, RefPosition* refPo // At this point, for 'n' registers requirement, if Rm+1, Rm+2, Rm+3, ..., Rm+k are // available, create the mask only for Rm+1, Rm+2, ..., Rm+(k-n+1) to convey that it // is safe to assign any of those registers, but not beyond that. 
-#define AppendConsecutiveMask(startIndex, endIndex, availableRegistersMask) \ - regMaskTP selectionStartMask = (1ULL << regAvailableStartIndex) - 1; \ - regMaskTP selectionEndMask = (1ULL << (regAvailableEndIndex - registersNeeded + 1)) - 1; \ - consecutiveResult |= availableRegistersMask & (selectionEndMask & ~selectionStartMask); \ +#define AppendConsecutiveMask(startIndex, endIndex, availableRegistersMask) \ + regMaskTP selectionStartMask = (1ULL << regAvailableStartIndex) - 1; \ + regMaskTP selectionEndMask = (1ULL << (regAvailableEndIndex - registersNeeded + 1)) - 1; \ + consecutiveResult |= availableRegistersMask & (selectionEndMask & ~selectionStartMask); \ overallResult |= availableRegistersMask; regMaskTP overallResult = RBM_NONE; regMaskTP consecutiveResult = RBM_NONE; - uint32_t regAvailableStartIndex = 0, regAvailableEndIndex = 0; + uint32_t regAvailableStartIndex = 0, regAvailableEndIndex = 0; do { // From LSB, find the first available register (bit `1`) - regAvailableStartIndex = BitOperations::_BitScanForward(currAvailableRegs); + regAvailableStartIndex = BitOperations::_BitScanForward(currAvailableRegs); regMaskTP startMask = (1ULL << regAvailableStartIndex) - 1; // Mask all the bits that are processed from LSB thru regAvailableStart until the last `1`. @@ -1446,7 +1415,10 @@ int LinearScan::BuildConsecutiveRegisters(GenTree* treeNode, GenTree* rmwNode) restoreRefPos->regCount = 0; if (firstRefPos == nullptr) { - firstRefPos = restoreRefPos; + // Always set the non UpperVectorRestore. UpperVectorRestore can be assigned + // different independent register. + // See TODO-CQ in setNextConsecutiveRegisterAssignment(). + firstRefPos = currRefPos; } refPositionMap->Set(lastRefPos, restoreRefPos, LinearScan::NextConsecutiveRefPositionsMap::Overwrite); refPositionMap->Set(restoreRefPos, currRefPos, LinearScan::NextConsecutiveRefPositionsMap::Overwrite); @@ -1465,20 +1437,6 @@ int LinearScan::BuildConsecutiveRegisters(GenTree* treeNode, GenTree* rmwNode) lastRefPos = currRefPos; regCount++; - if (rmwNode != nullptr) - { - // If we have rmwNode, determine if the currRefPos should be set to delay-free. - if ((currRefPos->getInterval() != rmwInterval) || (!rmwIsLastUse && !currRefPos->lastUse)) - { - setDelayFree(currRefPos); -#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE - if (restoreRefPos != nullptr) - { - setDelayFree(restoreRefPos); - } -#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE - } - } } // Just `regCount` to actual registers count for first ref-position. 
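For reference before the next patch (which adds VectorTableLookupExtension test templates): the scalar model below sketches the TBX behavior those tests validate. It is an illustration under stated assumptions (two 16-byte table registers, plain std::vector inputs), not the actual test Helpers code. In-range indices select a byte from the concatenated table; out-of-range indices leave the corresponding byte of the default (destination) vector unchanged, which is how TBX differs from TBL (where out-of-range indices produce 0).

```
#include <cstddef>
#include <cstdint>
#include <vector>
#include <cassert>

// Scalar reference model of multi-register TBX for one result byte.
// `table` holds 2, 3, or 4 rows of 16 bytes; `defaults` is the destination vector.
static uint8_t tbxByte(const std::vector<std::vector<uint8_t>>& table,
                       const std::vector<uint8_t>& defaults,
                       const std::vector<uint8_t>& indices,
                       size_t i)
{
    uint8_t idx        = indices[i];
    size_t  tableBytes = table.size() * 16;
    return (idx < tableBytes) ? table[idx / 16][idx % 16] : defaults[i];
}

int main()
{
    std::vector<std::vector<uint8_t>> table = {
        std::vector<uint8_t>(16, 0xAA), // first table register
        std::vector<uint8_t>(16, 0xBB), // second table register
    };
    std::vector<uint8_t> defaults(16, 0x11);
    std::vector<uint8_t> indices = {0, 17, 200, 31};

    assert(tbxByte(table, defaults, indices, 0) == 0xAA); // index 0 hits the first register
    assert(tbxByte(table, defaults, indices, 1) == 0xBB); // index 17 hits the second register
    assert(tbxByte(table, defaults, indices, 2) == 0x11); // out of range: default byte is kept
    return 0;
}
```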
From f3884fd7f681e2bcac9dd7fed783d49294ea4b58 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 16 Feb 2023 14:25:41 -0800 Subject: [PATCH 067/125] Add test cases for VectorTableLookupExtension --- .../GenerateHWIntrinsicTests_Arm.cs | 12 + .../VectorLookupExtension_2Test.template | 401 +++++++++++++++ .../VectorLookupExtension_3Test.template | 432 ++++++++++++++++ .../VectorLookupExtension_4Test.template | 462 ++++++++++++++++++ 4 files changed, 1307 insertions(+) create mode 100644 src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_2Test.template create mode 100644 src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_3Test.template create mode 100644 src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_4Test.template diff --git a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs index 1bec97194db3ba..d466dc79d53167 100644 --- a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs +++ b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs @@ -1712,6 +1712,12 @@ ("VectorLookup_4Test.template", new Dictionary { ["TestName"] = "VectorTableLookup4_Vector64_SByte", ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookup", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "(SByte)(TestLibrary.Generator.GetSByte() % 80)", ["ValidateFirstResult"] = "Helpers.TableVectorLookup(0, indices, table) != result[0]", ["ValidateRemainingResults"] = "Helpers.TableVectorLookup(i, indices, table) != result[i]"}), ("VecTernOpTest.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension_Vector64_Byte", ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector64", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["NextValueOp3"] = "(Byte)(TestLibrary.Generator.GetByte() % 20)", ["ValidateIterResult"] = "Helpers.TableVectorExtension(i, firstOp, thirdOp, secondOp) != result[i]"}), ("VecTernOpTest.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension_Vector64_SByte", ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector64", ["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "(SByte)(TestLibrary.Generator.GetSByte() % 20)", ["ValidateIterResult"] = "Helpers.TableVectorExtension(i, firstOp, thirdOp, secondOp) != result[i]"}), + ("VectorLookupExtension_2Test.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension2_Vector64_Byte", ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", 
["RetVectorType"] = "Vector64", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp0"] = "TestLibrary.Generator.GetByte()", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "(Byte)(TestLibrary.Generator.GetByte() % 40)", ["ValidateIterResult"] = "Helpers.TableVectorExtension(i, defaultValues, indices, table) != result[i]"}), + ("VectorLookupExtension_2Test.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension2_Vector64_SByte", ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp0"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "(SByte)(TestLibrary.Generator.GetSByte() % 40)", ["ValidateIterResult"] = "Helpers.TableVectorExtension(i, defaultValues, indices, table) != result[i]"}), + ("VectorLookupExtension_3Test.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension3_Vector64_Byte", ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp0"] = "TestLibrary.Generator.GetByte()", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "(Byte)(TestLibrary.Generator.GetByte() % 60)", ["ValidateIterResult"] = "Helpers.TableVectorExtension(i, defaultValues, indices, table) != result[i]"}), + ("VectorLookupExtension_3Test.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension3_Vector64_SByte", ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp0"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "(SByte)(TestLibrary.Generator.GetSByte() % 60)", ["ValidateIterResult"] = "Helpers.TableVectorExtension(i, defaultValues, indices, table) != result[i]"}), + ("VectorLookupExtension_4Test.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension4_Vector64_Byte", ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp0"] = "TestLibrary.Generator.GetByte()", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "(Byte)(TestLibrary.Generator.GetByte() % 80)", ["ValidateIterResult"] = "Helpers.TableVectorExtension(i, defaultValues, indices, table) != result[i]"}), + ("VectorLookupExtension_4Test.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension4_Vector64_SByte", ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "SByte", 
["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp0"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "(SByte)(TestLibrary.Generator.GetSByte() % 80)", ["ValidateIterResult"] = "Helpers.TableVectorExtension(i, defaultValues, indices, table) != result[i]"}), ("VecBinOpTest.template", new Dictionary { ["TestName"] = "Xor_Vector64_Byte", ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "Xor", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateIterResult"] = "Helpers.Xor(left[i], right[i]) != result[i]"}), ("VecBinOpTest.template", new Dictionary { ["TestName"] = "Xor_Vector64_Double", ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "Xor", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.Xor(left[i], right[i])) != BitConverter.DoubleToInt64Bits(result[i])"}), ("VecBinOpTest.template", new Dictionary { ["TestName"] = "Xor_Vector64_Int16", ["Isa"] = "AdvSimd", ["LoadIsa"] = "AdvSimd", ["Method"] = "Xor", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "Int16", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["ValidateIterResult"] = "Helpers.Xor(left[i], right[i]) != result[i]"}), @@ -2320,6 +2326,12 @@ ("VectorLookup_4Test.template", new Dictionary { ["TestName"] = "VectorTableLookup4_Vector128_SByte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookup", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "(SByte)(TestLibrary.Generator.GetSByte() % 80)", ["ValidateFirstResult"] = "Helpers.TableVectorLookup(0, indices, table) != result[0]", ["ValidateRemainingResults"] = "Helpers.TableVectorLookup(i, indices, table) != result[i]"}), ("VecTernOpTest.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension_Vector128_Byte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["NextValueOp3"] = "(Byte)(TestLibrary.Generator.GetByte() % 20)", ["ValidateIterResult"] = "Helpers.TableVectorExtension(i, firstOp, thirdOp, 
secondOp) != result[i]"}), ("VecTernOpTest.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension_Vector128_SByte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "(SByte)(TestLibrary.Generator.GetSByte() % 20)", ["ValidateIterResult"] = "Helpers.TableVectorExtension(i, firstOp, thirdOp, secondOp) != result[i]"}), + ("VectorLookupExtension_2Test.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension2_Vector128_Byte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp0"] = "TestLibrary.Generator.GetByte()", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "(Byte)(TestLibrary.Generator.GetByte() % 40)", ["ValidateIterResult"] = "Helpers.TableVectorLookupExtension(i, defaultValues, indices, table) != result[i]"}), + ("VectorLookupExtension_2Test.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension2_Vector128_SByte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp0"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "(SByte)(TestLibrary.Generator.GetSByte() % 40)", ["ValidateIterResult"] = "Helpers.TableVectorLookupExtension(i, defaultValues, indices, table) != result[i]"}), + ("VectorLookupExtension_3Test.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension3_Vector128_Byte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp0"] = "TestLibrary.Generator.GetByte()", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "(Byte)(TestLibrary.Generator.GetByte() % 60)", ["ValidateIterResult"] = "Helpers.TableVectorLookupExtension(i, defaultValues, indices, table) != result[i]"}), + ("VectorLookupExtension_3Test.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension3_Vector128_SByte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp0"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "(SByte)(TestLibrary.Generator.GetSByte() % 60)", ["ValidateIterResult"] = 
"Helpers.TableVectorLookupExtension(i, defaultValues, indices, table) != result[i]"}), + ("VectorLookupExtension_4Test.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension4_Vector128_Byte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp0"] = "TestLibrary.Generator.GetByte()", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "(Byte)(TestLibrary.Generator.GetByte() % 80)", ["ValidateIterResult"] = "Helpers.TableVectorLookupExtension(i, defaultValues, indices, table) != result[i]"}), + ("VectorLookupExtension_4Test.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension4_Vector128_SByte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp0"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "(SByte)(TestLibrary.Generator.GetSByte() % 80)", ["ValidateIterResult"] = "Helpers.TableVectorLookupExtension(i, defaultValues, indices, table) != result[i]"}), ("VecPairBinOpTest.template", new Dictionary { ["TestName"] = "UnzipEven_Vector64_Byte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "UnzipEven", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateEntry"] = "result[index] != left[i] || result[index + half] != right[i]"}), ("VecPairBinOpTest.template", new Dictionary { ["TestName"] = "UnzipEven_Vector64_Int16", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "UnzipEven", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "Int16", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["ValidateEntry"] = "result[index] != left[i] || result[index + half] != right[i]"}), ("VecPairBinOpTest.template", new Dictionary { ["TestName"] = "UnzipEven_Vector64_Int32", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "UnzipEven", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["ValidateEntry"] = "result[index] != left[i] || result[index + half] != right[i]"}), diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_2Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_2Test.template new file mode 100644 index 00000000000000..2a1242c60dfc1d --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_2Test.template 
@@ -0,0 +1,401 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/****************************************************************************** + * This file is auto-generated from a template file by the GenerateTests.csx * + * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make * + * changes, please update the corresponding template and run according to the * + * directions listed in the file. * + ******************************************************************************/ +extern alias CoreLib; +using AdvSimd = CoreLib::System.Runtime.Intrinsics.Arm.AdvSimd; +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; +using Xunit; + +namespace JIT.HardwareIntrinsics.Arm +{ + public static partial class Program + { + [Fact] + public static void VectorLookupExtension_2_{RetBaseType}() + { + var test = new VectorLookupExtension_2Test__{Method}{RetBaseType}(); + + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); + + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); + + // Validates passing a static member works + test.RunClsVarScenario(); + + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); + + // Validates passing the field of a local class works + test.RunClassLclFldScenario(); + + // Validates passing an instance member of a class works + test.RunClassFldScenario(); + + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); + + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class VectorLookupExtension_2Test__{Method}{RetBaseType} + { + private struct DataTable + { + private byte[] inArray0; + private byte[] inArray1; + private byte[] inArray2; + private byte[] inArray3; + private byte[] outArray; + + private GCHandle inHandle0; + private GCHandle inHandle1; + private GCHandle inHandle2; + private GCHandle inHandle3; + private GCHandle outHandle; + + private ulong alignment; + + public DataTable({Op2BaseType}[] inArray0, {Op1BaseType}[] inArray1, {Op1BaseType}[] inArray2, {Op2BaseType}[] inArray3, {RetBaseType}[] outArray, int alignment) + { + int sizeOfinArray0 = inArray0.Length * Unsafe.SizeOf<{Op2BaseType}>(); + int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray3 = inArray3.Length * Unsafe.SizeOf<{Op2BaseType}>(); + int sizeOfoutArray = outArray.Length * Unsafe.SizeOf<{RetBaseType}>(); + if ((alignment != 32 && alignment != 16 && alignment != 8) || (alignment * 2) < sizeOfinArray0 || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfinArray3 || (alignment * 2) < sizeOfoutArray) + { + throw new ArgumentException("Invalid value of alignment"); + } + + this.inArray0 = new byte[alignment * 2]; + this.inArray1 = new byte[alignment * 2]; + this.inArray2 = new byte[alignment * 2]; + this.inArray3 = new byte[alignment * 2]; + + this.inHandle0 = GCHandle.Alloc(this.inArray0, GCHandleType.Pinned); + this.inHandle1 = GCHandle.Alloc(this.inArray1, 
GCHandleType.Pinned); + this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned); + this.inHandle3 = GCHandle.Alloc(this.inArray3, GCHandleType.Pinned); + this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned); + + this.alignment = (ulong)alignment; + + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray0Ptr), ref Unsafe.As<{Op2BaseType}, byte>(ref inArray0[0]), (uint)sizeOfinArray0); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray1Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), (uint)sizeOfinArray1); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray2Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), (uint)sizeOfinArray2); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray3Ptr), ref Unsafe.As<{Op2BaseType}, byte>(ref inArray3[0]), (uint)sizeOfinArray3); + } + + public void* inArray0Ptr => Align((byte*)(inHandle0.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray3Ptr => Align((byte*)(inHandle3.AddrOfPinnedObject().ToPointer()), alignment); + public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment); + + public void Dispose() + { + inHandle0.Free(); + inHandle1.Free(); + inHandle2.Free(); + inHandle3.Free(); + outHandle.Free(); + } + + private static unsafe void* Align(byte* buffer, ulong expectedAlignment) + { + return (void*)(((ulong)buffer + expectedAlignment - 1) & ~(expectedAlignment - 1)); + } + } + + private struct TestStruct + { + public {Op2VectorType}<{Op2BaseType}> _fld0; + public {Op1VectorType}<{Op1BaseType}> _fld1; + public {Op1VectorType}<{Op1BaseType}> _fld2; + public {Op2VectorType}<{Op2BaseType}> _fld3; + + public static TestStruct Create() + { + var testStruct = new TestStruct(); + + for (var i = 0; i < Op2ElementCount; i++) { _data0[i] = {NextValueOp0}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref testStruct._fld0), ref Unsafe.As<{Op2BaseType}, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data3[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref testStruct._fld3), ref Unsafe.As<{Op2BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + + return testStruct; + } + + public void RunStructFldScenario(VectorLookupExtension_2Test__{Method}{RetBaseType} testClass) + { + var result = {Isa}.{Method}(_fld0, (_fld1, _fld2), _fld3); + + Unsafe.Write(testClass._dataTable.outArrayPtr, result); + testClass.ValidateResult(_fld0, _fld1, _fld2, _fld3, testClass._dataTable.outArrayPtr); + } + } + + private static readonly int LargestVectorSize = {LargestVectorSize}; + + private static readonly int Op1ElementCount = 
Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>() / sizeof({Op1BaseType}); + private static readonly int Op2ElementCount = Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>() / sizeof({Op2BaseType}); + private static readonly int RetElementCount = Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>() / sizeof({RetBaseType}); + + private static {Op2BaseType}[] _data0 = new {Op2BaseType}[Op2ElementCount]; + private static {Op1BaseType}[] _data1 = new {Op1BaseType}[Op1ElementCount]; + private static {Op1BaseType}[] _data2 = new {Op1BaseType}[Op1ElementCount]; + private static {Op2BaseType}[] _data3 = new {Op2BaseType}[Op1ElementCount]; + + private static {Op2VectorType}<{Op2BaseType}> _clsVar0; + private static {Op1VectorType}<{Op1BaseType}> _clsVar1; + private static {Op1VectorType}<{Op1BaseType}> _clsVar2; + private static {Op2VectorType}<{Op2BaseType}> _clsVar3; + + private {Op2VectorType}<{Op2BaseType}> _fld0; + private {Op1VectorType}<{Op1BaseType}> _fld1; + private {Op1VectorType}<{Op1BaseType}> _fld2; + private {Op2VectorType}<{Op2BaseType}> _fld3; + + private DataTable _dataTable; + + static VectorLookupExtension_2Test__{Method}{RetBaseType}() + { + for (var i = 0; i < Op2ElementCount; i++) { _data0[i] = {NextValueOp0}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _clsVar0), ref Unsafe.As<{Op2BaseType}, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _clsVar1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _clsVar2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data3[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _clsVar3), ref Unsafe.As<{Op2BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + } + + public VectorLookupExtension_2Test__{Method}{RetBaseType}() + { + Succeeded = true; + + for (var i = 0; i < Op2ElementCount; i++) { _data0[i] = {NextValueOp0}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _fld0), ref Unsafe.As<{Op2BaseType}, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data3[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _fld3), ref Unsafe.As<{Op2BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + + for (var i = 0; i < Op2ElementCount; i++) { _data0[i] = {NextValueOp0}; } + for (var 
i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + for (var i = 0; i < Op2ElementCount; i++) { _data3[i] = {NextValueOp2}; } + _dataTable = new DataTable(_data0, _data1, _data2, _data3, new {RetBaseType}[RetElementCount], LargestVectorSize); + } + + public bool Succeeded { get; set; } + + public void RunBasicScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead)); + + var result = {Isa}.{Method}( + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray0Ptr), + (Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr)), + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray3Ptr) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); + + var method = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { + typeof({Op2VectorType}<{Op2BaseType}>), + typeof({Op1VectorType}<{Op1BaseType}>), + typeof({Op1VectorType}<{Op1BaseType}>), + typeof({Op2VectorType}<{Op2BaseType}>) + }); + + if (method is null) + { + method = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), 1, new Type[] { + typeof({Op2VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), + typeof({Op1VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), + typeof({Op1VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), + typeof({Op2VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)) + }); + } + + if (method.IsGenericMethodDefinition) + { + method = method.MakeGenericMethod(typeof({RetBaseType})); + } + + var result = method.Invoke(null, new object[] { + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray0Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr), + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray3Ptr) + }); + + Unsafe.Write(_dataTable.outArrayPtr, ({RetVectorType}<{RetBaseType}>)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr); + } + + public void RunClsVarScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario)); + + var result = {Isa}.{Method}( + _clsVar0, + (_clsVar1, + _clsVar2), + _clsVar3 + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_clsVar0, _clsVar1, _clsVar2, _clsVar3, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); + + var op0 = Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray0Ptr); + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); + var op2 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr); + var op3 = Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray3Ptr); + var result = {Isa}.{Method}(op0, (op1, op2), op3); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(op0, op1, op2, op3, _dataTable.outArrayPtr); + } + + public void RunClassLclFldScenario() + { + 
TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario)); + + var test = new VectorLookupExtension_2Test__{Method}{RetBaseType}(); + var result = {Isa}.{Method}(test._fld0, (test._fld1, test._fld2), test._fld3); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, test._fld3, _dataTable.outArrayPtr); + } + + public void RunClassFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario)); + + var result = {Isa}.{Method}(_fld0, (_fld1, _fld2), _fld3); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_fld0, _fld1, _fld2, _fld3, _dataTable.outArrayPtr); + } + + public void RunStructLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario)); + + var test = TestStruct.Create(); + var result = {Isa}.{Method}(test._fld0, (test._fld1, test._fld2), test._fld3); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, test._fld3, _dataTable.outArrayPtr); + } + + public void RunStructFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario)); + + var test = TestStruct.Create(); + test.RunStructFldScenario(this); + } + + private void ValidateResult({Op2VectorType}<{Op2BaseType}> op0, {Op1VectorType}<{Op1BaseType}> op1, {Op1VectorType}<{Op1BaseType}> op2, {Op2VectorType}<{Op2BaseType}> op3, void* result, [CallerMemberName] string method = "") + { + {Op2BaseType}[] inArray0 = new {Op2BaseType}[Op2ElementCount]; + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray2 = new {Op1BaseType}[Op1ElementCount]; + {Op2BaseType}[] inArray3 = new {Op2BaseType}[Op2ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.WriteUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray0[0]), op0); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), op1); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), op2); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray3[0]), op3); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray0, inArray1, inArray2, inArray3, outArray, method); + } + + private void ValidateResult(void* op0, void* op1, void* op2, void* op3, void* result, [CallerMemberName] string method = "") + { + {Op2BaseType}[] inArray0 = new {Op2BaseType}[Op2ElementCount]; + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray2 = new {Op1BaseType}[Op1ElementCount]; + {Op2BaseType}[] inArray3 = new {Op2BaseType}[Op2ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray0[0]), ref Unsafe.AsRef(op0), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), ref Unsafe.AsRef(op1), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), ref Unsafe.AsRef(op2), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray3[0]), ref Unsafe.AsRef(op3), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref 
outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray0, inArray1, inArray2, inArray3, outArray, method); + } + + private void ValidateResult({Op2BaseType}[] defaultValues, {Op1BaseType}[] firstOp, {Op1BaseType}[] secondOp, {Op2BaseType}[] indices, {RetBaseType}[] result, [CallerMemberName] string method = "") + { + bool succeeded = true; + {Op1BaseType}[][] table = {firstOp, secondOp}; + + for (var i = 0; i < RetElementCount; i++) + { + if ({ValidateIterResult}) + { + succeeded = false; + break; + } + } + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof({Isa})}.{nameof({Isa}.{Method})}<{RetBaseType}>({Op2VectorType}<{Op2BaseType}>, ({Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>), {Op2VectorType}<{Op2BaseType}>): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" defaultValues: ({string.Join(", ", defaultValues)})"); + TestLibrary.TestFramework.LogInformation($" firstOp: ({string.Join(", ", firstOp)})"); + TestLibrary.TestFramework.LogInformation($" secondOp: ({string.Join(", ", secondOp)})"); + TestLibrary.TestFramework.LogInformation($" indices: ({string.Join(", ", indices)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_3Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_3Test.template new file mode 100644 index 00000000000000..d9ced1e2844761 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_3Test.template @@ -0,0 +1,432 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/****************************************************************************** + * This file is auto-generated from a template file by the GenerateTests.csx * + * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make * + * changes, please update the corresponding template and run according to the * + * directions listed in the file. 
* + ******************************************************************************/ +extern alias CoreLib; +using AdvSimd = CoreLib::System.Runtime.Intrinsics.Arm.AdvSimd; +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; +using Xunit; + +namespace JIT.HardwareIntrinsics.Arm +{ + public static partial class Program + { + [Fact] + public static void VectorLookupExtension_3_{RetBaseType}() + { + var test = new VectorLookupExtension_3Test__{Method}{RetBaseType}(); + + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); + + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); + + // Validates passing a static member works + test.RunClsVarScenario(); + + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); + + // Validates passing the field of a local class works + test.RunClassLclFldScenario(); + + // Validates passing an instance member of a class works + test.RunClassFldScenario(); + + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); + + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class VectorLookupExtension_3Test__{Method}{RetBaseType} + { + private struct DataTable + { + private byte[] inArray0; + private byte[] inArray1; + private byte[] inArray2; + private byte[] inArray3; + private byte[] inArray4; + private byte[] outArray; + + private GCHandle inHandle0; + private GCHandle inHandle1; + private GCHandle inHandle2; + private GCHandle inHandle3; + private GCHandle inHandle4; + private GCHandle outHandle; + + private ulong alignment; + + public DataTable({Op2BaseType}[] inArray0, {Op1BaseType}[] inArray1, {Op1BaseType}[] inArray2, {Op1BaseType}[] inArray3, {Op2BaseType}[] inArray4, {RetBaseType}[] outArray, int alignment) + { + int sizeOfinArray0 = inArray0.Length * Unsafe.SizeOf<{Op2BaseType}>(); + int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray3 = inArray3.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray4 = inArray4.Length * Unsafe.SizeOf<{Op2BaseType}>(); + int sizeOfoutArray = outArray.Length * Unsafe.SizeOf<{RetBaseType}>(); + if ((alignment != 32 && alignment != 16 && alignment != 8) || (alignment * 2) < sizeOfinArray0 || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfinArray3 || (alignment * 2) < sizeOfinArray4 || (alignment * 2) < sizeOfoutArray) + { + throw new ArgumentException("Invalid value of alignment"); + } + + this.inArray0 = new byte[alignment * 2]; + this.inArray1 = new byte[alignment * 2]; + this.inArray2 = new byte[alignment * 2]; + this.inArray3 = new byte[alignment * 2]; + this.inArray4 = new byte[alignment * 2]; + this.outArray = new byte[alignment * 2]; + + this.inHandle0 = GCHandle.Alloc(this.inArray0, GCHandleType.Pinned); + this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned); + this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned); + this.inHandle3 = GCHandle.Alloc(this.inArray3, GCHandleType.Pinned); + this.inHandle4 = GCHandle.Alloc(this.inArray4, GCHandleType.Pinned); + this.outHandle 
= GCHandle.Alloc(this.outArray, GCHandleType.Pinned); + + this.alignment = (ulong)alignment; + + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray0Ptr), ref Unsafe.As<{Op2BaseType}, byte>(ref inArray0[0]), (uint)sizeOfinArray0); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray1Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), (uint)sizeOfinArray1); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray2Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), (uint)sizeOfinArray2); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray3Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray3[0]), (uint)sizeOfinArray3); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray4Ptr), ref Unsafe.As<{Op2BaseType}, byte>(ref inArray4[0]), (uint)sizeOfinArray4); + } + + public void* inArray0Ptr => Align((byte*)(inHandle0.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray3Ptr => Align((byte*)(inHandle3.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray4Ptr => Align((byte*)(inHandle4.AddrOfPinnedObject().ToPointer()), alignment); + public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment); + + public void Dispose() + { + inHandle0.Free(); + inHandle1.Free(); + inHandle2.Free(); + inHandle3.Free(); + inHandle4.Free(); + outHandle.Free(); + } + + private static unsafe void* Align(byte* buffer, ulong expectedAlignment) + { + return (void*)(((ulong)buffer + expectedAlignment - 1) & ~(expectedAlignment - 1)); + } + } + + private struct TestStruct + { + public {Op2VectorType}<{Op2BaseType}> _fld0; + public {Op1VectorType}<{Op1BaseType}> _fld1; + public {Op1VectorType}<{Op1BaseType}> _fld2; + public {Op1VectorType}<{Op1BaseType}> _fld3; + public {Op2VectorType}<{Op2BaseType}> _fld4; + + public static TestStruct Create() + { + var testStruct = new TestStruct(); + + for (var i = 0; i < Op2ElementCount; i++) { _data0[i] = {NextValueOp0}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref testStruct._fld0), ref Unsafe.As<{Op2BaseType}, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld3), ref Unsafe.As<{Op1BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data4[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref testStruct._fld4), ref Unsafe.As<{Op2BaseType}, byte>(ref _data4[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + + return testStruct; + } + + public void 
RunStructFldScenario(VectorLookupExtension_3Test__{Method}{RetBaseType} testClass) + { + var result = {Isa}.{Method}(_fld0, (_fld1, _fld2, _fld3), _fld4); + + Unsafe.Write(testClass._dataTable.outArrayPtr, result); + testClass.ValidateResult(_fld0, _fld1, _fld2, _fld3, _fld4, testClass._dataTable.outArrayPtr); + } + } + + private static readonly int LargestVectorSize = {LargestVectorSize}; + + private static readonly int Op1ElementCount = Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>() / sizeof({Op1BaseType}); + private static readonly int Op2ElementCount = Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>() / sizeof({Op2BaseType}); + private static readonly int RetElementCount = Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>() / sizeof({RetBaseType}); + + private static {Op2BaseType}[] _data0 = new {Op2BaseType}[Op2ElementCount]; + private static {Op1BaseType}[] _data1 = new {Op1BaseType}[Op1ElementCount]; + private static {Op1BaseType}[] _data2 = new {Op1BaseType}[Op1ElementCount]; + private static {Op1BaseType}[] _data3 = new {Op1BaseType}[Op1ElementCount]; + private static {Op2BaseType}[] _data4 = new {Op2BaseType}[Op2ElementCount]; + + private static {Op2VectorType}<{Op2BaseType}> _clsVar0; + private static {Op1VectorType}<{Op1BaseType}> _clsVar1; + private static {Op1VectorType}<{Op1BaseType}> _clsVar2; + private static {Op1VectorType}<{Op1BaseType}> _clsVar3; + private static {Op2VectorType}<{Op2BaseType}> _clsVar4; + + private {Op2VectorType}<{Op2BaseType}> _fld0; + private {Op1VectorType}<{Op1BaseType}> _fld1; + private {Op1VectorType}<{Op1BaseType}> _fld2; + private {Op1VectorType}<{Op1BaseType}> _fld3; + private {Op2VectorType}<{Op2BaseType}> _fld4; + + private DataTable _dataTable; + + static VectorLookupExtension_3Test__{Method}{RetBaseType}() + { + for (var i = 0; i < Op2ElementCount; i++) { _data0[i] = {NextValueOp0}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _clsVar0), ref Unsafe.As<{Op2BaseType}, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _clsVar1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _clsVar2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _clsVar3), ref Unsafe.As<{Op1BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data4[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _clsVar4), ref Unsafe.As<{Op2BaseType}, byte>(ref _data4[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + } + + public VectorLookupExtension_3Test__{Method}{RetBaseType}() + { + Succeeded = true; + + for (var i = 0; i < Op2ElementCount; i++) { _data0[i] = {NextValueOp0}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _fld0), ref Unsafe.As<{Op2BaseType}, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + for 
(var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld3), ref Unsafe.As<{Op1BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data4[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _fld4), ref Unsafe.As<{Op2BaseType}, byte>(ref _data4[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + + for (var i = 0; i < Op2ElementCount; i++) { _data0[i] = {NextValueOp0}; } + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; } + for (var i = 0; i < Op2ElementCount; i++) { _data4[i] = {NextValueOp2}; } + _dataTable = new DataTable(_data0, _data1, _data2, _data3, _data4, new {RetBaseType}[RetElementCount], LargestVectorSize); + } + + public bool Succeeded { get; set; } + + public void RunBasicScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead)); + + var result = {Isa}.{Method}( + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray0Ptr), + (Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray3Ptr)), + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray4Ptr) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.inArray4Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); + + var method = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { + typeof({Op2VectorType}<{Op2BaseType}>), + typeof({Op1VectorType}<{Op1BaseType}>), + typeof({Op1VectorType}<{Op1BaseType}>), + typeof({Op1VectorType}<{Op1BaseType}>), + typeof({Op2VectorType}<{Op2BaseType}>) + }); + + if (method is null) + { + method = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), 1, new Type[] { + typeof({Op2VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), + typeof({Op1VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), + typeof({Op1VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), + typeof({Op1VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), + typeof({Op2VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)) + }); + } + + if (method.IsGenericMethodDefinition) + { + method = method.MakeGenericMethod(typeof({RetBaseType})); + } + + var result = method.Invoke(null, new object[] { + 
Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray0Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray3Ptr), + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray4Ptr) + }); + + Unsafe.Write(_dataTable.outArrayPtr, ({RetVectorType}<{RetBaseType}>)(result)); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.inArray4Ptr, _dataTable.outArrayPtr); + } + + public void RunClsVarScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario)); + + var result = {Isa}.{Method}( + _clsVar0, + (_clsVar1, + _clsVar2, + _clsVar3), + _clsVar4 + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_clsVar0, _clsVar1, _clsVar2, _clsVar3, _clsVar4, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); + + var op0 = Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray0Ptr); + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); + var op2 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr); + var op3 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray3Ptr); + var op4 = Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray4Ptr); + var result = {Isa}.{Method}(op0, (op1, op2, op3), op4); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(op0, op1, op2, op3, op4, _dataTable.outArrayPtr); + } + + public void RunClassLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario)); + + var test = new VectorLookupExtension_3Test__{Method}{RetBaseType}(); + var result = {Isa}.{Method}(test._fld0, (test._fld1, test._fld2, test._fld3), test._fld4); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, test._fld3, test._fld4, _dataTable.outArrayPtr); + } + + public void RunClassFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario)); + + var result = {Isa}.{Method}(_fld0, (_fld1, _fld2, _fld3), _fld4); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_fld0, _fld1, _fld2, _fld3, _fld4, _dataTable.outArrayPtr); + } + + public void RunStructLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario)); + + var test = TestStruct.Create(); + var result = {Isa}.{Method}(test._fld0, (test._fld1, test._fld2, test._fld3), test._fld4); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, test._fld3, test._fld4, _dataTable.outArrayPtr); + } + + public void RunStructFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario)); + + var test = TestStruct.Create(); + test.RunStructFldScenario(this); + } + + private void ValidateResult({Op2VectorType}<{Op2BaseType}> op0, {Op1VectorType}<{Op1BaseType}> op1, {Op1VectorType}<{Op1BaseType}> op2, {Op1VectorType}<{Op1BaseType}> op3, {Op2VectorType}<{Op2BaseType}> op4, void* result, [CallerMemberName] string method = "") + { + {Op2BaseType}[] inArray0 = new {Op2BaseType}[Op2ElementCount]; + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray2 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray3 = new 
{Op1BaseType}[Op1ElementCount]; + {Op2BaseType}[] inArray4 = new {Op2BaseType}[Op2ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.WriteUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray0[0]), op0); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), op1); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), op2); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray3[0]), op3); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray4[0]), op4); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray0, inArray1, inArray2, inArray3, inArray4, outArray, method); + } + + private void ValidateResult(void* op0, void* op1, void* op2, void* op3, void* op4, void* result, [CallerMemberName] string method = "") + { + {Op2BaseType}[] inArray0 = new {Op2BaseType}[Op2ElementCount]; + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray2 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray3 = new {Op1BaseType}[Op1ElementCount]; + {Op2BaseType}[] inArray4 = new {Op2BaseType}[Op2ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray0[0]), ref Unsafe.AsRef(op0), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), ref Unsafe.AsRef(op1), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), ref Unsafe.AsRef(op2), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray3[0]), ref Unsafe.AsRef(op3), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray4[0]), ref Unsafe.AsRef(op4), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray0, inArray1, inArray2, inArray3, inArray4, outArray, method); + } + + private void ValidateResult({Op2BaseType}[] defaultValues, {Op1BaseType}[] firstOp, {Op1BaseType}[] secondOp, {Op1BaseType}[] thirdOp, {Op2BaseType}[] indices, {RetBaseType}[] result, [CallerMemberName] string method = "") + { + bool succeeded = true; + {Op1BaseType}[][] table = {firstOp, secondOp, thirdOp}; + + for (var i = 0; i < RetElementCount; i++) + { + if ({ValidateIterResult}) + { + succeeded = false; + break; + } + } + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof({Isa})}.{nameof({Isa}.{Method})}<{RetBaseType}>({Op2VectorType}<{Op2BaseType}>, ({Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>), {Op2VectorType}<{Op2BaseType}>): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" defaultValues: ({string.Join(", ", defaultValues)})"); + TestLibrary.TestFramework.LogInformation($" firstOp: ({string.Join(", ", firstOp)})"); + TestLibrary.TestFramework.LogInformation($" secondOp: ({string.Join(", ", secondOp)})"); + TestLibrary.TestFramework.LogInformation($" thirdOp: ({string.Join(", ", thirdOp)})"); + 
TestLibrary.TestFramework.LogInformation($" indices: ({string.Join(", ", indices)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_4Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_4Test.template new file mode 100644 index 00000000000000..6f4f080ecc1209 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_4Test.template @@ -0,0 +1,462 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/****************************************************************************** + * This file is auto-generated from a template file by the GenerateTests.csx * + * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make * + * changes, please update the corresponding template and run according to the * + * directions listed in the file. * + ******************************************************************************/ +extern alias CoreLib; +using AdvSimd = CoreLib::System.Runtime.Intrinsics.Arm.AdvSimd; +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; +using Xunit; + +namespace JIT.HardwareIntrinsics.Arm +{ + public static partial class Program + { + [Fact] + public static void VectorLookupExtension_4_{RetBaseType}() + { + var test = new VectorLookupExtension_4Test__{Method}{RetBaseType}(); + + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); + + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); + + // Validates passing a static member works + test.RunClsVarScenario(); + + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); + + // Validates passing the field of a local class works + test.RunClassLclFldScenario(); + + // Validates passing an instance member of a class works + test.RunClassFldScenario(); + + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); + + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class VectorLookupExtension_4Test__{Method}{RetBaseType} + { + private struct DataTable + { + private byte[] inArray0; + private byte[] inArray1; + private byte[] inArray2; + private byte[] inArray3; + private byte[] inArray4; + private byte[] inArray5; + private byte[] outArray; + + private GCHandle inHandle0; + private GCHandle inHandle1; + private GCHandle inHandle2; + private GCHandle inHandle3; + private GCHandle inHandle4; + private GCHandle inHandle5; + private GCHandle outHandle; + + private ulong alignment; + + public DataTable({Op2BaseType}[] inArray0, {Op1BaseType}[] inArray1, {Op1BaseType}[] inArray2, {Op1BaseType}[] inArray3, {Op1BaseType}[] inArray4, {Op2BaseType}[] inArray5, {RetBaseType}[] outArray, int alignment) + { + int sizeOfinArray0 = inArray0.Length * Unsafe.SizeOf<{Op2BaseType}>(); + int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray2 = inArray2.Length * 
Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray3 = inArray3.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray4 = inArray4.Length * Unsafe.SizeOf<{Op1BaseType}>(); + int sizeOfinArray5 = inArray5.Length * Unsafe.SizeOf<{Op2BaseType}>(); + int sizeOfoutArray = outArray.Length * Unsafe.SizeOf<{RetBaseType}>(); + if ((alignment != 32 && alignment != 16 && alignment != 8) || (alignment * 2) < sizeOfinArray0 || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfinArray3 || (alignment * 2) < sizeOfinArray4 || (alignment * 2) < sizeOfinArray5 || (alignment * 2) < sizeOfoutArray) + { + throw new ArgumentException("Invalid value of alignment"); + } + + this.inArray0 = new byte[alignment * 2]; + this.inArray1 = new byte[alignment * 2]; + this.inArray2 = new byte[alignment * 2]; + this.inArray3 = new byte[alignment * 2]; + this.inArray4 = new byte[alignment * 2]; + this.inArray5 = new byte[alignment * 2]; + this.outArray = new byte[alignment * 2]; + + this.inHandle0 = GCHandle.Alloc(this.inArray0, GCHandleType.Pinned); + this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned); + this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned); + this.inHandle3 = GCHandle.Alloc(this.inArray3, GCHandleType.Pinned); + this.inHandle4 = GCHandle.Alloc(this.inArray4, GCHandleType.Pinned); + this.inHandle5 = GCHandle.Alloc(this.inArray5, GCHandleType.Pinned); + this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned); + + this.alignment = (ulong)alignment; + + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray0Ptr), ref Unsafe.As<{Op2BaseType}, byte>(ref inArray0[0]), (uint)sizeOfinArray0); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray1Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), (uint)sizeOfinArray1); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray2Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), (uint)sizeOfinArray2); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray3Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray3[0]), (uint)sizeOfinArray3); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray4Ptr), ref Unsafe.As<{Op1BaseType}, byte>(ref inArray4[0]), (uint)sizeOfinArray4); + Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef(inArray5Ptr), ref Unsafe.As<{Op2BaseType}, byte>(ref inArray5[0]), (uint)sizeOfinArray5); + } + + public void* inArray0Ptr => Align((byte*)(inHandle0.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray3Ptr => Align((byte*)(inHandle3.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray4Ptr => Align((byte*)(inHandle4.AddrOfPinnedObject().ToPointer()), alignment); + public void* inArray5Ptr => Align((byte*)(inHandle5.AddrOfPinnedObject().ToPointer()), alignment); + public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment); + + public void Dispose() + { + inHandle0.Free(); + inHandle1.Free(); + inHandle2.Free(); + inHandle3.Free(); + inHandle4.Free(); + inHandle5.Free(); + outHandle.Free(); + } + + private static unsafe void* Align(byte* buffer, ulong expectedAlignment) + { + return (void*)(((ulong)buffer + expectedAlignment - 1) & ~(expectedAlignment - 1)); + } + } + + private struct TestStruct + { + public {Op2VectorType}<{Op2BaseType}> _fld0; + public {Op1VectorType}<{Op1BaseType}> _fld1; + 
public {Op1VectorType}<{Op1BaseType}> _fld2; + public {Op1VectorType}<{Op1BaseType}> _fld3; + public {Op1VectorType}<{Op1BaseType}> _fld4; + public {Op2VectorType}<{Op2BaseType}> _fld5; + + public static TestStruct Create() + { + var testStruct = new TestStruct(); + + for (var i = 0; i < Op2ElementCount; i++) { _data0[i] = {NextValueOp0}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref testStruct._fld0), ref Unsafe.As<{Op2BaseType}, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld3), ref Unsafe.As<{Op1BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data4[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld4), ref Unsafe.As<{Op1BaseType}, byte>(ref _data4[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data5[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref testStruct._fld5), ref Unsafe.As<{Op2BaseType}, byte>(ref _data5[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + + return testStruct; + } + + public void RunStructFldScenario(VectorLookupExtension_4Test__{Method}{RetBaseType} testClass) + { + var result = {Isa}.{Method}(_fld0, (_fld1, _fld2, _fld3, _fld4), _fld5); + + Unsafe.Write(testClass._dataTable.outArrayPtr, result); + testClass.ValidateResult(_fld0, _fld1, _fld2, _fld3, _fld4, _fld5, testClass._dataTable.outArrayPtr); + } + } + + private static readonly int LargestVectorSize = {LargestVectorSize}; + + private static readonly int Op1ElementCount = Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>() / sizeof({Op1BaseType}); + private static readonly int Op2ElementCount = Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>() / sizeof({Op2BaseType}); + private static readonly int RetElementCount = Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>() / sizeof({RetBaseType}); + + private static {Op2BaseType}[] _data0 = new {Op2BaseType}[Op2ElementCount]; + private static {Op1BaseType}[] _data1 = new {Op1BaseType}[Op1ElementCount]; + private static {Op1BaseType}[] _data2 = new {Op1BaseType}[Op1ElementCount]; + private static {Op1BaseType}[] _data3 = new {Op1BaseType}[Op1ElementCount]; + private static {Op1BaseType}[] _data4 = new {Op1BaseType}[Op1ElementCount]; + private static {Op2BaseType}[] _data5 = new {Op2BaseType}[Op2ElementCount]; + + private static {Op2VectorType}<{Op2BaseType}> _clsVar0; + private static {Op1VectorType}<{Op1BaseType}> _clsVar1; + private static {Op1VectorType}<{Op1BaseType}> _clsVar2; + private static {Op1VectorType}<{Op1BaseType}> _clsVar3; + private static {Op1VectorType}<{Op1BaseType}> 
_clsVar4; + private static {Op2VectorType}<{Op2BaseType}> _clsVar5; + + private {Op2VectorType}<{Op2BaseType}> _fld0; + private {Op1VectorType}<{Op1BaseType}> _fld1; + private {Op1VectorType}<{Op1BaseType}> _fld2; + private {Op1VectorType}<{Op1BaseType}> _fld3; + private {Op1VectorType}<{Op1BaseType}> _fld4; + private {Op2VectorType}<{Op2BaseType}> _fld5; + + private DataTable _dataTable; + + static VectorLookupExtension_4Test__{Method}{RetBaseType}() + { + for (var i = 0; i < Op2ElementCount; i++) { _data0[i] = {NextValueOp0}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _clsVar0), ref Unsafe.As<{Op2BaseType}, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _clsVar1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _clsVar2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _clsVar3), ref Unsafe.As<{Op1BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data4[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _clsVar4), ref Unsafe.As<{Op1BaseType}, byte>(ref _data4[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data5[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _clsVar5), ref Unsafe.As<{Op2BaseType}, byte>(ref _data5[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + } + + public VectorLookupExtension_4Test__{Method}{RetBaseType}() + { + Succeeded = true; + + for (var i = 0; i < Op2ElementCount; i++) { _data0[i] = {NextValueOp0}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _fld0), ref Unsafe.As<{Op2BaseType}, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld3), ref Unsafe.As<{Op1BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op1ElementCount; i++) { _data4[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld4), ref Unsafe.As<{Op1BaseType}, byte>(ref _data4[0]), 
(uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data5[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _fld5), ref Unsafe.As<{Op2BaseType}, byte>(ref _data5[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + + for (var i = 0; i < Op2ElementCount; i++) { _data0[i] = {NextValueOp0}; } + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; } + for (var i = 0; i < Op1ElementCount; i++) { _data4[i] = {NextValueOp1}; } + for (var i = 0; i < Op2ElementCount; i++) { _data5[i] = {NextValueOp2}; } + _dataTable = new DataTable(_data0, _data1, _data2, _data3, _data4, _data5, new {RetBaseType}[RetElementCount], LargestVectorSize); + } + + public bool Succeeded { get; set; } + + public void RunBasicScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead)); + + var result = {Isa}.{Method}( + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray0Ptr), + (Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray3Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray4Ptr)), + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray5Ptr) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.inArray4Ptr, _dataTable.inArray5Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); + + var method = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { + typeof({Op2VectorType}<{Op2BaseType}>), + typeof({Op1VectorType}<{Op1BaseType}>), + typeof({Op1VectorType}<{Op1BaseType}>), + typeof({Op1VectorType}<{Op1BaseType}>), + typeof({Op1VectorType}<{Op1BaseType}>), + typeof({Op2VectorType}<{Op2BaseType}>) + }); + + if (method is null) + { + method = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), 1, new Type[] { + typeof({Op2VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), + typeof({Op1VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), + typeof({Op1VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), + typeof({Op1VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), + typeof({Op1VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), + typeof({Op2VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)) + }); + } + + if (method.IsGenericMethodDefinition) + { + method = method.MakeGenericMethod(typeof({RetBaseType})); + } + + var result = method.Invoke(null, new object[] { + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray0Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray3Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray4Ptr), + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray5Ptr) + }); + + Unsafe.Write(_dataTable.outArrayPtr, ({RetVectorType}<{RetBaseType}>)(result)); + 
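+ // Invoke returns the result boxed as an object; it is cast back to {RetVectorType}<{RetBaseType}> above before being written to the output buffer for validation.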
ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.inArray4Ptr, _dataTable.inArray5Ptr, _dataTable.outArrayPtr); + } + + public void RunClsVarScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario)); + + var result = {Isa}.{Method}( + _clsVar0, + (_clsVar1, + _clsVar2, + _clsVar3, + _clsVar4), + _clsVar5 + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_clsVar0, _clsVar1, _clsVar2, _clsVar3, _clsVar4, _clsVar5, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); + + var op0 = Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray0Ptr); + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); + var op2 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr); + var op3 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray3Ptr); + var op4 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray4Ptr); + var op5 = Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray5Ptr); + var result = {Isa}.{Method}(op0, (op1, op2, op3, op4), op5); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(op0, op1, op2, op3, op4, op5, _dataTable.outArrayPtr); + } + + public void RunClassLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario)); + + var test = new VectorLookupExtension_4Test__{Method}{RetBaseType}(); + var result = {Isa}.{Method}(test._fld0, (test._fld1, test._fld2, test._fld3, test._fld4), test._fld5); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, test._fld3, test._fld4, test._fld5, _dataTable.outArrayPtr); + } + + public void RunClassFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario)); + + var result = {Isa}.{Method}(_fld0, (_fld1, _fld2, _fld3, _fld4), _fld5); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_fld0, _fld1, _fld2, _fld3, _fld4, _fld5, _dataTable.outArrayPtr); + } + + public void RunStructLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario)); + + var test = TestStruct.Create(); + var result = {Isa}.{Method}(test._fld0, (test._fld1, test._fld2, test._fld3, test._fld4), test._fld5); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld0, test._fld1, test._fld2, test._fld3, test._fld4, test._fld5, _dataTable.outArrayPtr); + } + + public void RunStructFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario)); + + var test = TestStruct.Create(); + test.RunStructFldScenario(this); + } + + private void ValidateResult({Op2VectorType}<{Op2BaseType}> op0, {Op1VectorType}<{Op1BaseType}> op1, {Op1VectorType}<{Op1BaseType}> op2, {Op1VectorType}<{Op1BaseType}> op3, {Op1VectorType}<{Op1BaseType}> op4, {Op2VectorType}<{Op2BaseType}> op5, void* result, [CallerMemberName] string method = "") + { + {Op2BaseType}[] inArray0 = new {Op2BaseType}[Op2ElementCount]; + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray2 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray3 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray4 = new {Op1BaseType}[Op1ElementCount]; + {Op2BaseType}[] inArray5 = new {Op2BaseType}[Op2ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + 
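+ // Copy each vector operand and the raw result buffer into managed arrays so the table lookup can be checked element by element.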
Unsafe.WriteUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray0[0]), op0); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), op1); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), op2); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray3[0]), op3); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray4[0]), op4); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray5[0]), op5); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray0, inArray1, inArray2, inArray3, inArray4, inArray5, outArray, method); + } + + private void ValidateResult(void* op0, void* op1, void* op2, void* op3, void* op4, void* op5, void* result, [CallerMemberName] string method = "") + { + {Op2BaseType}[] inArray0 = new {Op2BaseType}[Op2ElementCount]; + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray2 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray3 = new {Op1BaseType}[Op1ElementCount]; + {Op1BaseType}[] inArray4 = new {Op1BaseType}[Op1ElementCount]; + {Op2BaseType}[] inArray5 = new {Op2BaseType}[Op2ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray0[0]), ref Unsafe.AsRef(op0), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), ref Unsafe.AsRef(op1), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray2[0]), ref Unsafe.AsRef(op2), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray3[0]), ref Unsafe.AsRef(op3), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray4[0]), ref Unsafe.AsRef(op4), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray5[0]), ref Unsafe.AsRef(op5), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray0, inArray1, inArray2, inArray3, inArray4, inArray5, outArray, method); + } + + private void ValidateResult({Op2BaseType}[] defaultValues, {Op1BaseType}[] firstOp, {Op1BaseType}[] secondOp, {Op1BaseType}[] thirdOp, {Op1BaseType}[] fourthOp, {Op2BaseType}[] indices, {RetBaseType}[] result, [CallerMemberName] string method = "") + { + bool succeeded = true; + {Op1BaseType}[][] table = {firstOp, secondOp, thirdOp, fourthOp}; + + for (var i = 0; i < RetElementCount; i++) + { + if ({ValidateIterResult}) + { + succeeded = false; + break; + } + } + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof({Isa})}.{nameof({Isa}.{Method})}<{RetBaseType}>({Op2VectorType}<{Op2BaseType}>, ({Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>), {Op2VectorType}<{Op2BaseType}>): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" defaultValues: ({string.Join(", ", defaultValues)})"); + TestLibrary.TestFramework.LogInformation($" 
firstOp: ({string.Join(", ", firstOp)})"); + TestLibrary.TestFramework.LogInformation($" secondOp: ({string.Join(", ", secondOp)})"); + TestLibrary.TestFramework.LogInformation($" thirdOp: ({string.Join(", ", thirdOp)})"); + TestLibrary.TestFramework.LogInformation($" fourthOp: ({string.Join(", ", fourthOp)})"); + TestLibrary.TestFramework.LogInformation($" indices: ({string.Join(", ", indices)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + } +} From f2a1f1923ff4ef89687e89bc6e58cd0269846c53 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 20 Feb 2023 10:42:28 -0800 Subject: [PATCH 068/125] Pass the missing defaultValues --- .../System/Runtime/Intrinsics/Arm/AdvSimd.cs | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs index dbe760cadee5f8..a5387fccd9ad04 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs @@ -3714,37 +3714,37 @@ internal Arm64() { } /// uint8x16_t vqtbx2q_u8(uint8x16x2_t t, uint8x16_t idx) /// A64: TBX Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B /// - public static Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookupExtension(table, byteIndexes); + public static Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookupExtension(defaultValues, table, byteIndexes); /// /// int8x16_t vqtbx2q_s8(int8x16x2_t t, uint8x16_t idx) /// A64: TBX Vd.16B, {Vn.16B, Vn+1.16B}, Vm.16B /// - public static Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookupExtension(table, byteIndexes); + public static Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookupExtension(defaultValues, table, byteIndexes); /// /// uint8x16_t vqtbx3q_u8(uint8x16x3_t t, uint8x16_t idx) /// A64: TBX Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B}, Vm.16B /// - public static Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookupExtension(table, byteIndexes); + public static Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookupExtension(defaultValues, table, byteIndexes); /// /// int8x16_t vqtbx3q_s8(int8x16x3_t t, uint8x16_t idx) /// A64: TBX Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B}, Vm.16B /// - public static Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookupExtension(table, byteIndexes); + public static Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookupExtension(defaultValues, table, byteIndexes); /// /// uint8x16_t vqtbx4q_u8(uint8x16x4_t t, uint8x16_t idx) /// A64: TBX Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B, Vn+3.16B}, Vm.16B /// - public static Vector128 
VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookupExtension(table, byteIndexes); + public static Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookupExtension(defaultValues, table, byteIndexes); /// /// int8x16_t vqtbx4q_s8(int8x16x4_t t, uint8x16_t idx) /// A64: TBX Vd.16B, {Vn.16B, Vn+1.16B, Vn+2.16B, Vn+3.16B}, Vm.16B /// - public static Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookupExtension(table, byteIndexes); + public static Vector128 VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128, Vector128, Vector128) table, Vector128 byteIndexes) => VectorTableLookupExtension(defaultValues, table, byteIndexes); /// /// uint8x8_t vzip2_u8(uint8x8_t a, uint8x8_t b) @@ -15093,37 +15093,37 @@ internal Arm64() { } /// uint8x8_t vqtbx2q_u8(uint8x16x2_t t, uint8x8_t idx) /// A64: TBX Vd.8B, {Vn.16B, Vn+1.16B}, Vm.8B /// - public static Vector64 VectorTableLookupExtension(Vector64 defaultValues, (Vector128, Vector128) table, Vector64 byteIndexes) => VectorTableLookupExtension(table, byteIndexes); + public static Vector64 VectorTableLookupExtension(Vector64 defaultValues, (Vector128, Vector128) table, Vector64 byteIndexes) => VectorTableLookupExtension(defaultValues, table, byteIndexes); /// /// int8x8_t vqtbx2q_u8(int8x16x2_t t, uint8x8_t idx) /// A64: TBX Vd.8B, {Vn.16B, Vn+1.16B}, Vm.8B /// - public static Vector64 VectorTableLookupExtension(Vector64 defaultValues, (Vector128, Vector128) table, Vector64 byteIndexes) => VectorTableLookupExtension(table, byteIndexes); + public static Vector64 VectorTableLookupExtension(Vector64 defaultValues, (Vector128, Vector128) table, Vector64 byteIndexes) => VectorTableLookupExtension(defaultValues, table, byteIndexes); /// /// uint8x8_t vqtbx3q_u8(uint8x16x3_t t, uint8x8_t idx) /// A64: TBX Vd.8B, {Vn.16B, Vn+1.16B, Vn+2.16B}, Vm.8B /// - public static Vector64 VectorTableLookupExtension(Vector64 defaultValues, (Vector128, Vector128, Vector128) table, Vector64 byteIndexes) => VectorTableLookupExtension(table, byteIndexes); + public static Vector64 VectorTableLookupExtension(Vector64 defaultValues, (Vector128, Vector128, Vector128) table, Vector64 byteIndexes) => VectorTableLookupExtension(defaultValues, table, byteIndexes); /// /// int8x8_t vqtbx3q_u8(int8x16x3_t t, uint8x8_t idx) /// A64: TBX Vd.8B, {Vn.16B, Vn+1.16B, Vn+2.16B}, Vm.8B /// - public static Vector64 VectorTableLookupExtension(Vector64 defaultValues, (Vector128, Vector128, Vector128) table, Vector64 byteIndexes) => VectorTableLookupExtension(table, byteIndexes); + public static Vector64 VectorTableLookupExtension(Vector64 defaultValues, (Vector128, Vector128, Vector128) table, Vector64 byteIndexes) => VectorTableLookupExtension(defaultValues, table, byteIndexes); /// /// uint8x8_t vqtbx4q_u8(uint8x16x4_t t, uint8x8_t idx) /// A64: TBX Vd.8B, {Vn.16B, Vn+1.16B, Vn+2.16B, Vn+3.16B}, Vm.8B /// - public static Vector64 VectorTableLookupExtension(Vector64 defaultValues, (Vector128, Vector128, Vector128, Vector128) table, Vector64 byteIndexes) => VectorTableLookupExtension(table, byteIndexes); + public static Vector64 VectorTableLookupExtension(Vector64 defaultValues, (Vector128, Vector128, Vector128, Vector128) table, Vector64 byteIndexes) => 
VectorTableLookupExtension(defaultValues, table, byteIndexes); /// /// int8x8_t vqtbx4q_u8(int8x16x4_t t, uint8x8_t idx) /// A64: TBX Vd.8B, {Vn.16B, Vn+1.16B, Vn+2.16B, Vn+3.16B}, Vm.8B /// - public static Vector64 VectorTableLookupExtension(Vector64 defaultValues, (Vector128, Vector128, Vector128, Vector128) table, Vector64 byteIndexes) => VectorTableLookupExtension(table, byteIndexes); + public static Vector64 VectorTableLookupExtension(Vector64 defaultValues, (Vector128, Vector128, Vector128, Vector128) table, Vector64 byteIndexes) => VectorTableLookupExtension(defaultValues, table, byteIndexes); /// /// uint8x8_t veor_u8 (uint8x8_t a, uint8x8_t b) From 985fe25063bc9af36c08f794141f18e3028cc5e2 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 20 Feb 2023 11:01:54 -0800 Subject: [PATCH 069/125] Use platform neutral BitScanForward --- src/coreclr/jit/lsraarm64.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index ad91b0db8f99a0..272d9ae0f8812c 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -87,7 +87,7 @@ bool LinearScan::setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosit { assert(consecutiveRefPosition->regCount == 0); #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE - if ((consecutiveRefPosition->refType == RefTypeUpperVectorRestore)) + if (consecutiveRefPosition->refType == RefTypeUpperVectorRestore) { if (consecutiveRefPosition->getInterval()->isPartiallySpilled) { @@ -171,11 +171,11 @@ regMaskTP LinearScan::getFreeCandidates(regMaskTP candidates, RefPosition* refPo regMaskTP overallResult = RBM_NONE; regMaskTP consecutiveResult = RBM_NONE; - uint32_t regAvailableStartIndex = 0, regAvailableEndIndex = 0; + DWORD regAvailableStartIndex = 0, regAvailableEndIndex = 0; do { // From LSB, find the first available register (bit `1`) - regAvailableStartIndex = BitOperations::_BitScanForward(currAvailableRegs); + BitScanForward64(®AvailableStartIndex, static_cast(currAvailableRegs)); regMaskTP startMask = (1ULL << regAvailableStartIndex) - 1; // Mask all the bits that are processed from LSB thru regAvailableStart until the last `1`. 
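// Worked example of the mask arithmetic above (assumed register numbers, purely
// illustrative): suppose currAvailableRegs = 0b0111'1100, i.e. registers 2..6 are free.
//   - BitScanForward64 yields regAvailableStartIndex = 2, so startMask = 0b0000'0011.
//   - maskProcessed = ~(0b0111'1100 | 0b0000'0011) = ~0b0111'1111, whose lowest set bit
//     is 7, so regAvailableEndIndex = 7 and endMask = 0b0111'1111.
//   - endMask & ~startMask = 0b0111'1100 isolates the contiguous run of free registers
//     (2..6) that the consecutive-register search in this function can then consider.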
@@ -193,7 +193,7 @@ regMaskTP LinearScan::getFreeCandidates(regMaskTP candidates, RefPosition* refPo } else { - regAvailableEndIndex = BitOperations::_BitScanForward(maskProcessed); + BitScanForward64(®AvailableEndIndex, static_cast(maskProcessed)); } regMaskTP endMask = (1ULL << regAvailableEndIndex) - 1; From 13601eb9c8cbc6f0f5dfd24dc833d5e689c345a1 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 20 Feb 2023 22:34:37 -0800 Subject: [PATCH 070/125] jit format --- src/coreclr/jit/lsraarm64.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 7ea9cf30e1f140..b434a7ca3faa10 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -171,12 +171,12 @@ regMaskTP LinearScan::getFreeCandidates(regMaskTP candidates, RefPosition* refPo regMaskTP overallResult = RBM_NONE; regMaskTP consecutiveResult = RBM_NONE; - DWORD regAvailableStartIndex = 0, regAvailableEndIndex = 0; + DWORD regAvailableStartIndex = 0, regAvailableEndIndex = 0; do { // From LSB, find the first available register (bit `1`) BitScanForward64(®AvailableStartIndex, static_cast(currAvailableRegs)); - regMaskTP startMask = (1ULL << regAvailableStartIndex) - 1; + regMaskTP startMask = (1ULL << regAvailableStartIndex) - 1; // Mask all the bits that are processed from LSB thru regAvailableStart until the last `1`. regMaskTP maskProcessed = ~(currAvailableRegs | startMask); From 7bf9105cad347db4e6bb14cc147c1e3df2b07a7a Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Sun, 26 Feb 2023 10:06:42 -0800 Subject: [PATCH 071/125] Remove the fake testlib workaround --- .../ref/System.Private.CoreLib.ExtraApis.cs | 22 ------------------ .../ref/System.Private.CoreLib.ExtraApis.txt | 2 +- .../System/Runtime/Intrinsics/Arm/AdvSimd.cs | 2 -- .../System/Runtime/Intrinsics/Arm/ArmBase.cs | 2 -- .../Arm/AdvSimd.Arm64/AdvSimd.Arm64_r.csproj | 1 - .../Arm/AdvSimd.Arm64/AdvSimd.Arm64_ro.csproj | 1 - .../Arm/AdvSimd/AdvSimd_r.csproj | 1 - .../Arm/AdvSimd/AdvSimd_ro.csproj | 1 - .../AdvSimd/VectorTableLookup.RefOnly.csproj | 23 ------------------- .../HardwareIntrinsics_r.csproj | 3 +-- .../HardwareIntrinsics_ro.csproj | 3 +-- 11 files changed, 3 insertions(+), 58 deletions(-) delete mode 100644 src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/VectorTableLookup.RefOnly.csproj diff --git a/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.cs b/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.cs index 4437ec11b30330..84e99da5aa050f 100644 --- a/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.cs +++ b/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.cs @@ -37,25 +37,3 @@ public static partial class Debug public static System.Diagnostics.DebugProvider SetProvider(System.Diagnostics.DebugProvider provider) { throw null; } } } -// namespace System.Runtime.Intrinsics.Arm -// { - // public abstract partial class AdvSimd : ArmBase - // { - // public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - // public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - // public static System.Runtime.Intrinsics.Vector64 
VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - // public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - // public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - // public static System.Runtime.Intrinsics.Vector64 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector64 byteIndexes) { throw null; } - - // public abstract partial class Arm64 : ArmBase.Arm64 - // { - // public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - // public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - // public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - // public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - // public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - // public static System.Runtime.Intrinsics.Vector128 VectorTableLookup((System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128, System.Runtime.Intrinsics.Vector128) table, System.Runtime.Intrinsics.Vector128 byteIndexes) { throw null; } - // } - // } -// } diff --git a/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.txt b/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.txt index d2a2cb7c190336..0babd819e25d04 100644 --- a/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.txt +++ b/src/libraries/System.Private.CoreLib/ref/System.Private.CoreLib.ExtraApis.txt @@ -4,4 +4,4 @@ T:System.Runtime.Serialization.DeserializationToken M:System.Runtime.Serialization.SerializationInfo.StartDeserialization T:System.Diagnostics.DebugProvider -M:System.Diagnostics.Debug.SetProvider(System.Diagnostics.DebugProvider) \ No newline at end of file +M:System.Diagnostics.Debug.SetProvider(System.Diagnostics.DebugProvider) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs 
b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs index a5387fccd9ad04..72f1c60311491a 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/AdvSimd.cs @@ -10,9 +10,7 @@ namespace System.Runtime.Intrinsics.Arm /// This class provides access to the ARM AdvSIMD hardware instructions via intrinsics /// [Intrinsic] -#if SYSTEM_PRIVATE_CORELIB [CLSCompliant(false)] -#endif public abstract class AdvSimd : ArmBase { internal AdvSimd() { } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/ArmBase.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/ArmBase.cs index d6c60878ecc37c..dd378377f5c506 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/ArmBase.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/ArmBase.cs @@ -9,9 +9,7 @@ namespace System.Runtime.Intrinsics.Arm /// This class provides access to the ARM base hardware instructions via intrinsics /// [Intrinsic] -#if SYSTEM_PRIVATE_CORELIB [CLSCompliant(false)] -#endif public abstract class ArmBase { internal ArmBase() { } diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd.Arm64/AdvSimd.Arm64_r.csproj b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd.Arm64/AdvSimd.Arm64_r.csproj index 0b9e2196d889ac..5ca6505a8b662f 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd.Arm64/AdvSimd.Arm64_r.csproj +++ b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd.Arm64/AdvSimd.Arm64_r.csproj @@ -11,6 +11,5 @@ - diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd.Arm64/AdvSimd.Arm64_ro.csproj b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd.Arm64/AdvSimd.Arm64_ro.csproj index 14faab744963bf..d3a58db15324ee 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd.Arm64/AdvSimd.Arm64_ro.csproj +++ b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd.Arm64/AdvSimd.Arm64_ro.csproj @@ -11,6 +11,5 @@ - diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/AdvSimd_r.csproj b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/AdvSimd_r.csproj index 4f461394cef01c..2687de5191369f 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/AdvSimd_r.csproj +++ b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/AdvSimd_r.csproj @@ -11,6 +11,5 @@ - diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/AdvSimd_ro.csproj b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/AdvSimd_ro.csproj index e0e606a719c553..15ddb552c82ad3 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/AdvSimd_ro.csproj +++ b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/AdvSimd_ro.csproj @@ -12,6 +12,5 @@ - diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/VectorTableLookup.RefOnly.csproj b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/VectorTableLookup.RefOnly.csproj deleted file mode 100644 index 4c04a139f2ee1b..00000000000000 --- a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/VectorTableLookup.RefOnly.csproj +++ /dev/null @@ -1,23 +0,0 @@ - - - - true - Library - SharedLibrary - System.Private.CoreLib - 436 - 436 - true - - - - - - - - diff --git a/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_r.csproj b/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_r.csproj index e07f4dccdc9840..d81af3a381450c 100644 --- a/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_r.csproj +++ b/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_r.csproj @@ -15,8 +15,7 @@ - - + diff --git 
a/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_ro.csproj b/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_ro.csproj index d12fd6c5566294..ab022381c65805 100644 --- a/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_ro.csproj +++ b/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_ro.csproj @@ -16,8 +16,7 @@ - - + From 1f956372b1021c1b6a8d547dc5ee7fb746262125 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Sun, 26 Feb 2023 10:47:27 -0800 Subject: [PATCH 072/125] Fix mono failures --- src/mono/mono/mini/simd-intrinsics.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index bf01b02fb67144..c1d65f744a9aae 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -434,6 +434,12 @@ emit_simd_ins_for_unary_op (MonoCompile *cfg, MonoClass *klass, MonoMethodSignat #endif } +static gboolean +type_is_simd_vector (MonoType *type) +{ + return type->type == MONO_TYPE_GENERICINST && m_class_is_simd_type (mono_class_from_mono_type_internal (type)); +} + static gboolean is_hw_intrinsics_class (MonoClass *klass, const char *name, gboolean *is_64bit) { @@ -3060,8 +3066,8 @@ static SimdIntrinsic advsimd_methods [] = { {SN_TransposeOdd, OP_ARM64_TRN2}, {SN_UnzipEven, OP_ARM64_UZP1}, {SN_UnzipOdd, OP_ARM64_UZP2}, - {SN_VectorTableLookup, OP_XOP_OVR_X_X_X, INTRINS_AARCH64_ADV_SIMD_TBL1}, - {SN_VectorTableLookupExtension, OP_XOP_OVR_X_X_X_X, INTRINS_AARCH64_ADV_SIMD_TBX1}, + {SN_VectorTableLookup}, + {SN_VectorTableLookupExtension}, {SN_Xor, OP_XBINOP_FORCEINT, XBINOP_FORCEINT_XOR}, {SN_ZeroExtendWideningLower, OP_ARM64_UXTL}, {SN_ZeroExtendWideningUpper, OP_ARM64_UXTL2}, @@ -3365,6 +3371,14 @@ emit_arm64_intrinsics ( ret->sreg3 = scalar->dreg; return ret; } + case SN_VectorTableLookup: + if (!type_is_simd_vector (fsig->params [0])) + return NULL; + return emit_simd_ins_for_sig (cfg, klass, OP_XOP_OVR_X_X_X, INTRINS_AARCH64_ADV_SIMD_TBL1, 0, fsig, args); + case SN_VectorTableLookupExtension: + if (!type_is_simd_vector (fsig->params [0])) + return NULL; + return emit_simd_ins_for_sig (cfg, klass, OP_XOP_OVR_X_X_X, INTRINS_AARCH64_ADV_SIMD_TBX1, 0, fsig, args); default: g_assert_not_reached (); } From e7bb0696a9724fc9ef3b7da8998821221b6f5c7d Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Sun, 26 Feb 2023 12:23:01 -0800 Subject: [PATCH 073/125] Fix x64 TP regression --- src/coreclr/jit/lsra.h | 7 +++---- src/coreclr/jit/lsrabuild.cpp | 22 +++++++++++++++------- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 362281c797e1c7..3e4063140e7ee3 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1870,10 +1870,9 @@ class LinearScan : public LinearScanInterface bool checkContainedOrCandidateLclVar(GenTreeLclVar* lclNode); #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE - RefPosition* BuildUse(GenTree* operand, - regMaskTP candidates = RBM_NONE, - int multiRegIdx = 0, - RefPosition** restoreRefPosition = nullptr); + RefPosition* BuildUse(GenTree* operand, + regMaskTP candidates = RBM_NONE, + int multiRegIdx = 0 ARM64_ARG(RefPosition** restoreRefPosition = nullptr)); #else RefPosition* BuildUse(GenTree* operand, regMaskTP candidates = RBM_NONE, int multiRegIdx = 0); #endif diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index dbfe40d4d5d2ae..e16187aee52953 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -1597,13 
+1597,13 @@ RefPosition* LinearScan::buildUpperVectorRestoreRefPosition(Interval* lclVarI GenTree* node, bool isUse) { - RefPosition* restorePos = nullptr; if (lclVarInterval->isPartiallySpilled) { unsigned varIndex = lclVarInterval->getVarIndex(compiler); Interval* upperVectorInterval = getUpperVectorInterval(varIndex); RefPosition* savePos = upperVectorInterval->recentRefPosition; - restorePos = newRefPosition(upperVectorInterval, currentLoc, RefTypeUpperVectorRestore, node, RBM_NONE); + RefPosition* restorePos = + newRefPosition(upperVectorInterval, currentLoc, RefTypeUpperVectorRestore, node, RBM_NONE); lclVarInterval->isPartiallySpilled = false; if (isUse) @@ -1620,9 +1620,14 @@ RefPosition* LinearScan::buildUpperVectorRestoreRefPosition(Interval* lclVarI #ifdef TARGET_XARCH restorePos->regOptional = true; +#endif +#ifdef TARGET_ARM64 + // Only needed for consecutive registers for now, which is only + // possible in TARGET_ARM64 + return restorePos; #endif } - return restorePos; + return nullptr; } #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE @@ -3044,10 +3049,9 @@ void LinearScan::UpdatePreferencesOfDyingLocal(Interval* interval) // The node must not be contained, and must have been processed by buildRefPositionsForNode(). // #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE -RefPosition* LinearScan::BuildUse(GenTree* operand, - regMaskTP candidates, - int multiRegIdx, - RefPosition** restoreRefPosition) +RefPosition* LinearScan::BuildUse(GenTree* operand, + regMaskTP candidates, + int multiRegIdx ARM64_ARG(RefPosition** restoreRefPosition)) #else RefPosition* LinearScan::BuildUse(GenTree* operand, regMaskTP candidates, int multiRegIdx) #endif @@ -3078,10 +3082,12 @@ RefPosition* LinearScan::BuildUse(GenTree* operand, regMaskTP candidates, int mu } #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE RefPosition* upperVectorRefPos = buildUpperVectorRestoreRefPosition(interval, currentLoc, operand, true); +#ifdef TARGET_ARM64 if (restoreRefPosition != nullptr) { *restoreRefPosition = upperVectorRefPos; } +#endif // TARGET_ARM64 #endif } else if (operand->IsMultiRegLclVar()) @@ -3096,10 +3102,12 @@ RefPosition* LinearScan::BuildUse(GenTree* operand, regMaskTP candidates, int mu } #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE RefPosition* upperVectorRefPos = buildUpperVectorRestoreRefPosition(interval, currentLoc, operand, true); +#ifdef TARGET_ARM64 if (restoreRefPosition != nullptr) { *restoreRefPosition = upperVectorRefPos; } +#endif // TARGET_ARM64 #endif } else From 6ebb12a5de28eac3a36f1d7fba75b8982ad18c8b Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Sun, 26 Feb 2023 23:15:51 -0800 Subject: [PATCH 074/125] Fix test cases --- .../Arm/Shared/VectorLookupExtension_2Test.template | 2 -- .../Arm/Shared/VectorLookupExtension_3Test.template | 2 -- .../Arm/Shared/VectorLookupExtension_4Test.template | 2 -- .../HardwareIntrinsics/Arm/Shared/VectorLookup_2Test.template | 2 -- .../HardwareIntrinsics/Arm/Shared/VectorLookup_3Test.template | 2 -- .../HardwareIntrinsics/Arm/Shared/VectorLookup_4Test.template | 2 -- 6 files changed, 12 deletions(-) diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_2Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_2Test.template index 2a1242c60dfc1d..2db90278bb29e1 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_2Test.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_2Test.template @@ -7,8 +7,6 @@ * changes, please update the corresponding template and run according to the * * 
directions listed in the file. * ******************************************************************************/ -extern alias CoreLib; -using AdvSimd = CoreLib::System.Runtime.Intrinsics.Arm.AdvSimd; using System; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_3Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_3Test.template index d9ced1e2844761..b02c6ce4949280 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_3Test.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_3Test.template @@ -7,8 +7,6 @@ * changes, please update the corresponding template and run according to the * * directions listed in the file. * ******************************************************************************/ -extern alias CoreLib; -using AdvSimd = CoreLib::System.Runtime.Intrinsics.Arm.AdvSimd; using System; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_4Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_4Test.template index 6f4f080ecc1209..f5e00730f15279 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_4Test.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_4Test.template @@ -7,8 +7,6 @@ * changes, please update the corresponding template and run according to the * * directions listed in the file. * ******************************************************************************/ -extern alias CoreLib; -using AdvSimd = CoreLib::System.Runtime.Intrinsics.Arm.AdvSimd; using System; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_2Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_2Test.template index 460cbe3340e0a1..11c911094b416c 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_2Test.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_2Test.template @@ -7,8 +7,6 @@ * changes, please update the corresponding template and run according to the * * directions listed in the file. * ******************************************************************************/ -extern alias CoreLib; -using AdvSimd = CoreLib::System.Runtime.Intrinsics.Arm.AdvSimd; using System; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_3Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_3Test.template index 7b60606c5dab56..eff5fb5f478564 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_3Test.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_3Test.template @@ -7,8 +7,6 @@ * changes, please update the corresponding template and run according to the * * directions listed in the file. 
* ******************************************************************************/ -extern alias CoreLib; -using AdvSimd = CoreLib::System.Runtime.Intrinsics.Arm.AdvSimd; using System; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_4Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_4Test.template index c196615b324d20..d4536cfb144b7b 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_4Test.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_4Test.template @@ -7,8 +7,6 @@ * changes, please update the corresponding template and run according to the * * directions listed in the file. * ******************************************************************************/ -extern alias CoreLib; -using AdvSimd = CoreLib::System.Runtime.Intrinsics.Arm.AdvSimd; using System; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; From 2d7529109eadff8793d0a0a1c054774d5d6f095b Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Sun, 26 Feb 2023 23:16:37 -0800 Subject: [PATCH 075/125] fix some more tp regression --- src/coreclr/jit/lsra.h | 15 ++++-------- src/coreclr/jit/lsraarm64.cpp | 20 +++++++++++----- src/coreclr/jit/lsrabuild.cpp | 45 ++++++++--------------------------- 3 files changed, 28 insertions(+), 52 deletions(-) diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 3e4063140e7ee3..cc99d075f3bec3 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1011,10 +1011,10 @@ class LinearScan : public LinearScanInterface #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE void buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation currentLoc, regMaskTP fpCalleeKillSet); - RefPosition* buildUpperVectorRestoreRefPosition(Interval* lclVarInterval, - LsraLocation currentLoc, - GenTree* node, - bool isUse); + void buildUpperVectorRestoreRefPosition(Interval* lclVarInterval, + LsraLocation currentLoc, + GenTree* node, + bool isUse); #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE #if defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) @@ -1869,14 +1869,7 @@ class LinearScan : public LinearScanInterface bool isCandidateMultiRegLclVar(GenTreeLclVar* lclNode); bool checkContainedOrCandidateLclVar(GenTreeLclVar* lclNode); -#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE - RefPosition* BuildUse(GenTree* operand, - regMaskTP candidates = RBM_NONE, - int multiRegIdx = 0 ARM64_ARG(RefPosition** restoreRefPosition = nullptr)); -#else RefPosition* BuildUse(GenTree* operand, regMaskTP candidates = RBM_NONE, int multiRegIdx = 0); -#endif - void setDelayFree(RefPosition* use); int BuildBinaryUses(GenTreeOp* node, regMaskTP candidates = RBM_NONE); int BuildCastUses(GenTreeCast* cast, regMaskTP candidates); diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index b434a7ca3faa10..3219400486216a 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1395,12 +1395,20 @@ int LinearScan::BuildConsecutiveRegisters(GenTree* treeNode, GenTree* rmwNode) NextConsecutiveRefPositionsMap* refPositionMap = getNextConsecutiveRefPositionsMap(); for (GenTreeFieldList::Use& use : treeNode->AsFieldList()->Uses()) { -#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE - RefPosition* restoreRefPos = nullptr; - currRefPos = BuildUse(use.GetNode(), RBM_NONE, 0, &restoreRefPos); -#else - currRefPos = BuildUse(use.GetNode()); -#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE + RefPosition* restoreRefPos = nullptr; + 
RefPositionIterator prevRefPos = refPositions.backPosition(); + currRefPos = BuildUse(use.GetNode(), RBM_NONE, 0); + + // Check if restore Refpositions were created + RefPositionIterator tailRefPos = refPositions.backPosition(); + assert(tailRefPos == currRefPos); + prevRefPos++; + if (prevRefPos != tailRefPos) + { + restoreRefPos = prevRefPos; + assert(restoreRefPos->refType == RefTypeUpperVectorRestore); + } + currRefPos->needsConsecutive = true; currRefPos->regCount = 0; #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index e16187aee52953..0c9dd529ebf2bf 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -1589,13 +1589,10 @@ void LinearScan::buildUpperVectorSaveRefPositions(GenTree* tree, LsraLocation cu // isUse - If the refPosition that is about to be created represents a use or not. // - If not, it would be the one at the end of the block. // -// Returns: -// The refposition created for VectorRestore -// -RefPosition* LinearScan::buildUpperVectorRestoreRefPosition(Interval* lclVarInterval, - LsraLocation currentLoc, - GenTree* node, - bool isUse) +void LinearScan::buildUpperVectorRestoreRefPosition(Interval* lclVarInterval, + LsraLocation currentLoc, + GenTree* node, + bool isUse) { if (lclVarInterval->isPartiallySpilled) { @@ -1620,14 +1617,8 @@ RefPosition* LinearScan::buildUpperVectorRestoreRefPosition(Interval* lclVarI #ifdef TARGET_XARCH restorePos->regOptional = true; -#endif -#ifdef TARGET_ARM64 - // Only needed for consecutive registers for now, which is only - // possible in TARGET_ARM64 - return restorePos; #endif } - return nullptr; } #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE @@ -3040,7 +3031,6 @@ void LinearScan::UpdatePreferencesOfDyingLocal(Interval* interval) // operand - The node of interest // candidates - The register candidates for the use // multiRegIdx - The index of the multireg def/use -// restoreRefPosition - If there was any upperVector restore refposition created, return it. // // Return Value: // The newly created use RefPosition @@ -3048,13 +3038,10 @@ void LinearScan::UpdatePreferencesOfDyingLocal(Interval* interval) // Notes: // The node must not be contained, and must have been processed by buildRefPositionsForNode(). 
// -#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE -RefPosition* LinearScan::BuildUse(GenTree* operand, - regMaskTP candidates, - int multiRegIdx ARM64_ARG(RefPosition** restoreRefPosition)) -#else +//#ifdef TARGET_ARM64 +// template +//#endif RefPosition* LinearScan::BuildUse(GenTree* operand, regMaskTP candidates, int multiRegIdx) -#endif { assert(!operand->isContained()); Interval* interval; @@ -3081,13 +3068,7 @@ RefPosition* LinearScan::BuildUse(GenTree* operand, regMaskTP candidates, int mu UpdatePreferencesOfDyingLocal(interval); } #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE - RefPosition* upperVectorRefPos = buildUpperVectorRestoreRefPosition(interval, currentLoc, operand, true); -#ifdef TARGET_ARM64 - if (restoreRefPosition != nullptr) - { - *restoreRefPosition = upperVectorRefPos; - } -#endif // TARGET_ARM64 + buildUpperVectorRestoreRefPosition(interval, currentLoc, operand, true); #endif } else if (operand->IsMultiRegLclVar()) @@ -3101,13 +3082,7 @@ RefPosition* LinearScan::BuildUse(GenTree* operand, regMaskTP candidates, int mu VarSetOps::RemoveElemD(compiler, currentLiveVars, fieldVarDsc->lvVarIndex); } #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE - RefPosition* upperVectorRefPos = buildUpperVectorRestoreRefPosition(interval, currentLoc, operand, true); -#ifdef TARGET_ARM64 - if (restoreRefPosition != nullptr) - { - *restoreRefPosition = upperVectorRefPos; - } -#endif // TARGET_ARM64 + buildUpperVectorRestoreRefPosition(interval, currentLoc, operand, true); #endif } else @@ -3546,7 +3521,7 @@ void LinearScan::BuildStoreLocDef(GenTreeLclVarCommon* storeLoc, defCandidates = allRegs(type); } #else - defCandidates = allRegs(type); + defCandidates = allRegs(type); #endif // TARGET_X86 RefPosition* def = newRefPosition(varDefInterval, currentLoc + 1, RefTypeDef, storeLoc, defCandidates, index); From 68cd4d7a3ac2b7733bd2bb51ad03b8c29ae4fc91 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 27 Feb 2023 07:25:41 -0800 Subject: [PATCH 076/125] Fix test build --- .../GenerateHWIntrinsicTests_Arm.cs | 12 ++++++------ .../HardwareIntrinsics/Arm/AdvSimd/AdvSimd_ro.csproj | 1 - .../HardwareIntrinsics/HardwareIntrinsics_ro.csproj | 2 +- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs index d466dc79d53167..3fcb781e65bc8b 100644 --- a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs +++ b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs @@ -2326,12 +2326,12 @@ ("VectorLookup_4Test.template", new Dictionary { ["TestName"] = "VectorTableLookup4_Vector128_SByte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookup", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "(SByte)(TestLibrary.Generator.GetSByte() % 80)", ["ValidateFirstResult"] = "Helpers.TableVectorLookup(0, indices, table) != result[0]", ["ValidateRemainingResults"] = "Helpers.TableVectorLookup(i, indices, table) != result[i]"}), ("VecTernOpTest.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension_Vector128_Byte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", 
["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["NextValueOp3"] = "(Byte)(TestLibrary.Generator.GetByte() % 20)", ["ValidateIterResult"] = "Helpers.TableVectorExtension(i, firstOp, thirdOp, secondOp) != result[i]"}), ("VecTernOpTest.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension_Vector128_SByte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "(SByte)(TestLibrary.Generator.GetSByte() % 20)", ["ValidateIterResult"] = "Helpers.TableVectorExtension(i, firstOp, thirdOp, secondOp) != result[i]"}), - ("VectorLookupExtension_2Test.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension2_Vector128_Byte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp0"] = "TestLibrary.Generator.GetByte()", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "(Byte)(TestLibrary.Generator.GetByte() % 40)", ["ValidateIterResult"] = "Helpers.TableVectorLookupExtension(i, defaultValues, indices, table) != result[i]"}), - ("VectorLookupExtension_2Test.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension2_Vector128_SByte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp0"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "(SByte)(TestLibrary.Generator.GetSByte() % 40)", ["ValidateIterResult"] = "Helpers.TableVectorLookupExtension(i, defaultValues, indices, table) != result[i]"}), - ("VectorLookupExtension_3Test.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension3_Vector128_Byte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp0"] = "TestLibrary.Generator.GetByte()", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "(Byte)(TestLibrary.Generator.GetByte() % 60)", ["ValidateIterResult"] = "Helpers.TableVectorLookupExtension(i, defaultValues, indices, table) != result[i]"}), - ("VectorLookupExtension_3Test.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension3_Vector128_SByte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", 
["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp0"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "(SByte)(TestLibrary.Generator.GetSByte() % 60)", ["ValidateIterResult"] = "Helpers.TableVectorLookupExtension(i, defaultValues, indices, table) != result[i]"}), - ("VectorLookupExtension_4Test.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension4_Vector128_Byte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp0"] = "TestLibrary.Generator.GetByte()", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "(Byte)(TestLibrary.Generator.GetByte() % 80)", ["ValidateIterResult"] = "Helpers.TableVectorLookupExtension(i, defaultValues, indices, table) != result[i]"}), - ("VectorLookupExtension_4Test.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension4_Vector128_SByte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp0"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "(SByte)(TestLibrary.Generator.GetSByte() % 80)", ["ValidateIterResult"] = "Helpers.TableVectorLookupExtension(i, defaultValues, indices, table) != result[i]"}), + ("VectorLookupExtension_2Test.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension2_Vector128_Byte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp0"] = "TestLibrary.Generator.GetByte()", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "(Byte)(TestLibrary.Generator.GetByte() % 40)", ["ValidateIterResult"] = "Helpers.TableVectorExtension(i, defaultValues, indices, table) != result[i]"}), + ("VectorLookupExtension_2Test.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension2_Vector128_SByte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp0"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "(SByte)(TestLibrary.Generator.GetSByte() % 40)", ["ValidateIterResult"] = "Helpers.TableVectorExtension(i, defaultValues, indices, table) != result[i]"}), + ("VectorLookupExtension_3Test.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension3_Vector128_Byte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = 
"AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp0"] = "TestLibrary.Generator.GetByte()", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "(Byte)(TestLibrary.Generator.GetByte() % 60)", ["ValidateIterResult"] = "Helpers.TableVectorExtension(i, defaultValues, indices, table) != result[i]"}), + ("VectorLookupExtension_3Test.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension3_Vector128_SByte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp0"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "(SByte)(TestLibrary.Generator.GetSByte() % 60)", ["ValidateIterResult"] = "Helpers.TableVectorExtension(i, defaultValues, indices, table) != result[i]"}), + ("VectorLookupExtension_4Test.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension4_Vector128_Byte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp0"] = "TestLibrary.Generator.GetByte()", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "(Byte)(TestLibrary.Generator.GetByte() % 80)", ["ValidateIterResult"] = "Helpers.TableVectorExtension(i, defaultValues, indices, table) != result[i]"}), + ("VectorLookupExtension_4Test.template", new Dictionary { ["TestName"] = "VectorTableLookupExtension4_Vector128_SByte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "VectorTableLookupExtension", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp0"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "(SByte)(TestLibrary.Generator.GetSByte() % 80)", ["ValidateIterResult"] = "Helpers.TableVectorExtension(i, defaultValues, indices, table) != result[i]"}), ("VecPairBinOpTest.template", new Dictionary { ["TestName"] = "UnzipEven_Vector64_Byte", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "UnzipEven", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateEntry"] = "result[index] != left[i] || result[index + half] != right[i]"}), ("VecPairBinOpTest.template", new Dictionary { ["TestName"] = "UnzipEven_Vector64_Int16", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "UnzipEven", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector64", 
["Op2BaseType"] = "Int16", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["ValidateEntry"] = "result[index] != left[i] || result[index + half] != right[i]"}), ("VecPairBinOpTest.template", new Dictionary { ["TestName"] = "UnzipEven_Vector64_Int32", ["Isa"] = "AdvSimd.Arm64", ["LoadIsa"] = "AdvSimd", ["Method"] = "UnzipEven", ["RetVectorType"] = "Vector64", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector64", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector64", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["ValidateEntry"] = "result[index] != left[i] || result[index + half] != right[i]"}), diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/AdvSimd_ro.csproj b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/AdvSimd_ro.csproj index 15ddb552c82ad3..d7a4cd3f91a416 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/AdvSimd_ro.csproj +++ b/src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd/AdvSimd_ro.csproj @@ -7,7 +7,6 @@ Embedded True - diff --git a/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_ro.csproj b/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_ro.csproj index ab022381c65805..cec6dbb86c4810 100644 --- a/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_ro.csproj +++ b/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_ro.csproj @@ -16,7 +16,7 @@ - + From 7b830530366c5c58e1e747a997d0292f6c8ebeff Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 28 Feb 2023 10:33:39 -0800 Subject: [PATCH 077/125] misc. changes --- src/coreclr/jit/lsra.cpp | 4 +--- src/coreclr/jit/lsrabuild.cpp | 3 --- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 2eb7ed18cb2b71..69d2d3be7a322d 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -2912,10 +2912,8 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, } } } - assignPhysReg(availablePhysRegRecord, currentInterval); refPosition->registerAssignment = foundRegBit; - return foundReg; } @@ -5348,7 +5346,7 @@ void LinearScan::allocateRegisters() setIntervalAsSplit(currentInterval); INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_MOVE_REG, currentInterval, assignedRegister)); } - else if (((genRegMask(assignedRegister) & currentRefPosition.registerAssignment) != 0)) + else if ((genRegMask(assignedRegister) & currentRefPosition.registerAssignment) != 0) { #ifdef TARGET_ARM64 if (hasConsecutiveRegister && currentRefPosition.isFirstRefPositionOfConsecutiveRegisters()) diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 0c9dd529ebf2bf..b1e769f4032b98 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -3038,9 +3038,6 @@ void LinearScan::UpdatePreferencesOfDyingLocal(Interval* interval) // Notes: // The node must not be contained, and must have been processed by buildRefPositionsForNode(). 
// -//#ifdef TARGET_ARM64 -// template -//#endif RefPosition* LinearScan::BuildUse(GenTree* operand, regMaskTP candidates, int multiRegIdx) { assert(!operand->isContained()); From 903c3de18514464bc005ff90022ef475b0fff02f Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 1 Mar 2023 11:29:24 -0800 Subject: [PATCH 078/125] Fix the bug where we were not freeing copyReg causing an assert in tier0 --- src/coreclr/jit/lsra.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 69d2d3be7a322d..f4260562dafdc5 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -5374,9 +5374,8 @@ void LinearScan::allocateRegisters() // For consecutive register, it doesn't matter what the assigned register was. // We have just assigned it `copyRegMask` and that's the one in-use, and not the // one that was assigned previously. - assignedRegMask = REG_NA; - regsInUseThisLocation |= copyRegMask | assignedRegMask; + regsInUseThisLocation |= copyRegMask; if (currentRefPosition.lastUse) { if (currentRefPosition.delayRegFree) @@ -5464,10 +5463,13 @@ void LinearScan::allocateRegisters() // We have just assigned it `copyRegMask` and that's the one in-use, and not the // one that was assigned previously. - assignedRegMask = REG_NA; + regsInUseThisLocation |= copyRegMask; } + else #endif - regsInUseThisLocation |= copyRegMask | assignedRegMask; + { + regsInUseThisLocation |= copyRegMask | assignedRegMask; + } if (currentRefPosition.lastUse) { if (currentRefPosition.delayRegFree) From a8ec81973d3cb116f83c6d721219c7c23b60f17d Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 1 Mar 2023 11:37:43 -0800 Subject: [PATCH 079/125] Refactor little bit to reduce checks for VectorTableLookup --- src/coreclr/jit/hwintrinsic.h | 11 ++ src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 205 ++++++++++---------- src/coreclr/jit/hwintrinsiclistarm64.h | 8 +- src/coreclr/jit/lsra.cpp | 5 +- src/coreclr/jit/lsraarm64.cpp | 57 +++--- 5 files changed, 141 insertions(+), 145 deletions(-) diff --git a/src/coreclr/jit/hwintrinsic.h b/src/coreclr/jit/hwintrinsic.h index e698a277f2fa5e..c2848581954b6f 100644 --- a/src/coreclr/jit/hwintrinsic.h +++ b/src/coreclr/jit/hwintrinsic.h @@ -176,6 +176,9 @@ enum HWIntrinsicFlag : unsigned int // The intrinsic supports some sort of containment analysis HW_Flag_SupportsContainment = 0x2000, + + // The intrinsic needs consecutive registers + HW_Flag_NeedsConsecutiveRegisters = 0x4000, #else #error Unsupported platform #endif @@ -751,6 +754,14 @@ struct HWIntrinsicInfo return (flags & HW_Flag_SpecialCodeGen) != 0; } +#ifdef TARGET_ARM64 + static bool NeedsConsecutiveRegisters(NamedIntrinsic id) + { + HWIntrinsicFlag flags = lookupFlags(id); + return (flags & HW_Flag_NeedsConsecutiveRegisters) != 0; + } +#endif + static bool HasRMWSemantics(NamedIntrinsic id) { HWIntrinsicFlag flags = lookupFlags(id); diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 7bff9e3a4448aa..e0911801b5050a 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -424,14 +424,6 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) instruction ins = INS_invalid; switch (intrin.id) { - case NI_AdvSimd_VectorTableLookup: - case NI_AdvSimd_Arm64_VectorTableLookup: - ins = INS_tbl; - break; - case NI_AdvSimd_VectorTableLookupExtension: - case NI_AdvSimd_Arm64_VectorTableLookupExtension: - ins = INS_tbx; - break; case 
NI_AdvSimd_AddWideningLower: assert(varTypeIsIntegral(intrin.baseType)); if (intrin.op1->TypeGet() == TYP_SIMD8) @@ -497,104 +489,6 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) switch (intrin.id) { - case NI_AdvSimd_VectorTableLookup: - case NI_AdvSimd_Arm64_VectorTableLookup: - { - unsigned regCount = 0; - if (intrin.op1->OperIsFieldList()) - { - GenTreeFieldList* fieldList = intrin.op1->AsFieldList(); - GenTree* firstField = fieldList->Uses().GetHead()->GetNode(); - op1Reg = firstField->GetRegNum(); - INDEBUG(regNumber argReg = op1Reg); - for (GenTreeFieldList::Use& use : fieldList->Uses()) - { - regCount++; -#ifdef DEBUG - - GenTree* argNode = use.GetNode(); - assert(argReg == argNode->GetRegNum()); - argReg = REG_NEXT(argReg); -#endif - } - } - else - { - regCount = 1; - op1Reg = intrin.op1->GetRegNum(); - } - - switch (regCount) - { - case 2: - ins = INS_tbl_2regs; - break; - case 3: - ins = INS_tbl_3regs; - break; - case 4: - ins = INS_tbl_4regs; - break; - default: - assert(regCount == 1); - assert(ins == INS_tbl); - break; - } - - GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt); - break; - } - case NI_AdvSimd_VectorTableLookupExtension: - case NI_AdvSimd_Arm64_VectorTableLookupExtension: - { - assert(isRMW); - unsigned regCount = 0; - op1Reg = intrin.op1->GetRegNum(); - op3Reg = intrin.op3->GetRegNum(); - if (intrin.op2->OperIsFieldList()) - { - GenTreeFieldList* fieldList = intrin.op2->AsFieldList(); - GenTree* firstField = fieldList->Uses().GetHead()->GetNode(); - op2Reg = firstField->GetRegNum(); - INDEBUG(regNumber argReg = op2Reg); - for (GenTreeFieldList::Use& use : fieldList->Uses()) - { - regCount++; -#ifdef DEBUG - - GenTree* argNode = use.GetNode(); - assert(argReg == argNode->GetRegNum()); - argReg = REG_NEXT(argReg); -#endif - } - } - else - { - regCount = 1; - op2Reg = intrin.op2->GetRegNum(); - } - - switch (regCount) - { - case 2: - ins = INS_tbx_2regs; - break; - case 3: - ins = INS_tbx_3regs; - break; - case 4: - ins = INS_tbx_4regs; - break; - default: - assert(regCount == 1); - assert(ins == INS_tbx); - break; - } - - GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg, /* canSkip */ true); - GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op2Reg, op3Reg, opt); - break; - } case NI_AdvSimd_BitwiseSelect: // Even though BitwiseSelect is an RMW intrinsic per se, we don't want to mark it as such // since we can handle all possible allocation decisions for targetReg. @@ -1101,6 +995,105 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) (emitSize == EA_8BYTE) ? 
INS_OPTS_8B : INS_OPTS_16B); break; + case NI_AdvSimd_VectorTableLookup: + case NI_AdvSimd_Arm64_VectorTableLookup: + { + unsigned regCount = 0; + if (intrin.op1->OperIsFieldList()) + { + GenTreeFieldList* fieldList = intrin.op1->AsFieldList(); + GenTree* firstField = fieldList->Uses().GetHead()->GetNode(); + op1Reg = firstField->GetRegNum(); + INDEBUG(regNumber argReg = op1Reg); + for (GenTreeFieldList::Use& use : fieldList->Uses()) + { + regCount++; +#ifdef DEBUG + + GenTree* argNode = use.GetNode(); + assert(argReg == argNode->GetRegNum()); + argReg = REG_NEXT(argReg); +#endif + } + } + else + { + regCount = 1; + op1Reg = intrin.op1->GetRegNum(); + } + + switch (regCount) + { + case 2: + ins = INS_tbl_2regs; + break; + case 3: + ins = INS_tbl_3regs; + break; + case 4: + ins = INS_tbl_4regs; + break; + default: + assert(regCount == 1); + assert(ins == INS_tbl); + break; + } + + GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt); + break; + } + + case NI_AdvSimd_VectorTableLookupExtension: + case NI_AdvSimd_Arm64_VectorTableLookupExtension: + { + assert(isRMW); + unsigned regCount = 0; + op1Reg = intrin.op1->GetRegNum(); + op3Reg = intrin.op3->GetRegNum(); + if (intrin.op2->OperIsFieldList()) + { + GenTreeFieldList* fieldList = intrin.op2->AsFieldList(); + GenTree* firstField = fieldList->Uses().GetHead()->GetNode(); + op2Reg = firstField->GetRegNum(); + INDEBUG(regNumber argReg = op2Reg); + for (GenTreeFieldList::Use& use : fieldList->Uses()) + { + regCount++; +#ifdef DEBUG + + GenTree* argNode = use.GetNode(); + assert(argReg == argNode->GetRegNum()); + argReg = REG_NEXT(argReg); +#endif + } + } + else + { + regCount = 1; + op2Reg = intrin.op2->GetRegNum(); + } + + switch (regCount) + { + case 2: + ins = INS_tbx_2regs; + break; + case 3: + ins = INS_tbx_3regs; + break; + case 4: + ins = INS_tbx_4regs; + break; + default: + assert(regCount == 1); + assert(ins == INS_tbx); + break; + } + + GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg, /* canSkip */ true); + GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op2Reg, op3Reg, opt); + break; + } default: unreached(); } diff --git a/src/coreclr/jit/hwintrinsiclistarm64.h b/src/coreclr/jit/hwintrinsiclistarm64.h index 1e5b44c351bd88..d307695d7321ba 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64.h +++ b/src/coreclr/jit/hwintrinsiclistarm64.h @@ -475,8 +475,8 @@ HARDWARE_INTRINSIC(AdvSimd, SubtractSaturateScalar, HARDWARE_INTRINSIC(AdvSimd, SubtractScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sub, INS_sub, INS_fsub, INS_fsub}, HW_Category_SIMD, HW_Flag_SIMDScalar) HARDWARE_INTRINSIC(AdvSimd, SubtractWideningLower, 8, 2, {INS_ssubl, INS_usubl, INS_ssubl, INS_usubl, INS_ssubl, INS_usubl, INS_ssubw, INS_usubw, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AdvSimd, SubtractWideningUpper, 16, 2, {INS_ssubl2, INS_usubl2, INS_ssubl2, INS_usubl2, INS_ssubl2, INS_usubl2, INS_ssubw2, INS_usubw2, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd, VectorTableLookup, 8, 2, {INS_tbl, INS_tbl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd, VectorTableLookupExtension, 8, 3, {INS_tbx, INS_tbx, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd, VectorTableLookup, 8, 2, {INS_tbl, INS_tbl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd, VectorTableLookupExtension, 8, 3, {INS_tbx, INS_tbx, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics|HW_Flag_NeedsConsecutiveRegisters) HARDWARE_INTRINSIC(AdvSimd, Xor, -1, 2, {INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor, INS_eor}, HW_Category_SIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(AdvSimd, ZeroExtendWideningLower, 8, 1, {INS_uxtl, INS_uxtl, INS_uxtl, INS_uxtl, INS_uxtl, INS_uxtl, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(AdvSimd, ZeroExtendWideningUpper, 16, 1, {INS_uxtl2, INS_uxtl2, INS_uxtl2, INS_uxtl2, INS_uxtl2, INS_uxtl2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) @@ -649,8 +649,8 @@ HARDWARE_INTRINSIC(AdvSimd_Arm64, TransposeEven, HARDWARE_INTRINSIC(AdvSimd_Arm64, TransposeOdd, -1, 2, {INS_trn2, INS_trn2, INS_trn2, INS_trn2, INS_trn2, INS_trn2, INS_trn2, INS_trn2, INS_trn2, INS_trn2}, HW_Category_SIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AdvSimd_Arm64, UnzipEven, -1, 2, {INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1}, HW_Category_SIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AdvSimd_Arm64, UnzipOdd, -1, 2, {INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2, INS_uzp2}, HW_Category_SIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup, 16, 2, {INS_tbl, INS_tbl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookupExtension, 16, 3, {INS_tbx, INS_tbx, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics) +HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookup, 16, 2, {INS_tbl, INS_tbl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters) +HARDWARE_INTRINSIC(AdvSimd_Arm64, VectorTableLookupExtension, 16, 3, {INS_tbx, INS_tbx, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics|HW_Flag_NeedsConsecutiveRegisters) HARDWARE_INTRINSIC(AdvSimd_Arm64, ZipHigh, -1, 2, {INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2, INS_zip2}, HW_Category_SIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AdvSimd_Arm64, ZipLow, -1, 2, {INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1, INS_zip1}, HW_Category_SIMD, HW_Flag_NoFlag) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 
f4260562dafdc5..9aaf4ae08ec207 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -1298,10 +1298,6 @@ PhaseStatus LinearScan::doLinearScan() splitBBNumToTargetBBNumMap = nullptr; -#ifdef TARGET_ARM64 - nextConsecutiveRefPositionMap = nullptr; -#endif - // This is complicated by the fact that physical registers have refs associated // with locations where they are killed (e.g. calls), but we don't want to // count these as being touched. @@ -1318,6 +1314,7 @@ PhaseStatus LinearScan::doLinearScan() initVarRegMaps(); #ifdef TARGET_ARM64 + nextConsecutiveRefPositionMap = nullptr; if (compiler->info.needsConsecutiveRegisters) { allocateRegisters(); diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 3219400486216a..802fb7f6451bae 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1223,43 +1223,17 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou tgtPrefUse = BuildUse(intrin.op1); srcCount++; } - else + else if ((intrin.id != NI_AdvSimd_VectorTableLookup) && (intrin.id != NI_AdvSimd_Arm64_VectorTableLookup)) { - if ((intrin.id == NI_AdvSimd_VectorTableLookup) || (intrin.id == NI_AdvSimd_Arm64_VectorTableLookup)) - { - srcCount += BuildConsecutiveRegisters(intrin.op1); - } - else - { - srcCount += BuildOperandUses(intrin.op1); - } - } - } - - if ((intrin.id == NI_AdvSimd_VectorTableLookup) || (intrin.id == NI_AdvSimd_Arm64_VectorTableLookup) || - (intrin.id == NI_AdvSimd_VectorTableLookupExtension) || - (intrin.id == NI_AdvSimd_Arm64_VectorTableLookupExtension)) - { - if ((intrin.id == NI_AdvSimd_VectorTableLookup) || (intrin.id == NI_AdvSimd_Arm64_VectorTableLookup)) - { - assert(intrin.op2 != nullptr); - srcCount += BuildOperandUses(intrin.op2); + srcCount += BuildOperandUses(intrin.op1); } else { - assert(intrin.op2 != nullptr); - assert(intrin.op3 != nullptr); - srcCount += BuildConsecutiveRegisters(intrin.op2, intrin.op1); - srcCount += isRMW ? BuildDelayFreeUses(intrin.op3, intrin.op1) : BuildOperandUses(intrin.op3); + srcCount += BuildConsecutiveRegisters(intrin.op1); } - assert(dstCount == 1); - buildInternalRegisterUses(); - BuildDef(intrinsicTree); - *pDstCount = 1; - - return srcCount; } - else if ((intrin.category == HW_Category_SIMDByIndexedElement) && (genTypeSize(intrin.baseType) == 2)) + + if ((intrin.category == HW_Category_SIMDByIndexedElement) && (genTypeSize(intrin.baseType) == 2)) { // Some "Advanced SIMD scalar x indexed element" and "Advanced SIMD vector x indexed element" instructions (e.g. // "MLA (by element)") have encoding that restricts what registers that can be used for the indexed element when @@ -1302,6 +1276,27 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } } } + + else if (HWIntrinsicInfo::NeedsConsecutiveRegisters(intrin.id)) + { + if ((intrin.id == NI_AdvSimd_VectorTableLookup) || (intrin.id == NI_AdvSimd_Arm64_VectorTableLookup)) + { + assert(intrin.op2 != nullptr); + srcCount += BuildOperandUses(intrin.op2); + } + else + { + assert(intrin.op2 != nullptr); + assert(intrin.op3 != nullptr); + srcCount += BuildConsecutiveRegisters(intrin.op2, intrin.op1); + srcCount += isRMW ? 
BuildDelayFreeUses(intrin.op3, intrin.op1) : BuildOperandUses(intrin.op3); + } + assert(dstCount == 1); + buildInternalRegisterUses(); + BuildDef(intrinsicTree); + *pDstCount = 1; + return srcCount; + } else if (intrin.op2 != nullptr) { // RMW intrinsic operands doesn't have to be delayFree when they can be assigned the same register as op1Reg From 961e9c20cf22f255ba18503cd10dc8a0a9ef1e89 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 1 Mar 2023 21:30:08 -0800 Subject: [PATCH 080/125] Add template parameter for allocateReg/copyReg/select --- src/coreclr/jit/lsra.cpp | 82 +++++++++++++++++++++++++++++++++------- src/coreclr/jit/lsra.h | 15 ++++++-- 2 files changed, 80 insertions(+), 17 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 9aaf4ae08ec207..9b3c4b225f2bd4 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -2844,11 +2844,24 @@ bool LinearScan::isMatchingConstant(RegRecord* physRegRecord, RefPosition* refPo // of all but also has a weight lower than 'refPosition'. If there is // no such ref position, no register will be allocated. // - +#ifdef TARGET_ARM64 +template +#endif regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) { - regMaskTP foundRegBit = regSelector->select(currentInterval, refPosition DEBUG_ARG(registerScore)); + regMaskTP foundRegBit; + +#ifdef TARGET_ARM64 + if (hasConsecutiveRegister) + { + foundRegBit = regSelector->select(currentInterval, refPosition DEBUG_ARG(registerScore)); + } + else +#endif // TARGET_ARM64 + { + foundRegBit = regSelector->select(currentInterval, refPosition DEBUG_ARG(registerScore)); + } if (foundRegBit == RBM_NONE) { return REG_NA; @@ -3121,6 +3134,9 @@ bool LinearScan::isSpillCandidate(Interval* current, RefPosition* refPosition, R // Prefer a free register that's got the earliest next use. // Otherwise, spill something with the farthest next use // +#ifdef TARGET_ARM64 +template +#endif regNumber LinearScan::assignCopyReg(RefPosition* refPosition) { Interval* currentInterval = refPosition->getInterval(); @@ -3143,7 +3159,17 @@ regNumber LinearScan::assignCopyReg(RefPosition* refPosition) refPosition->copyReg = true; RegisterScore registerScore = NONE; - regNumber allocatedReg = allocateReg(currentInterval, refPosition DEBUG_ARG(®isterScore)); + regNumber allocatedReg; +#ifdef TARGET_ARM64 + if (hasConsecutiveRegister) + { + allocatedReg = allocateReg(currentInterval, refPosition DEBUG_ARG(®isterScore)); + } + else +#endif + { + allocatedReg = allocateReg(currentInterval, refPosition DEBUG_ARG(®isterScore)); + } assert(allocatedReg != REG_NA); INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_COPY_REG, currentInterval, allocatedReg, nullptr, registerScore)); @@ -5361,7 +5387,7 @@ void LinearScan::allocateRegisters() // to remaining refPosition. assert((currentRefPosition.refType == RefTypeUse) || (currentRefPosition.refType == RefTypeUpperVectorRestore)); - regNumber copyReg = assignCopyReg(¤tRefPosition); + regNumber copyReg = assignCopyReg(¤tRefPosition); lastAllocatedRefPosition = ¤tRefPosition; regMaskTP copyRegMask = getRegMask(copyReg, currentInterval->registerType); regMaskTP assignedRegMask = getRegMask(assignedRegister, currentInterval->registerType); @@ -5441,7 +5467,18 @@ void LinearScan::allocateRegisters() // It's already in a register, but not one we need. 
if (!RefTypeIsDef(currentRefPosition.refType)) { - regNumber copyReg = assignCopyReg(¤tRefPosition); + regNumber copyReg; +#ifdef TARGET_ARM64 + if (hasConsecutiveRegister) + { + copyReg = assignCopyReg(¤tRefPosition); + } + else +#endif + { + copyReg = assignCopyReg(¤tRefPosition); + } + lastAllocatedRefPosition = ¤tRefPosition; regMaskTP copyRegMask = getRegMask(copyReg, currentInterval->registerType); regMaskTP assignedRegMask = getRegMask(assignedRegister, currentInterval->registerType); @@ -5619,15 +5656,24 @@ void LinearScan::allocateRegisters() { unassignPhysReg(currentInterval->assignedReg, nullptr); } - assignedRegister = allocateReg(currentInterval, ¤tRefPosition DEBUG_ARG(®isterScore)); + #ifdef TARGET_ARM64 - if (hasConsecutiveRegister && currentRefPosition.isFirstRefPositionOfConsecutiveRegisters()) + if (hasConsecutiveRegister) { - bool consecutiveAssigned = - setNextConsecutiveRegisterAssignment(¤tRefPosition, assignedRegister); - assert(consecutiveAssigned); + assignedRegister = + allocateReg(currentInterval, ¤tRefPosition DEBUG_ARG(®isterScore)); + if (currentRefPosition.isFirstRefPositionOfConsecutiveRegisters()) + { + bool consecutiveAssigned = + setNextConsecutiveRegisterAssignment(¤tRefPosition, assignedRegister); + assert(consecutiveAssigned); + } } + else #endif // TARGET_ARM64 + { + assignedRegister = allocateReg(currentInterval, ¤tRefPosition DEBUG_ARG(®isterScore)); + } } // If no register was found, this RefPosition must not require a register. @@ -11919,6 +11965,9 @@ void LinearScan::RegisterSelection::calculateCoversSets() // Return Values: // Register bit selected (a single register) and REG_NA if no register was selected. // +#ifdef TARGET_ARM64 +template +#endif regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) { @@ -12168,7 +12217,7 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval, #ifdef TARGET_ARM64 // If this is allocating for consecutive register, we need to make sure that // we allocate register, whose consecutive registers are also free. - if (!refPosition->needsConsecutive) + if (!hasConsecutiveRegister || !refPosition->needsConsecutive) #endif { candidates = prevRegBit; @@ -12200,10 +12249,15 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval, #endif // DEBUG #ifdef TARGET_ARM64 - freeCandidates = linearScan->getFreeCandidates(candidates, refPosition); -#else - freeCandidates = linearScan->getFreeCandidates(candidates ARM_ARG(regType)); + if (hasConsecutiveRegister) + { + freeCandidates = linearScan->getFreeCandidates(candidates, refPosition); + } + else #endif // TARGET_ARM64 + { + freeCandidates = linearScan->getFreeCandidates(candidates ARM_ARG(regType)); + } // If no free candidates, then double check if refPosition is an actual ref. 
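
The template parameter added in this commit is the key mechanism: allocateReg, assignCopyReg and RegisterSelection::select are each compiled twice from one body, and a single runtime check of the per-method flag picks the instantiation, so methods with no tbl/tbx usage pay nothing extra per RefPosition. A minimal, self-contained sketch of that dispatch pattern, using stand-in types rather than the JIT's real LinearScan and RefPosition (every name below is illustrative only):

    #include <cstdint>

    using regMaskTP = uint64_t;

    struct RefPos
    {
        bool needsConsecutive; // does this use start or continue a consecutive-register series?
    };

    template <bool hasConsecutiveRegister>
    regMaskTP selectImpl(regMaskTP candidates, const RefPos& ref)
    {
        if (hasConsecutiveRegister) // compile-time constant per instantiation; dead branch is dropped
        {
            if (ref.needsConsecutive)
            {
                // consecutive-aware filtering of 'candidates' would go here
            }
        }
        return candidates;
    }

    regMaskTP select(regMaskTP candidates, const RefPos& ref, bool methodNeedsConsecutive)
    {
        // One branch on the whole-method flag chooses the specialized body,
        // mirroring the hasConsecutiveRegister checks added in this commit.
        return methodNeedsConsecutive ? selectImpl<true>(candidates, ref)
                                      : selectImpl<false>(candidates, ref);
    }
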
if (freeCandidates == RBM_NONE) diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index cc99d075f3bec3..259b8c68432793 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1156,8 +1156,14 @@ class LinearScan : public LinearScanInterface #ifdef DEBUG const char* getScoreName(RegisterScore score); +#endif +#ifdef TARGET_ARM64 + template #endif regNumber allocateReg(Interval* current, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)); +#ifdef TARGET_ARM64 + template +#endif regNumber assignCopyReg(RefPosition* refPosition); bool isMatchingConstant(RegRecord* physRegRecord, RefPosition* refPosition); @@ -1193,7 +1199,8 @@ class LinearScan : public LinearScanInterface bool areNextConsecutiveRegistersFree(regNumber regToAssign, int registersToCheck, var_types registerType); bool setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosition, regNumber firstRegAssigned); regMaskTP getFreeCandidates(regMaskTP candidates, RefPosition* refPosition); -#else +#endif // TARGET_ARM64 + regMaskTP getFreeCandidates(regMaskTP candidates ARM_ARG(var_types regType)) { regMaskTP result = candidates & m_AvailableRegs; @@ -1207,7 +1214,6 @@ class LinearScan : public LinearScanInterface #endif // TARGET_ARM return result; } -#endif // TARGET_ARM64 #ifdef DEBUG class RegisterSelection; @@ -1223,7 +1229,10 @@ class LinearScan : public LinearScanInterface public: RegisterSelection(LinearScan* linearScan); - // Perform register selection and update currentInterval or refPosition +// Perform register selection and update currentInterval or refPosition +#ifdef TARGET_ARM64 + template +#endif FORCEINLINE regMaskTP select(Interval* currentInterval, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)); From b9d0f1541af989f6045399c0bdca462a0c0dcd1e Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 1 Mar 2023 22:15:31 -0800 Subject: [PATCH 081/125] Comments --- src/coreclr/jit/lsra.h | 2 +- src/coreclr/jit/lsraarm64.cpp | 73 ++++++++++++++++++++++++----------- 2 files changed, 51 insertions(+), 24 deletions(-) diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 259b8c68432793..0d65f4450438f8 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1964,7 +1964,7 @@ class LinearScan : public LinearScanInterface #ifdef FEATURE_HW_INTRINSICS int BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCount); #ifdef TARGET_ARM64 - int BuildConsecutiveRegisters(GenTree* treeNode, GenTree* rmwNode = nullptr); + int BuildConsecutiveRegistersForUse(GenTree* treeNode); #endif #endif // FEATURE_HW_INTRINSICS diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 802fb7f6451bae..7b36af62c00797 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -47,15 +47,17 @@ RefPosition* LinearScan::getNextConsecutiveRefPosition(RefPosition* refPosition) // setNextConsecutiveRegisterAssignment: For subsequent refPositions, set the register // requirement to be the consecutive register(s) of the register that is assigned to // the firstRefPosition. +// If one of the subsequent RefPosition is RefTypeUpperVectorRestore, sets the +// registerAssignment to not include any of the consecutive registers that are being +// assigned to the RefTypeUse refpositions. // // Arguments: // firstRefPosition - First refPosition of the series of consecutive registers. -// firstReg - Register assigned to the first refposition. +// firstRegAssigned - Register assigned to the first refposition. 
// // Returns: // True if all the consecutive registers starting from `firstRegAssigned` were free. Even if one -// of them is busy, returns false and does not change the registerAssignment of a subsequent -// refPosition. +// of them is busy, returns false and does not change the registerAssignment of any refPositions. // bool LinearScan::setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosition, regNumber firstRegAssigned) { @@ -116,11 +118,10 @@ bool LinearScan::setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosit } //------------------------------------------------------------------------ -// areNextConsecutiveRegistersBusy: Starting with `regToAssign`, check if next -// `registersToCheck` are free or not. +// areNextConsecutiveRegistersFree: Starting with `regToAssign`, check if next +// consecutive `registersToCheck` are free or not. // // Arguments: -// - First refPosition of the series of consecutive registers. // regToAssign - Register assigned to the first refposition. // registersCount - Number of registers to check. // registerType - Type of register. @@ -143,6 +144,19 @@ bool LinearScan::areNextConsecutiveRegistersFree(regNumber regToAssign, int regi return true; } +//------------------------------------------------------------------------ +// getFreeCandidates: Returns the mask of all the free candidates for given refPosition. +// If refPosition is the first RefPosition of a series of refpositions that needs +// consecutive registers, then returns only the mask such that it satisfies the need +// of having free consecutive registers. +// +// Arguments: +// candidates - Register assigned to the first refposition. +// refPosition - Number of registers to check. +// +// Returns: +// Register mask of all the free registers +// regMaskTP LinearScan::getFreeCandidates(regMaskTP candidates, RefPosition* refPosition) { regMaskTP result = candidates & m_AvailableRegs; @@ -208,9 +222,10 @@ regMaskTP LinearScan::getFreeCandidates(regMaskTP candidates, RefPosition* refPo if (compiler->opts.OptimizationEnabled()) { - // One last time, check if subsequent refpositions already have consecutive registers assigned - // and if yes, and if one of the register out of consecutiveResult is available for the first - // refposition, then just use that. This will avoid unnecessary copies. + // One last time, check if subsequent refpositions (all refpositions except the first for which + // we assigned above) already have consecutive registers assigned. If yes, and if one of the + // register out of the `consecutiveResult` is available for the first refposition, then just use + // that. This will avoid unnecessary copies. regNumber firstRegNum = REG_NA; regNumber prevRegNum = REG_NA; @@ -1229,7 +1244,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } else { - srcCount += BuildConsecutiveRegisters(intrin.op1); + srcCount += BuildConsecutiveRegistersForUse(intrin.op1); } } @@ -1288,7 +1303,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou { assert(intrin.op2 != nullptr); assert(intrin.op3 != nullptr); - srcCount += BuildConsecutiveRegisters(intrin.op2, intrin.op1); + srcCount += BuildConsecutiveRegistersForUse(intrin.op2); srcCount += isRMW ? 
BuildDelayFreeUses(intrin.op3, intrin.op1) : BuildOperandUses(intrin.op3); } assert(dstCount == 1); @@ -1367,19 +1382,31 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou return srcCount; } -int LinearScan::BuildConsecutiveRegisters(GenTree* treeNode, GenTree* rmwNode) +//------------------------------------------------------------------------ +// BuildConsecutiveRegistersForUse: Build ref position(s) for `treeNode` that has a +// requirement of allocating consecutive registers. It will create the RefTypeUse +// RefPositions for as many consecutive registers are needed for `treeNode` and in +// between, it might contain RefTypeUpperVectorRestore RefPositions. +// +// For the first RefPosition of the series, it sets the `regCount` field equal to +// the number of subsequent RefPositions (including the first one) involved for this +// treeNode. For the subsequent RefPositions, it sets the `regCount` to 0. For all +// the RefPositions created, it sets the `needsConsecutive` flag so it can be used to +// identify these RefPositions during allocation. +// +// It also populates a `refPositionMap` to access the subsequent RefPositions from +// a given RefPosition. This was preferred rather than adding a field in RefPosition +// for this purpose. +// +// Arguments: +// treeNode - The GT_HWINTRINSIC node of interest +// +// Return Value: +// The number of sources consumed by this node. +// +int LinearScan::BuildConsecutiveRegistersForUse(GenTree* treeNode) { - int srcCount = 0; - Interval* rmwInterval = nullptr; - bool rmwIsLastUse = false; - if ((rmwNode != nullptr)) - { - if (isCandidateLocalRef(rmwNode)) - { - rmwInterval = getIntervalForLocalVarNode(rmwNode->AsLclVar()); - rmwIsLastUse = rmwNode->AsLclVar()->IsLastUse(0); - } - } + int srcCount = 0; if (treeNode->OperIsFieldList()) { unsigned regCount = 0; From cbe999f69718770eeaa6f27d858954f6b8a0c50d Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 2 Mar 2023 13:57:18 -0800 Subject: [PATCH 082/125] Fix mono failures --- src/mono/mono/mini/simd-intrinsics.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index c1d65f744a9aae..20bbb37358c5e3 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -3372,13 +3372,13 @@ emit_arm64_intrinsics ( return ret; } case SN_VectorTableLookup: - if (!type_is_simd_vector (fsig->params [0])) + if (!type_is_simd_vector (fsig->params [0]) || !type_is_simd_vector (fsig->params [1])) return NULL; return emit_simd_ins_for_sig (cfg, klass, OP_XOP_OVR_X_X_X, INTRINS_AARCH64_ADV_SIMD_TBL1, 0, fsig, args); case SN_VectorTableLookupExtension: - if (!type_is_simd_vector (fsig->params [0])) + if (!type_is_simd_vector (fsig->params [0]) || !type_is_simd_vector (fsig->params [1])) return NULL; - return emit_simd_ins_for_sig (cfg, klass, OP_XOP_OVR_X_X_X, INTRINS_AARCH64_ADV_SIMD_TBX1, 0, fsig, args); + return emit_simd_ins_for_sig (cfg, klass, OP_XOP_OVR_X_X_X_X, INTRINS_AARCH64_ADV_SIMD_TBX1, 0, fsig, args); default: g_assert_not_reached (); } From 66655368dd24a0ab0533d99e1897992369343e1e Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 7 Mar 2023 10:41:26 -0800 Subject: [PATCH 083/125] Added some more comments --- src/coreclr/jit/lsra.cpp | 20 ++++++++++++++++---- src/coreclr/jit/lsra.h | 2 +- src/coreclr/jit/lsraarm64.cpp | 9 ++++++--- 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp 
b/src/coreclr/jit/lsra.cpp index 9b3c4b225f2bd4..67ca39542ee92a 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -5374,6 +5374,9 @@ void LinearScan::allocateRegisters() #ifdef TARGET_ARM64 if (hasConsecutiveRegister && currentRefPosition.isFirstRefPositionOfConsecutiveRegisters()) { + // For consecutive registers, if the first RefPosition is already assigned to a register, + // check if consecutive registers are free so they can be assigned to the subsequent + // RefPositions. if (areNextConsecutiveRegistersFree(assignedRegister, currentRefPosition.regCount, currentRefPosition.getInterval()->registerType)) { @@ -5383,8 +5386,8 @@ void LinearScan::allocateRegisters() } else { - // It doesn't satisfy, so do a copyReg followed by assigning consecutive registers - // to remaining refPosition. + // It doesn't satisfy, so do a copyReg for the first RefPosition to such a register, so + // it would be possible to allocate consecutive registers to the subsequent RefPositions. assert((currentRefPosition.refType == RefTypeUse) || (currentRefPosition.refType == RefTypeUpperVectorRestore)); regNumber copyReg = assignCopyReg(¤tRefPosition); @@ -5488,10 +5491,19 @@ void LinearScan::allocateRegisters() { if (currentRefPosition.regCount != 0) { + // If the first RefPosition was not assigned to the register we wanted and we added + // a copyReg for it, then allocate the subsequent RefPositions with the consecutive + // registers. bool consecutiveAssigned = setNextConsecutiveRegisterAssignment(¤tRefPosition, copyReg); assert(consecutiveAssigned); } + else + { + // For non-first RefPositions, if they were not in the register that we wanted, we + // added a copyReg for them to move it to the desired register. No further action is + // needed. + } // For consecutive register, it doesn't matter what the assigned register was. // We have just assigned it `copyRegMask` and that's the one in-use, and not the @@ -5562,9 +5574,8 @@ void LinearScan::allocateRegisters() { // For consecutive register, we would like to assign a register (if not already assigned) // to the 1st refPosition and the subsequent refPositions will just get the consecutive register. - if (currentRefPosition.regCount > 0) + if (currentRefPosition.isFirstRefPositionOfConsecutiveRegisters()) { - // 1st refPosition of the series... if (assignedRegister != REG_NA) { // For the 1st refPosition, if it already has a register assigned, then just assign @@ -5598,6 +5609,7 @@ void LinearScan::allocateRegisters() assignedRegister = REG_NA; if (assignedRegBit != RBM_NONE) { + // Also unassign the register currently assigned to it. 
RegRecord* physRegRecord = getRegisterRecord(currentInterval->physReg); unassignPhysRegNoSpill(physRegRecord); } diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 0d65f4450438f8..8f80054e45367b 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1390,7 +1390,7 @@ class LinearScan : public LinearScanInterface } return nextConsecutiveRefPositionMap; } - RefPosition* getNextConsecutiveRefPosition(RefPosition* refPosition); + FORCEINLINE RefPosition* getNextConsecutiveRefPosition(RefPosition* refPosition); #endif #ifdef DEBUG diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 7b36af62c00797..0c4ad88872c8a8 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1436,13 +1436,16 @@ int LinearScan::BuildConsecutiveRegistersForUse(GenTree* treeNode) #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE if (restoreRefPos != nullptr) { - // If there was a restoreRefPosition created, make sure - // to link it as well so it gets same registerAssignment + // If there was a restoreRefPosition created, make sure to link it + // as well so during register assignment, we could visit it and + // make sure that it doesn't get assigned one of register that is part + // of consecutive registers we are allocating for this treeNode. + // See setNextConsecutiveRegisterAssignment(). restoreRefPos->needsConsecutive = true; restoreRefPos->regCount = 0; if (firstRefPos == nullptr) { - // Always set the non UpperVectorRestore. UpperVectorRestore can be assigned + // Always set the non UpperVectorRestore. UpperVectorRestore can be assigned // different independent register. // See TODO-CQ in setNextConsecutiveRegisterAssignment(). firstRefPos = currRefPos; From 2b9f49e587a4ec7883c877c7700d5606881ced80 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 17 Mar 2023 14:33:02 -0700 Subject: [PATCH 084/125] Call allocateReg/assignCopyReg/select methods only for refpositions that need consecutive registers --- src/coreclr/jit/fginline.cpp | 4 ++++ src/coreclr/jit/lsra.cpp | 24 ++++++++++++++---------- src/coreclr/jit/lsra.h | 4 ++-- src/coreclr/jit/lsraarm64.cpp | 11 ++++++++--- 4 files changed, 28 insertions(+), 15 deletions(-) diff --git a/src/coreclr/jit/fginline.cpp b/src/coreclr/jit/fginline.cpp index 969943ccf83f6c..81c7c811d12d2f 100644 --- a/src/coreclr/jit/fginline.cpp +++ b/src/coreclr/jit/fginline.cpp @@ -1453,6 +1453,10 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo) lvaGenericsContextInUse |= InlineeCompiler->lvaGenericsContextInUse; +#ifdef TARGET_ARM64 + info.needsConsecutiveRegisters |= InlineeCompiler->info.needsConsecutiveRegisters; +#endif + // If the inlinee compiler encounters switch tables, disable hot/cold splitting in the root compiler. // TODO-CQ: Implement hot/cold splitting of methods with switch tables. 
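
The fginline.cpp hunk here is small but load-bearing: once an inlinee's IR is spliced into the root method, the root may contain tbl/tbx intrinsics even though its own source had none, so the flag that later routes LSRA onto the consecutive-register-aware path has to be the union of both methods' flags. A rough sketch of the idea with made-up names (JitMethodInfo and mergeInlineeInfo are not the JIT's identifiers; the real flag lives on the compiler's method info and is merged during inlining, as the hunk shows):

    struct JitMethodInfo
    {
        bool needsConsecutiveRegisters = false;
    };

    void mergeInlineeInfo(JitMethodInfo& root, const JitMethodInfo& inlinee)
    {
        // Whole-method properties that drive later phases must survive inlining.
        root.needsConsecutiveRegisters |= inlinee.needsConsecutiveRegisters;
    }
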
if (InlineeCompiler->fgHasSwitch && opts.compProcedureSplitting) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 67ca39542ee92a..c1de23e781c91b 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -1305,6 +1305,10 @@ PhaseStatus LinearScan::doLinearScan() compiler->codeGen->regSet.rsClearRegsModified(); initMaxSpill(); + +#ifdef TARGET_ARM64 + nextConsecutiveRefPositionMap = nullptr; +#endif buildIntervals(); DBEXEC(VERBOSE, TupleStyleDump(LSRA_DUMP_REFPOS)); compiler->EndPhase(PHASE_LINEAR_SCAN_BUILD); @@ -1314,7 +1318,6 @@ PhaseStatus LinearScan::doLinearScan() initVarRegMaps(); #ifdef TARGET_ARM64 - nextConsecutiveRefPositionMap = nullptr; if (compiler->info.needsConsecutiveRegisters) { allocateRegisters(); @@ -2845,7 +2848,7 @@ bool LinearScan::isMatchingConstant(RegRecord* physRegRecord, RefPosition* refPo // no such ref position, no register will be allocated. // #ifdef TARGET_ARM64 -template +template #endif regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) @@ -2853,7 +2856,7 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, regMaskTP foundRegBit; #ifdef TARGET_ARM64 - if (hasConsecutiveRegister) + if (needsConsecutiveRegisters) { foundRegBit = regSelector->select(currentInterval, refPosition DEBUG_ARG(registerScore)); } @@ -3135,7 +3138,7 @@ bool LinearScan::isSpillCandidate(Interval* current, RefPosition* refPosition, R // Otherwise, spill something with the farthest next use // #ifdef TARGET_ARM64 -template +template #endif regNumber LinearScan::assignCopyReg(RefPosition* refPosition) { @@ -3161,8 +3164,9 @@ regNumber LinearScan::assignCopyReg(RefPosition* refPosition) RegisterScore registerScore = NONE; regNumber allocatedReg; #ifdef TARGET_ARM64 - if (hasConsecutiveRegister) + if (needsConsecutiveRegisters) { + assert(refPosition->needsConsecutive); allocatedReg = allocateReg(currentInterval, refPosition DEBUG_ARG(®isterScore)); } else @@ -5472,7 +5476,7 @@ void LinearScan::allocateRegisters() { regNumber copyReg; #ifdef TARGET_ARM64 - if (hasConsecutiveRegister) + if (hasConsecutiveRegister && currentRefPosition.needsConsecutive) { copyReg = assignCopyReg(¤tRefPosition); } @@ -5670,7 +5674,7 @@ void LinearScan::allocateRegisters() } #ifdef TARGET_ARM64 - if (hasConsecutiveRegister) + if (hasConsecutiveRegister && currentRefPosition.needsConsecutive) { assignedRegister = allocateReg(currentInterval, ¤tRefPosition DEBUG_ARG(®isterScore)); @@ -11978,7 +11982,7 @@ void LinearScan::RegisterSelection::calculateCoversSets() // Register bit selected (a single register) and REG_NA if no register was selected. // #ifdef TARGET_ARM64 -template +template #endif regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) @@ -12229,7 +12233,7 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval, #ifdef TARGET_ARM64 // If this is allocating for consecutive register, we need to make sure that // we allocate register, whose consecutive registers are also free. 
- if (!hasConsecutiveRegister || !refPosition->needsConsecutive) + if (!needsConsecutiveRegisters) #endif { candidates = prevRegBit; @@ -12261,7 +12265,7 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval, #endif // DEBUG #ifdef TARGET_ARM64 - if (hasConsecutiveRegister) + if (needsConsecutiveRegisters) { freeCandidates = linearScan->getFreeCandidates(candidates, refPosition); } diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 8f80054e45367b..7c337bf3c720fa 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1158,11 +1158,11 @@ class LinearScan : public LinearScanInterface const char* getScoreName(RegisterScore score); #endif #ifdef TARGET_ARM64 - template + template #endif regNumber allocateReg(Interval* current, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)); #ifdef TARGET_ARM64 - template + template #endif regNumber assignCopyReg(RefPosition* refPosition); diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 0c4ad88872c8a8..efe2c8dbfa3b2d 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -36,6 +36,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX // RefPosition* LinearScan::getNextConsecutiveRefPosition(RefPosition* refPosition) { + assert(compiler->info.needsConsecutiveRegisters); RefPosition* nextRefPosition; assert(refPosition->needsConsecutive); nextConsecutiveRefPositionMap->Lookup(refPosition, &nextRefPosition); @@ -132,6 +133,7 @@ bool LinearScan::setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosit // bool LinearScan::areNextConsecutiveRegistersFree(regNumber regToAssign, int registersCount, var_types registerType) { + assert(compiler->info.needsConsecutiveRegisters); for (int i = 0; i < registersCount; i++) { if (isRegInUse(regToAssign, registerType)) @@ -159,6 +161,7 @@ bool LinearScan::areNextConsecutiveRegistersFree(regNumber regToAssign, int regi // regMaskTP LinearScan::getFreeCandidates(regMaskTP candidates, RefPosition* refPosition) { + assert(compiler->info.needsConsecutiveRegisters); regMaskTP result = candidates & m_AvailableRegs; if (!refPosition->isFirstRefPositionOfConsecutiveRegisters()) { @@ -220,7 +223,7 @@ regMaskTP LinearScan::getFreeCandidates(regMaskTP candidates, RefPosition* refPo currAvailableRegs &= ~endMask; } while (currAvailableRegs != RBM_NONE); - if (compiler->opts.OptimizationEnabled()) + if (compiler->opts.OptimizationEnabled() && (overallResult != RBM_NONE)) { // One last time, check if subsequent refpositions (all refpositions except the first for which // we assigned above) already have consecutive registers assigned. If yes, and if one of the @@ -1406,6 +1409,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou // int LinearScan::BuildConsecutiveRegistersForUse(GenTree* treeNode) { + assert(compiler->info.needsConsecutiveRegisters); int srcCount = 0; if (treeNode->OperIsFieldList()) { @@ -1445,8 +1449,9 @@ int LinearScan::BuildConsecutiveRegistersForUse(GenTree* treeNode) restoreRefPos->regCount = 0; if (firstRefPos == nullptr) { - // Always set the non UpperVectorRestore. UpperVectorRestore can be assigned - // different independent register. + // Always set the non UpperVectorRestore as the firstRefPos. + // UpperVectorRestore can be assigned to a different independent + // register. // See TODO-CQ in setNextConsecutiveRegisterAssignment(). 
firstRefPos = currRefPos; } From 5fec6e1e8749aa6e8604bddcb82fb50000ac590e Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Sat, 18 Mar 2023 17:09:15 -0700 Subject: [PATCH 085/125] Add heuristics to pick best possible set of registers which will need less spilling --- src/coreclr/jit/lsraarm64.cpp | 178 ++++++++++++++++++++++++++-------- 1 file changed, 140 insertions(+), 38 deletions(-) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index efe2c8dbfa3b2d..310937cdbd5677 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -168,15 +168,17 @@ regMaskTP LinearScan::getFreeCandidates(regMaskTP candidates, RefPosition* refPo return result; } - unsigned int registersNeeded = refPosition->regCount; - regMaskTP currAvailableRegs = result; - if (BitOperations::PopCount(currAvailableRegs) < registersNeeded) - { - // If number of free registers are less than what we need, no point in scanning - // for them. - return RBM_NONE; - } + unsigned int registersNeeded = refPosition->regCount; + + + regMaskTP currAvailableRegs = result; + regMaskTP overallResult = RBM_NONE; + regMaskTP consecutiveResult = RBM_NONE; + regMaskTP consecutiveResultForBusy = RBM_NONE; + regMaskTP busyRegsInThisLocation = regsBusyUntilKill | regsInUseThisLocation; + if (BitOperations::PopCount(currAvailableRegs) >= registersNeeded) + { // At this point, for 'n' registers requirement, if Rm+1, Rm+2, Rm+3, ..., Rm+k are // available, create the mask only for Rm+1, Rm+2, ..., Rm+(k-n+1) to convey that it // is safe to assign any of those registers, but not beyond that. @@ -186,44 +188,111 @@ regMaskTP LinearScan::getFreeCandidates(regMaskTP candidates, RefPosition* refPo consecutiveResult |= availableRegistersMask & (selectionEndMask & ~selectionStartMask); \ overallResult |= availableRegistersMask; - regMaskTP overallResult = RBM_NONE; - regMaskTP consecutiveResult = RBM_NONE; - DWORD regAvailableStartIndex = 0, regAvailableEndIndex = 0; - do - { - // From LSB, find the first available register (bit `1`) - BitScanForward64(®AvailableStartIndex, static_cast(currAvailableRegs)); - regMaskTP startMask = (1ULL << regAvailableStartIndex) - 1; - // Mask all the bits that are processed from LSB thru regAvailableStart until the last `1`. - regMaskTP maskProcessed = ~(currAvailableRegs | startMask); + DWORD regAvailableStartIndex = 0, regAvailableEndIndex = 0; + + // If we don't find consecutive registers, also track which registers we can pick so + // as to reduce the number of registers we will have to spill, to accomodate the + // request of the consecutive registers. + bool trackForBusyCandidates = true; + int maxSpillRegs = registersNeeded; + regMaskTP registersNeededMask = (1ULL << registersNeeded) - 1; - // From regAvailableStart, find the first unavailable register (bit `0`). - if (maskProcessed == 0) + do { - regAvailableEndIndex = 64; + // From LSB, find the first available register (bit `1`) + BitScanForward64(®AvailableStartIndex, static_cast(currAvailableRegs)); + regMaskTP startMask = (1ULL << regAvailableStartIndex) - 1; + + // Mask all the bits that are processed from LSB thru regAvailableStart until the last `1`. + regMaskTP maskProcessed = ~(currAvailableRegs | startMask); + + // From regAvailableStart, find the first unavailable register (bit `0`). 
+ if (maskProcessed == 0) + { + regAvailableEndIndex = 64; + if ((regAvailableEndIndex - regAvailableStartIndex) >= registersNeeded) + { + AppendConsecutiveMask(regAvailableStartIndex, regAvailableEndIndex, currAvailableRegs); + trackForBusyCandidates = false; + consecutiveResultForBusy = RBM_NONE; + } + else + { + // We reached a set of registers where there are not enough consecutive registers. + // Move a registersNeeded size window for all the available registers and track for which + // one we can spill least number of registers. + + for (DWORD i = regAvailableStartIndex; i < regAvailableEndIndex; i++) + { + regMaskTP maskForCurRange = registersNeededMask << i; + if ((maskForCurRange & busyRegsInThisLocation) != RBM_NONE) + { + // If any register between i and (i + registersNeeded) contains one or more + // register that are busy, then we cannot that entire range. + continue; + } + int curSpillRegs = registersNeeded - BitOperations::PopCount(maskForCurRange) + 1; + + if (curSpillRegs < maxSpillRegs) + { + // We found a series that will need fewer registers to be spilled. + // Reset whatever we found so far and start accumulating the result again. + consecutiveResultForBusy = RBM_NONE; + maxSpillRegs = curSpillRegs; + } + + consecutiveResultForBusy |= 1ULL << i; + } + } + break; + } + else + { + BitScanForward64(®AvailableEndIndex, static_cast(maskProcessed)); + } + regMaskTP endMask = (1ULL << regAvailableEndIndex) - 1; + + // Anything between regAvailableStart and regAvailableEnd is the range of consecutive registers available + // If they are equal to or greater than our register requirements, then add all of them to the result. if ((regAvailableEndIndex - regAvailableStartIndex) >= registersNeeded) { - AppendConsecutiveMask(regAvailableStartIndex, regAvailableEndIndex, currAvailableRegs); + AppendConsecutiveMask(regAvailableStartIndex, regAvailableEndIndex, (endMask & ~startMask)); + trackForBusyCandidates = false; + consecutiveResultForBusy = RBM_NONE; } - break; - } - else - { - BitScanForward64(®AvailableEndIndex, static_cast(maskProcessed)); - } - regMaskTP endMask = (1ULL << regAvailableEndIndex) - 1; + else if (trackForBusyCandidates) + { + // We reached a set of registers where there are not enough consecutive registers. + // Move a registersNeeded size window for all the available registers and track for which + // one we can spill least number of registers. - // Anything between regAvailableStart and regAvailableEnd is the range of consecutive registers available - // If they are equal to or greater than our register requirements, then add all of them to the result. - if ((regAvailableEndIndex - regAvailableStartIndex) >= registersNeeded) - { - AppendConsecutiveMask(regAvailableStartIndex, regAvailableEndIndex, (endMask & ~startMask)); - } - currAvailableRegs &= ~endMask; - } while (currAvailableRegs != RBM_NONE); + for (DWORD i = regAvailableStartIndex; i < regAvailableEndIndex; i++) + { + regMaskTP maskForCurRange = registersNeededMask << i; + if ((maskForCurRange & busyRegsInThisLocation) != RBM_NONE) + { + // If any register between i and (i + registersNeeded) contains one or more + // register that are busy, then we cannot that entire range. + continue; + } + int curSpillRegs = registersNeeded - BitOperations::PopCount(maskForCurRange) + 1; + if (curSpillRegs < maxSpillRegs) + { + // We found a series that will need fewer registers to be spilled. + // Reset whatever we found so far and start accumulating the result again. 
+ consecutiveResultForBusy = RBM_NONE; + maxSpillRegs = curSpillRegs; + } + + consecutiveResultForBusy |= 1ULL << i; + } + } + currAvailableRegs &= ~endMask; + } while (currAvailableRegs != RBM_NONE); + } - if (compiler->opts.OptimizationEnabled() && (overallResult != RBM_NONE)) + if (overallResult != RBM_NONE) { // One last time, check if subsequent refpositions (all refpositions except the first for which // we assigned above) already have consecutive registers assigned. If yes, and if one of the @@ -281,6 +350,39 @@ regMaskTP LinearScan::getFreeCandidates(regMaskTP candidates, RefPosition* refPo } } } + else + { + // There are enough registers available but they are not consecutive. + // Here are some options to address them: + // + // 1. Scan once again the available registers and find a set which has maximum register available. + // In other words, try to find register sequence that needs fewer registers to be spilled. This + // will give optimal CQ. + // + // 2. Check if some of the refpositions in the series are already in *somewhat* consecutive registers + // and if yes, assign that register sequence. That way, we will avoid copying values of + // refpositions that are already positioned in the desired registers. Checking this is beneficial + // only if it can happen frequently. So for RefPositions , it should + // be that, RP# 6 is already in V14 and RP# 7 is already in V16. But this can be rare (not tested). + // In future, if we see such cases being hit, we could use this heuristics. + // + // 3. Give one of the free register to the first position and the algorithm will + // give the subsequent consecutive registers (free or busy) to the remaining refpositions + // of the series. This may not give optimal CQ however. + // + // 4. Return the set of available registers and let selection heuristics pick one of them to get + // assigned to the first refposition. Remaining refpositions will be assigned to the subsequent + // registers (if busy, they will be spilled), similar to #3 above and will not give optimal CQ. + // + // + // Among `consecutiveResultForBusy`, we could shortlist the registers that are beneficial from "busy register + // selection" heuristics perspective. However, we would need to add logic of try_SPILL_COST(), try_FAR_NEXT_REF(), + // etc. here which would complicate things. Instead, we just go with option# 1 and select registers based on fewer + // number of registers that has to be spilled. 
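
The free-register part of getConsecutiveCandidates is computing a start-position mask: a candidate bit survives only if enough registers after it are also free. The same property can be stated without the explicit BitScanForward64 scan; a minimal sketch, assuming a plain 64-bit mask, treating the bit index as the register number, and ignoring wrap-around at the top of the vector register file:

    #include <cstdint>

    // Keep bit i only if bits i, i+1, ..., i+n-1 are all set in freeMask,
    // i.e. register i can start a run of n consecutive free registers.
    uint64_t consecutiveStartCandidates(uint64_t freeMask, unsigned n)
    {
        uint64_t result = freeMask;
        for (unsigned span = 1; span < n; span++)
        {
            result &= (freeMask >> span);
        }
        return result;
    }

    // Example: with bits 8..12 free and n == 3, the result has bits 8, 9 and 10 set,
    // which is the Rm+1 ... Rm+(k-n+1) window described earlier in this function.
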
+ // + + consecutiveResult = consecutiveResultForBusy; + } return consecutiveResult; } From 5371c3017ba8ed81fa09c4031dfa7b8eb84e2154 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Sat, 18 Mar 2023 17:09:55 -0700 Subject: [PATCH 086/125] setNextConsecutiveRegisterAssignment() no longer checks for areNextConsecutiveRegistersFree() --- src/coreclr/jit/lsra.cpp | 13 ++++--------- src/coreclr/jit/lsra.h | 2 +- src/coreclr/jit/lsraarm64.cpp | 22 ++++++---------------- 3 files changed, 11 insertions(+), 26 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index c1de23e781c91b..b08bf04bf666da 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -5398,8 +5398,7 @@ void LinearScan::allocateRegisters() lastAllocatedRefPosition = ¤tRefPosition; regMaskTP copyRegMask = getRegMask(copyReg, currentInterval->registerType); regMaskTP assignedRegMask = getRegMask(assignedRegister, currentInterval->registerType); - bool consecutiveAssigned = setNextConsecutiveRegisterAssignment(¤tRefPosition, copyReg); - assert(consecutiveAssigned); + setNextConsecutiveRegisterAssignment(¤tRefPosition, copyReg); // For consecutive register, it doesn't matter what the assigned register was. // We have just assigned it `copyRegMask` and that's the one in-use, and not the @@ -5498,9 +5497,7 @@ void LinearScan::allocateRegisters() // If the first RefPosition was not assigned to the register we wanted and we added // a copyReg for it, then allocate the subsequent RefPositions with the consecutive // registers. - bool consecutiveAssigned = - setNextConsecutiveRegisterAssignment(¤tRefPosition, copyReg); - assert(consecutiveAssigned); + setNextConsecutiveRegisterAssignment(¤tRefPosition, copyReg); } else { @@ -5586,7 +5583,7 @@ void LinearScan::allocateRegisters() // subsequent registers to the remaining position and skip the allocation for the // 1st refPosition altogether. - if (!setNextConsecutiveRegisterAssignment(¤tRefPosition, assignedRegister)) + if (!areNextConsecutiveRegistersFree(¤tRefPosition, assignedRegister)) { // The consecutive registers are busy. 
Force to allocate even for the 1st // refPosition @@ -5680,9 +5677,7 @@ void LinearScan::allocateRegisters() allocateReg(currentInterval, ¤tRefPosition DEBUG_ARG(®isterScore)); if (currentRefPosition.isFirstRefPositionOfConsecutiveRegisters()) { - bool consecutiveAssigned = - setNextConsecutiveRegisterAssignment(¤tRefPosition, assignedRegister); - assert(consecutiveAssigned); + setNextConsecutiveRegisterAssignment(¤tRefPosition, assignedRegister); } } else diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 7c337bf3c720fa..6d34a98ae1a07c 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1197,7 +1197,7 @@ class LinearScan : public LinearScanInterface #if defined(TARGET_ARM64) bool areNextConsecutiveRegistersFree(regNumber regToAssign, int registersToCheck, var_types registerType); - bool setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosition, regNumber firstRegAssigned); + void setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosition, regNumber firstRegAssigned); regMaskTP getFreeCandidates(regMaskTP candidates, RefPosition* refPosition); #endif // TARGET_ARM64 diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 310937cdbd5677..1acdc2e8e7e885 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -56,27 +56,19 @@ RefPosition* LinearScan::getNextConsecutiveRefPosition(RefPosition* refPosition) // firstRefPosition - First refPosition of the series of consecutive registers. // firstRegAssigned - Register assigned to the first refposition. // -// Returns: -// True if all the consecutive registers starting from `firstRegAssigned` were free. Even if one -// of them is busy, returns false and does not change the registerAssignment of any refPositions. +// Note: +// This method will set the registerAssignment of subsequent RefPositions with consecutive registers. +// Some of the registers could be busy, and they will be spilled. We would end up with busy registers if +// we did not find free consecutive registers. // -bool LinearScan::setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosition, regNumber firstRegAssigned) +void LinearScan::setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosition, regNumber firstRegAssigned) { + assert(compiler->info.needsConsecutiveRegisters); assert(firstRefPosition->assignedReg() == firstRegAssigned); assert(isSingleRegister(genRegMask(firstRegAssigned))); assert(firstRefPosition->isFirstRefPositionOfConsecutiveRegisters()); assert(emitter::isVectorRegister(firstRegAssigned)); - // Verify that all the consecutive registers needed are free, if not, return false. - // Need to do this before we set registerAssignment of any of the refPositions that - // are part of the range. - - if (!areNextConsecutiveRegistersFree(firstRegAssigned, firstRefPosition->regCount, - firstRefPosition->getInterval()->registerType)) - { - return false; - } - RefPosition* consecutiveRefPosition = getNextConsecutiveRefPosition(firstRefPosition); regNumber regToAssign = firstRegAssigned == REG_FP_LAST ? 
REG_FP_FIRST : REG_NEXT(firstRegAssigned); @@ -114,8 +106,6 @@ bool LinearScan::setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosit } assert(refPosCount == firstRefPosition->regCount); - - return true; } //------------------------------------------------------------------------ From 48759252c474024cbbd6eba12a6f3f5db8d07a14 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Sat, 18 Mar 2023 17:28:44 -0700 Subject: [PATCH 087/125] Rename getFreeCandidates() -> getConsecutiveCandidates() --- src/coreclr/jit/lsra.cpp | 2 +- src/coreclr/jit/lsra.h | 2 +- src/coreclr/jit/lsraarm64.cpp | 11 ++++++----- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index b08bf04bf666da..4c8add4a820a9d 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -12262,7 +12262,7 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval, #ifdef TARGET_ARM64 if (needsConsecutiveRegisters) { - freeCandidates = linearScan->getFreeCandidates(candidates, refPosition); + freeCandidates = linearScan->getConsecutiveCandidates(candidates, refPosition); } else #endif // TARGET_ARM64 diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 6d34a98ae1a07c..a73c1bc4738d74 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1198,7 +1198,7 @@ class LinearScan : public LinearScanInterface #if defined(TARGET_ARM64) bool areNextConsecutiveRegistersFree(regNumber regToAssign, int registersToCheck, var_types registerType); void setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosition, regNumber firstRegAssigned); - regMaskTP getFreeCandidates(regMaskTP candidates, RefPosition* refPosition); + regMaskTP getConsecutiveCandidates(regMaskTP candidates, RefPosition* refPosition); #endif // TARGET_ARM64 regMaskTP getFreeCandidates(regMaskTP candidates ARM_ARG(var_types regType)) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 1acdc2e8e7e885..49aaee474d2fc2 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -137,19 +137,20 @@ bool LinearScan::areNextConsecutiveRegistersFree(regNumber regToAssign, int regi } //------------------------------------------------------------------------ -// getFreeCandidates: Returns the mask of all the free candidates for given refPosition. -// If refPosition is the first RefPosition of a series of refpositions that needs +// getConsecutiveCandidates: Returns the mask of all the consecutive candidates +// for given refPosition. For first RefPosition of a series of refpositions that needs // consecutive registers, then returns only the mask such that it satisfies the need -// of having free consecutive registers. +// of having free consecutive registers. If free consecutive registers are not available +// it finds such a series that needs fewer registers spilling. // // Arguments: // candidates - Register assigned to the first refposition. // refPosition - Number of registers to check. // // Returns: -// Register mask of all the free registers +// Register mask of consecutive registers. 
// -regMaskTP LinearScan::getFreeCandidates(regMaskTP candidates, RefPosition* refPosition) +regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP candidates, RefPosition* refPosition) { assert(compiler->info.needsConsecutiveRegisters); regMaskTP result = candidates & m_AvailableRegs; From 597e6debb41f182bd896dd8991140fc3ea9e9d31 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Sat, 18 Mar 2023 17:34:01 -0700 Subject: [PATCH 088/125] fix parameters to areNextConsecutiveRegistersFree() --- src/coreclr/jit/lsra.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 4c8add4a820a9d..9a6346a222ef4c 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -5583,7 +5583,8 @@ void LinearScan::allocateRegisters() // subsequent registers to the remaining position and skip the allocation for the // 1st refPosition altogether. - if (!areNextConsecutiveRegistersFree(¤tRefPosition, assignedRegister)) + if (!areNextConsecutiveRegistersFree(assignedRegister, currentRefPosition.regCount, + currentRefPosition.getInterval()->registerType)) { // The consecutive registers are busy. Force to allocate even for the 1st // refPosition From 4a1171d178261ac7684a6bbe813883ab54377f72 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Sun, 19 Mar 2023 11:03:56 -0700 Subject: [PATCH 089/125] Rename and update canAssignNextConsecutiveRegisters() --- src/coreclr/jit/lsra.cpp | 6 ++---- src/coreclr/jit/lsra.h | 2 +- src/coreclr/jit/lsraarm64.cpp | 38 +++++++++++++++++++++++------------ 3 files changed, 28 insertions(+), 18 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 9a6346a222ef4c..fa9e4bbe4b5d74 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -5381,8 +5381,7 @@ void LinearScan::allocateRegisters() // For consecutive registers, if the first RefPosition is already assigned to a register, // check if consecutive registers are free so they can be assigned to the subsequent // RefPositions. - if (areNextConsecutiveRegistersFree(assignedRegister, currentRefPosition.regCount, - currentRefPosition.getInterval()->registerType)) + if (canAssignNextConsecutiveRegisters(¤tRefPosition, assignedRegister)) { // Current assignedRegister satisfies the consecutive registers requirements currentRefPosition.registerAssignment = assignedRegBit; @@ -5583,8 +5582,7 @@ void LinearScan::allocateRegisters() // subsequent registers to the remaining position and skip the allocation for the // 1st refPosition altogether. - if (!areNextConsecutiveRegistersFree(assignedRegister, currentRefPosition.regCount, - currentRefPosition.getInterval()->registerType)) + if (!canAssignNextConsecutiveRegisters(¤tRefPosition, assignedRegister)) { // The consecutive registers are busy. 
Force to allocate even for the 1st // refPosition diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index a73c1bc4738d74..8570a298745f0f 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1196,7 +1196,7 @@ class LinearScan : public LinearScanInterface ****************************************************************************/ #if defined(TARGET_ARM64) - bool areNextConsecutiveRegistersFree(regNumber regToAssign, int registersToCheck, var_types registerType); + bool canAssignNextConsecutiveRegisters(RefPosition* firstRefPosition, regNumber firstRegAssigned); void setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosition, regNumber firstRegAssigned); regMaskTP getConsecutiveCandidates(regMaskTP candidates, RefPosition* refPosition); #endif // TARGET_ARM64 diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 49aaee474d2fc2..7d409f2dfd2dc7 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -109,29 +109,41 @@ void LinearScan::setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosit } //------------------------------------------------------------------------ -// areNextConsecutiveRegistersFree: Starting with `regToAssign`, check if next -// consecutive `registersToCheck` are free or not. +// canAssignNextConsecutiveRegisters: Starting with `firstRegAssigned`, check if next +// consecutive registers are free or are already assigned to the subsequent RefPositions. // // Arguments: +// firstRefPosition - First refPosition of the series of consecutive registers. // regToAssign - Register assigned to the first refposition. -// registersCount - Number of registers to check. -// registerType - Type of register. // // Returns: -// True if all the consecutive registers starting from `regToAssign` were free. Even if one -// of them is busy, returns false. +// True if all the consecutive registers starting from `firstRegAssigned` are assignable. +// Even if one of them is busy, returns false. // -bool LinearScan::areNextConsecutiveRegistersFree(regNumber regToAssign, int registersCount, var_types registerType) +bool LinearScan::canAssignNextConsecutiveRegisters(RefPosition* firstRefPosition, regNumber firstRegAssigned) { - assert(compiler->info.needsConsecutiveRegisters); - for (int i = 0; i < registersCount; i++) + int registersCount = firstRefPosition->regCount; + RefPosition* nextRefPosition = firstRefPosition; + regNumber regToAssign = firstRegAssigned; + assert(compiler->info.needsConsecutiveRegisters && registersCount > 1); + + int i = 1; + do { - if (isRegInUse(regToAssign, registerType)) + nextRefPosition = getNextConsecutiveRefPosition(nextRefPosition); + regToAssign = regToAssign == REG_FP_LAST ? REG_FP_FIRST : REG_NEXT(regToAssign); + if (!isFree(getRegisterRecord(regToAssign))) { - return false; + // If regToAssign is not free, check if it is already assigned to the interval corresponding + // to the subsequent nextRefPosition. If yes, it would just use regToAssign for that nextRefPosition. + if ((nextRefPosition->getInterval() != nullptr) && + (nextRefPosition->getInterval()->assignedReg != nullptr) && + ((nextRefPosition->getInterval()->assignedReg->regNum != regToAssign))) + { + return false; + } } - regToAssign = regToAssign == REG_FP_LAST ? 
REG_FP_FIRST : REG_NEXT(regToAssign); - } + } while (++i != registersCount); return true; } From 1f124a4ba15571edb6d76de8a959edb9192c255a Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Sun, 19 Mar 2023 11:29:27 -0700 Subject: [PATCH 090/125] Add the missing setNextConsecutiveRegisterAssignment() calls --- src/coreclr/jit/lsra.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index fa9e4bbe4b5d74..fc9c75af3ef550 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -5386,6 +5386,8 @@ void LinearScan::allocateRegisters() // Current assignedRegister satisfies the consecutive registers requirements currentRefPosition.registerAssignment = assignedRegBit; INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, currentInterval, assignedRegister)); + + setNextConsecutiveRegisterAssignment(¤tRefPosition, assignedRegister); } else { @@ -5491,7 +5493,7 @@ void LinearScan::allocateRegisters() #ifdef TARGET_ARM64 if (hasConsecutiveRegister && currentRefPosition.needsConsecutive) { - if (currentRefPosition.regCount != 0) + if (currentRefPosition.isFirstRefPositionOfConsecutiveRegisters()) { // If the first RefPosition was not assigned to the register we wanted and we added // a copyReg for it, then allocate the subsequent RefPositions with the consecutive @@ -5591,6 +5593,10 @@ void LinearScan::allocateRegisters() currentRefPosition.registerAssignment = allRegs(currentInterval->registerType); unassignPhysRegNoSpill(physRegRecord); } + else + { + setNextConsecutiveRegisterAssignment(¤tRefPosition, assignedRegister); + } } } else From 9686773d406c22c4dec50a8f777b6c10bbcc1a19 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 20 Mar 2023 20:55:38 -0700 Subject: [PATCH 091/125] Fix a condition for upperVector --- src/coreclr/jit/lsraarm64.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 7d409f2dfd2dc7..f94eedd860da52 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -76,7 +76,7 @@ void LinearScan::setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosit assert(firstRefPosition->refType != RefTypeUpperVectorRestore); INDEBUG(int refPosCount = 1); - regMaskTP busyConsecutiveRegMask = ~(((1ULL << firstRefPosition->regCount) - 1) << firstRegAssigned); + regMaskTP busyConsecutiveRegMask = (((1ULL << firstRefPosition->regCount) - 1) << firstRegAssigned); while (consecutiveRefPosition != nullptr) { @@ -84,7 +84,10 @@ void LinearScan::setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosit #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE if (consecutiveRefPosition->refType == RefTypeUpperVectorRestore) { - if (consecutiveRefPosition->getInterval()->isPartiallySpilled) + Interval* srcInterval = consecutiveRefPosition->getInterval(); + assert(srcInterval->isUpperVector); + assert(srcInterval->relatedInterval != nullptr); + if (srcInterval->relatedInterval->isPartiallySpilled) { // Make sure that restore doesn't get one of the registers that are part of series we are trying to set // currently. 
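
Two details of the upperVector fix are easy to gloss over: the partial-spill state is now read through relatedInterval rather than from the upper-vector interval itself, and busyConsecutiveRegMask loses its stray bitwise-NOT so that the value matches its name. A small worked example of the corrected expression, treating the shift amount as a plain bit index purely for illustration:

    With regCount = 3 and firstRegAssigned at bit 8:
        (1ULL << 3) - 1   ->  0b111
        0b111 << 8        ->  bits 8, 9 and 10 set
    i.e. the three consecutive registers the series will occupy, which is the set the
    RefTypeUpperVectorRestore position must be steered away from.
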
From 55071f6add69b1993e013b99b0944e93f2cd1125 Mon Sep 17 00:00:00 2001
From: Kunal Pathak
Date: Mon, 20 Mar 2023 20:57:21 -0700
Subject: [PATCH 092/125] Update spill heuristic to handle cases for jitstressregs

---
 src/coreclr/jit/lsra.cpp | 14 +++++++++++++-
 src/coreclr/jit/lsraarm64.cpp | 10 ++++++++++
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp
index fc9c75af3ef550..2d9f39574c5711 100644
--- a/src/coreclr/jit/lsra.cpp
+++ b/src/coreclr/jit/lsra.cpp
@@ -3104,7 +3104,11 @@ bool LinearScan::isSpillCandidate(Interval* current, RefPosition* refPosition, R
 // busy until the next kill.
 assert(!isRegBusy(physRegRecord->regNum, current->registerType));
 // We should already have determined that the register isn't actively in use.
+#ifdef TARGET_ARM64
+ assert(!isRegInUse(physRegRecord->regNum, current->registerType) || refPosition->needsConsecutive);
+#else
 assert(!isRegInUse(physRegRecord->regNum, current->registerType));
+#endif
 // We shouldn't be calling this if 'refPosition' is a fixed reference to this register.
 assert(!refPosition->isFixedRefOfRegMask(candidateBit));
 // We shouldn't be calling this if there is a fixed reference at the same location
@@ -11691,6 +11695,7 @@ void LinearScan::RegisterSelection::try_SPILL_COST()
 weight_t bestSpillWeight = FloatingPointUtils::infinite_double();
 // True if we found registers with lower spill weight than this refPosition.
 bool foundLowerSpillWeight = false;
+ LsraLocation thisLocation = refPosition->nodeLocation;
 for (regMaskTP spillCandidates = candidates; spillCandidates != RBM_NONE;)
 {
@@ -11702,7 +11707,14 @@ void LinearScan::RegisterSelection::try_SPILL_COST()
 // Can and should the interval in this register be spilled for this one,
 // if we don't find a better alternative?
- if ((linearScan->getNextIntervalRef(spillCandidateRegNum, regType) == refPosition->nodeLocation) &&
+#ifdef TARGET_ARM64
+ if (linearScan->isRefPositionActive(assignedInterval->recentRefPosition, thisLocation) && (assignedInterval->recentRefPosition->needsConsecutive))
+ {
+ continue;
+ }
+#endif
+
+ if ((linearScan->getNextIntervalRef(spillCandidateRegNum, regType) == thisLocation) &&
 !assignedInterval->getNextRefPosition()->RegOptional())
 {
 continue;
diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp
index f94eedd860da52..82b151cc9c6258 100644
--- a/src/coreclr/jit/lsraarm64.cpp
+++ b/src/coreclr/jit/lsraarm64.cpp
@@ -1585,6 +1585,16 @@ int LinearScan::BuildConsecutiveRegistersForUse(GenTree* treeNode)
 // Just `regCount` to actual registers count for first ref-position.
 // For others, set 0 so we can identify that this is non-first refposition.
 firstRefPos->regCount = regCount;
+
+#ifdef DEBUG
+ // Set the minimum register candidates needed for stress to work.
+ currRefPos = firstRefPos;
+ while (currRefPos != nullptr)
+ {
+ currRefPos->minRegCandidateCount = regCount;
+ currRefPos = getNextConsecutiveRefPosition(currRefPos);
+ }
+#endif
 srcCount += regCount;
 }
 else

From 757c6825cb8a0bac612f47df8edc3c7457eea004 Mon Sep 17 00:00:00 2001
From: Kunal Pathak
Date: Mon, 20 Mar 2023 20:57:54 -0700
Subject: [PATCH 093/125] Misc.
remove popcount() check from getConsecutiveRegisters() --- src/coreclr/jit/lsraarm64.cpp | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 82b151cc9c6258..a1e19a2f6890a1 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -169,22 +169,18 @@ regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP candidates, RefPosition { assert(compiler->info.needsConsecutiveRegisters); regMaskTP result = candidates & m_AvailableRegs; - if (!refPosition->isFirstRefPositionOfConsecutiveRegisters()) + if (!refPosition->isFirstRefPositionOfConsecutiveRegisters() || (result == RBM_NONE)) { return result; } unsigned int registersNeeded = refPosition->regCount; - - regMaskTP currAvailableRegs = result; regMaskTP overallResult = RBM_NONE; regMaskTP consecutiveResult = RBM_NONE; regMaskTP consecutiveResultForBusy = RBM_NONE; regMaskTP busyRegsInThisLocation = regsBusyUntilKill | regsInUseThisLocation; - if (BitOperations::PopCount(currAvailableRegs) >= registersNeeded) - { // At this point, for 'n' registers requirement, if Rm+1, Rm+2, Rm+3, ..., Rm+k are // available, create the mask only for Rm+1, Rm+2, ..., Rm+(k-n+1) to convey that it // is safe to assign any of those registers, but not beyond that. @@ -194,7 +190,6 @@ regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP candidates, RefPosition consecutiveResult |= availableRegistersMask & (selectionEndMask & ~selectionStartMask); \ overallResult |= availableRegistersMask; - DWORD regAvailableStartIndex = 0, regAvailableEndIndex = 0; // If we don't find consecutive registers, also track which registers we can pick so @@ -223,7 +218,7 @@ regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP candidates, RefPosition trackForBusyCandidates = false; consecutiveResultForBusy = RBM_NONE; } - else + else if (trackForBusyCandidates) { // We reached a set of registers where there are not enough consecutive registers. // Move a registersNeeded size window for all the available registers and track for which @@ -296,7 +291,6 @@ regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP candidates, RefPosition } currAvailableRegs &= ~endMask; } while (currAvailableRegs != RBM_NONE); - } if (overallResult != RBM_NONE) { From 79e0bd5b6178fb002d59108f8c7a8c481acc749b Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 20 Mar 2023 21:01:08 -0700 Subject: [PATCH 094/125] jit format --- src/coreclr/jit/lsra.cpp | 3 ++- src/coreclr/jit/lsra.h | 2 +- src/coreclr/jit/lsraarm64.cpp | 6 ++++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 2d9f39574c5711..83690e7b6c04e8 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -11708,7 +11708,8 @@ void LinearScan::RegisterSelection::try_SPILL_COST() // Can and should the interval in this register be spilled for this one, // if we don't find a better alternative? 
#ifdef TARGET_ARM64 - if (linearScan->isRefPositionActive(assignedInterval->recentRefPosition, thisLocation) && (assignedInterval->recentRefPosition->needsConsecutive)) + if (linearScan->isRefPositionActive(assignedInterval->recentRefPosition, thisLocation) && + (assignedInterval->recentRefPosition->needsConsecutive)) { continue; } diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 8570a298745f0f..1de4fa1e65c266 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1196,7 +1196,7 @@ class LinearScan : public LinearScanInterface ****************************************************************************/ #if defined(TARGET_ARM64) - bool canAssignNextConsecutiveRegisters(RefPosition* firstRefPosition, regNumber firstRegAssigned); + bool canAssignNextConsecutiveRegisters(RefPosition* firstRefPosition, regNumber firstRegAssigned); void setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosition, regNumber firstRegAssigned); regMaskTP getConsecutiveCandidates(regMaskTP candidates, RefPosition* refPosition); #endif // TARGET_ARM64 diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index a1e19a2f6890a1..08704147d6a37b 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -376,8 +376,10 @@ regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP candidates, RefPosition // // // Among `consecutiveResultForBusy`, we could shortlist the registers that are beneficial from "busy register - // selection" heuristics perspective. However, we would need to add logic of try_SPILL_COST(), try_FAR_NEXT_REF(), - // etc. here which would complicate things. Instead, we just go with option# 1 and select registers based on fewer + // selection" heuristics perspective. However, we would need to add logic of try_SPILL_COST(), + // try_FAR_NEXT_REF(), + // etc. here which would complicate things. Instead, we just go with option# 1 and select registers based on + // fewer // number of registers that has to be spilled. // From a44cf605975e67c1029206fc80f9ea0247a59138 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 21 Mar 2023 22:14:35 -0700 Subject: [PATCH 095/125] Fix a bug in canAssignNextConsecutiveRegisters() --- src/coreclr/jit/lsraarm64.cpp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 08704147d6a37b..bc2cc11a48c2f1 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -117,7 +117,7 @@ void LinearScan::setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosit // // Arguments: // firstRefPosition - First refPosition of the series of consecutive registers. -// regToAssign - Register assigned to the first refposition. +// firstRegAssigned - Register assigned to the first refposition. // // Returns: // True if all the consecutive registers starting from `firstRegAssigned` are assignable. @@ -137,14 +137,21 @@ bool LinearScan::canAssignNextConsecutiveRegisters(RefPosition* firstRefPosition regToAssign = regToAssign == REG_FP_LAST ? REG_FP_FIRST : REG_NEXT(regToAssign); if (!isFree(getRegisterRecord(regToAssign))) { + if (nextRefPosition->refType == RefTypeUpperVectorRestore) + { + nextRefPosition = getNextConsecutiveRefPosition(nextRefPosition); + } + // If regToAssign is not free, check if it is already assigned to the interval corresponding // to the subsequent nextRefPosition. If yes, it would just use regToAssign for that nextRefPosition. 
if ((nextRefPosition->getInterval() != nullptr) && (nextRefPosition->getInterval()->assignedReg != nullptr) && - ((nextRefPosition->getInterval()->assignedReg->regNum != regToAssign))) + ((nextRefPosition->getInterval()->assignedReg->regNum == regToAssign))) { - return false; + continue; } + + return false; } } while (++i != registersCount); From 5fefae679dc4353f8f3b75c30739ba645b0bda51 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 21 Mar 2023 22:15:28 -0700 Subject: [PATCH 096/125] Add filterConsecutiveCandidates() and perform free/busy candidates scan --- src/coreclr/jit/lsra.h | 3 +- src/coreclr/jit/lsraarm64.cpp | 276 +++++++++++++++------------------- 2 files changed, 127 insertions(+), 152 deletions(-) diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 1de4fa1e65c266..f822b7792d08ec 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1198,7 +1198,8 @@ class LinearScan : public LinearScanInterface #if defined(TARGET_ARM64) bool canAssignNextConsecutiveRegisters(RefPosition* firstRefPosition, regNumber firstRegAssigned); void setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosition, regNumber firstRegAssigned); - regMaskTP getConsecutiveCandidates(regMaskTP candidates, RefPosition* refPosition); + regMaskTP getConsecutiveCandidates(regMaskTP candidates, RefPosition* refPosition, regMaskTP* busyCandidates); + regMaskTP filterConsecutiveCandidates(regMaskTP candidates, unsigned int registersNeeded, regMaskTP* allConsecutiveCandidates); #endif // TARGET_ARM64 regMaskTP getFreeCandidates(regMaskTP candidates ARM_ARG(var_types regType)) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index bc2cc11a48c2f1..ca7c31775e1589 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -159,147 +159,114 @@ bool LinearScan::canAssignNextConsecutiveRegisters(RefPosition* firstRefPosition } //------------------------------------------------------------------------ -// getConsecutiveCandidates: Returns the mask of all the consecutive candidates -// for given refPosition. For first RefPosition of a series of refpositions that needs -// consecutive registers, then returns only the mask such that it satisfies the need -// of having free consecutive registers. If free consecutive registers are not available -// it finds such a series that needs fewer registers spilling. +// filterConsecutiveCandidates: Given `candidates`, check if `registersNeeded` consecutive +// registers are available in it, and if yes, returns first bit set of every possible series. // // Arguments: -// candidates - Register assigned to the first refposition. -// refPosition - Number of registers to check. +// candidates - Set of availble candidates. +// registersNeeded - Number of consecutive registers needed. +// allConsecutiveCandidates - Mask returned containing all bits set for possible consecutive register candidates. // // Returns: -// Register mask of consecutive registers. +// From `candidates`, the mask of series of consecutive registers of `registersNeeded` size with just the first-bit set. 
// -regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP candidates, RefPosition* refPosition) +regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, unsigned int registersNeeded, regMaskTP* allConsecutiveCandidates) { - assert(compiler->info.needsConsecutiveRegisters); - regMaskTP result = candidates & m_AvailableRegs; - if (!refPosition->isFirstRefPositionOfConsecutiveRegisters() || (result == RBM_NONE)) + if (BitOperations::PopCount(candidates) < registersNeeded) { - return result; + // There is no way the register demanded can be satisfied for this refposition + // based on the candidates from which it can allocate a register. + return RBM_NONE; } - unsigned int registersNeeded = refPosition->regCount; - regMaskTP currAvailableRegs = result; - regMaskTP overallResult = RBM_NONE; - regMaskTP consecutiveResult = RBM_NONE; - regMaskTP consecutiveResultForBusy = RBM_NONE; - regMaskTP busyRegsInThisLocation = regsBusyUntilKill | regsInUseThisLocation; + regMaskTP currAvailableRegs = candidates; + regMaskTP overallResult = RBM_NONE; + regMaskTP consecutiveResult = RBM_NONE; + regMaskTP busyRegsInThisLocation = regsBusyUntilKill | regsInUseThisLocation; // At this point, for 'n' registers requirement, if Rm+1, Rm+2, Rm+3, ..., Rm+k are // available, create the mask only for Rm+1, Rm+2, ..., Rm+(k-n+1) to convey that it // is safe to assign any of those registers, but not beyond that. -#define AppendConsecutiveMask(startIndex, endIndex, availableRegistersMask) \ - regMaskTP selectionStartMask = (1ULL << regAvailableStartIndex) - 1; \ - regMaskTP selectionEndMask = (1ULL << (regAvailableEndIndex - registersNeeded + 1)) - 1; \ - consecutiveResult |= availableRegistersMask & (selectionEndMask & ~selectionStartMask); \ +#define AppendConsecutiveMask(startIndex, endIndex, availableRegistersMask) \ + regMaskTP selectionStartMask = (1ULL << regAvailableStartIndex) - 1; \ + regMaskTP selectionEndMask = (1ULL << (regAvailableEndIndex - registersNeeded + 1)) - 1; \ + consecutiveResult |= availableRegistersMask & (selectionEndMask & ~selectionStartMask); \ overallResult |= availableRegistersMask; - DWORD regAvailableStartIndex = 0, regAvailableEndIndex = 0; + DWORD regAvailableStartIndex = 0, regAvailableEndIndex = 0; - // If we don't find consecutive registers, also track which registers we can pick so - // as to reduce the number of registers we will have to spill, to accomodate the - // request of the consecutive registers. - bool trackForBusyCandidates = true; - int maxSpillRegs = registersNeeded; - regMaskTP registersNeededMask = (1ULL << registersNeeded) - 1; + // If we don't find consecutive registers, also track which registers we can pick so + // as to reduce the number of registers we will have to spill, to accomodate the + // request of the consecutive registers. + regMaskTP registersNeededMask = (1ULL << registersNeeded) - 1; - do - { - // From LSB, find the first available register (bit `1`) - BitScanForward64(®AvailableStartIndex, static_cast(currAvailableRegs)); - regMaskTP startMask = (1ULL << regAvailableStartIndex) - 1; - - // Mask all the bits that are processed from LSB thru regAvailableStart until the last `1`. - regMaskTP maskProcessed = ~(currAvailableRegs | startMask); - - // From regAvailableStart, find the first unavailable register (bit `0`). 
- if (maskProcessed == 0) - { - regAvailableEndIndex = 64; - if ((regAvailableEndIndex - regAvailableStartIndex) >= registersNeeded) - { - AppendConsecutiveMask(regAvailableStartIndex, regAvailableEndIndex, currAvailableRegs); - trackForBusyCandidates = false; - consecutiveResultForBusy = RBM_NONE; - } - else if (trackForBusyCandidates) - { - // We reached a set of registers where there are not enough consecutive registers. - // Move a registersNeeded size window for all the available registers and track for which - // one we can spill least number of registers. - - for (DWORD i = regAvailableStartIndex; i < regAvailableEndIndex; i++) - { - regMaskTP maskForCurRange = registersNeededMask << i; - if ((maskForCurRange & busyRegsInThisLocation) != RBM_NONE) - { - // If any register between i and (i + registersNeeded) contains one or more - // register that are busy, then we cannot that entire range. - continue; - } - int curSpillRegs = registersNeeded - BitOperations::PopCount(maskForCurRange) + 1; - - if (curSpillRegs < maxSpillRegs) - { - // We found a series that will need fewer registers to be spilled. - // Reset whatever we found so far and start accumulating the result again. - consecutiveResultForBusy = RBM_NONE; - maxSpillRegs = curSpillRegs; - } + do + { + // From LSB, find the first available register (bit `1`) + BitScanForward64(®AvailableStartIndex, static_cast(currAvailableRegs)); + regMaskTP startMask = (1ULL << regAvailableStartIndex) - 1; - consecutiveResultForBusy |= 1ULL << i; - } - } - break; - } - else - { - BitScanForward64(®AvailableEndIndex, static_cast(maskProcessed)); - } - regMaskTP endMask = (1ULL << regAvailableEndIndex) - 1; + // Mask all the bits that are processed from LSB thru regAvailableStart until the last `1`. + regMaskTP maskProcessed = ~(currAvailableRegs | startMask); - // Anything between regAvailableStart and regAvailableEnd is the range of consecutive registers available - // If they are equal to or greater than our register requirements, then add all of them to the result. + // From regAvailableStart, find the first unavailable register (bit `0`). + if (maskProcessed == 0) + { + regAvailableEndIndex = 64; if ((regAvailableEndIndex - regAvailableStartIndex) >= registersNeeded) { - AppendConsecutiveMask(regAvailableStartIndex, regAvailableEndIndex, (endMask & ~startMask)); - trackForBusyCandidates = false; - consecutiveResultForBusy = RBM_NONE; + AppendConsecutiveMask(regAvailableStartIndex, regAvailableEndIndex, currAvailableRegs); } - else if (trackForBusyCandidates) - { - // We reached a set of registers where there are not enough consecutive registers. - // Move a registersNeeded size window for all the available registers and track for which - // one we can spill least number of registers. + break; + } + else + { + BitScanForward64(®AvailableEndIndex, static_cast(maskProcessed)); + } + regMaskTP endMask = (1ULL << regAvailableEndIndex) - 1; - for (DWORD i = regAvailableStartIndex; i < regAvailableEndIndex; i++) - { - regMaskTP maskForCurRange = registersNeededMask << i; - if ((maskForCurRange & busyRegsInThisLocation) != RBM_NONE) - { - // If any register between i and (i + registersNeeded) contains one or more - // register that are busy, then we cannot that entire range. - continue; - } - int curSpillRegs = registersNeeded - BitOperations::PopCount(maskForCurRange) + 1; - if (curSpillRegs < maxSpillRegs) - { - // We found a series that will need fewer registers to be spilled. 
- // Reset whatever we found so far and start accumulating the result again. - consecutiveResultForBusy = RBM_NONE; - maxSpillRegs = curSpillRegs; - } + // Anything between regAvailableStart and regAvailableEnd is the range of consecutive registers available + // If they are equal to or greater than our register requirements, then add all of them to the result. + if ((regAvailableEndIndex - regAvailableStartIndex) >= registersNeeded) + { + AppendConsecutiveMask(regAvailableStartIndex, regAvailableEndIndex, (endMask & ~startMask)); + } + currAvailableRegs &= ~endMask; + } while (currAvailableRegs != RBM_NONE); - consecutiveResultForBusy |= 1ULL << i; - } - } - currAvailableRegs &= ~endMask; - } while (currAvailableRegs != RBM_NONE); + *allConsecutiveCandidates = overallResult; + return consecutiveResult; +} - if (overallResult != RBM_NONE) +//------------------------------------------------------------------------ +// getConsecutiveCandidates: Returns the mask of all the consecutive candidates +// for given refPosition. For first RefPosition of a series of refpositions that needs +// consecutive registers, then returns only the mask such that it satisfies the need +// of having free consecutive registers. If free consecutive registers are not available +// it finds such a series that needs fewer registers spilling. +// +// Arguments: +// allCandidates - Register assigned to the first refposition. +// refPosition - Number of registers to check. +// +// Returns: +// Register mask of consecutive registers. +// +regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP allCandidates, RefPosition* refPosition, regMaskTP* busyCandidates) +{ + assert(compiler->info.needsConsecutiveRegisters); + regMaskTP freeCandidates = allCandidates & m_AvailableRegs; + if (!refPosition->isFirstRefPositionOfConsecutiveRegisters() || (freeCandidates == RBM_NONE)) + { + return freeCandidates; + } + + *busyCandidates = RBM_NONE; + regMaskTP overallResult; + unsigned int registersNeeded = refPosition->regCount; + + regMaskTP consecutiveResultForFree = filterConsecutiveCandidates(freeCandidates, registersNeeded, &overallResult); + if (consecutiveResultForFree != RBM_NONE) { // One last time, check if subsequent refpositions (all refpositions except the first for which // we assigned above) already have consecutive registers assigned. If yes, and if one of the @@ -353,47 +320,54 @@ regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP candidates, RefPosition if ((overallResult & remainingRegsMask) != RBM_NONE) { // If remaining registers are available, then just set the firstRegister mask - consecutiveResult = 1ULL << (firstRegNum - 1); + consecutiveResultForFree = 1ULL << (firstRegNum - 1); } } + + return consecutiveResultForFree; } - else + + // There are registers available but they are not consecutive. + // Here are some options to address them: + // + // 1. Scan once again the available registers and find a set which has maximum register available. + // In other words, try to find register sequence that needs fewer registers to be spilled. This + // will give optimal CQ. + // + // 2. Check if some of the refpositions in the series are already in *somewhat* consecutive registers + // and if yes, assign that register sequence. That way, we will avoid copying values of + // refpositions that are already positioned in the desired registers. Checking this is beneficial + // only if it can happen frequently. So for RefPositions , it should + // be that, RP# 6 is already in V14 and RP# 8 is already in V16. 
But this can be rare (not tested). + // In future, if we see such cases being hit, we could use this heuristics. + // + // 3. Give one of the free register to the first position and the algorithm will + // give the subsequent consecutive registers (free or busy) to the remaining refpositions + // of the series. This may not give optimal CQ however. + // + // 4. Return the set of available registers and let selection heuristics pick one of them to get + // assigned to the first refposition. Remaining refpositions will be assigned to the subsequent + // registers (if busy, they will be spilled), similar to #3 above and will not give optimal CQ. + // + // + // Among `consecutiveResultForBusy`, we could shortlist the registers that are beneficial from "busy register + // selection" heuristics perspective. However, we would need to add logic of try_SPILL_COST(), + // try_FAR_NEXT_REF(), etc. here which would complicate things. Instead, we just go with option# 1 and select + // registers based on fewer number of registers that has to be spilled. + // + regMaskTP consecutiveResultForBusy = filterConsecutiveCandidates(allCandidates, registersNeeded, &overallResult); + regMaskTP mixConsecutiveResult = m_AvailableRegs & consecutiveResultForBusy; + if (mixConsecutiveResult != RBM_NONE) { - // There are enough registers available but they are not consecutive. - // Here are some options to address them: - // - // 1. Scan once again the available registers and find a set which has maximum register available. - // In other words, try to find register sequence that needs fewer registers to be spilled. This - // will give optimal CQ. - // - // 2. Check if some of the refpositions in the series are already in *somewhat* consecutive registers - // and if yes, assign that register sequence. That way, we will avoid copying values of - // refpositions that are already positioned in the desired registers. Checking this is beneficial - // only if it can happen frequently. So for RefPositions , it should - // be that, RP# 6 is already in V14 and RP# 7 is already in V16. But this can be rare (not tested). - // In future, if we see such cases being hit, we could use this heuristics. - // - // 3. Give one of the free register to the first position and the algorithm will - // give the subsequent consecutive registers (free or busy) to the remaining refpositions - // of the series. This may not give optimal CQ however. - // - // 4. Return the set of available registers and let selection heuristics pick one of them to get - // assigned to the first refposition. Remaining refpositions will be assigned to the subsequent - // registers (if busy, they will be spilled), similar to #3 above and will not give optimal CQ. - // - // - // Among `consecutiveResultForBusy`, we could shortlist the registers that are beneficial from "busy register - // selection" heuristics perspective. However, we would need to add logic of try_SPILL_COST(), - // try_FAR_NEXT_REF(), - // etc. here which would complicate things. Instead, we just go with option# 1 and select registers based on - // fewer - // number of registers that has to be spilled. - // - - consecutiveResult = consecutiveResultForBusy; + // We did not find free consecutive candidates, however we found some registers among the `allCandidates` that + // are mix of free and busy. Since `busyCandidates` just has bit set for first register of such series, return + // the mask that starts with free register, if possible. 
The busy registers will be spilled during assignment of + // subsequent refposition. + *busyCandidates = mixConsecutiveResult; } - return consecutiveResult; + *busyCandidates = consecutiveResultForBusy; + return RBM_NONE; } //------------------------------------------------------------------------ // BuildNode: Build the RefPositions for a node From 2a5e52c86dbc8644a4add23f3553f9eed105b9ac Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 21 Mar 2023 22:16:00 -0700 Subject: [PATCH 097/125] Consume the new free/busy consecutive candidates method --- src/coreclr/jit/lsra.cpp | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 83690e7b6c04e8..8b82b62195148f 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -12280,7 +12280,28 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval, #ifdef TARGET_ARM64 if (needsConsecutiveRegisters) { - freeCandidates = linearScan->getConsecutiveCandidates(candidates, refPosition); + regMaskTP busyConsecutiveCandidates = RBM_NONE; + freeCandidates = linearScan->getConsecutiveCandidates(candidates, refPosition, &busyConsecutiveCandidates); + if (freeCandidates == RBM_NONE) + { + // We did not find free candidates. We will use the busy candidates, if + // they are consecutive. + if (refPosition->isFirstRefPositionOfConsecutiveRegisters()) + { + candidates = busyConsecutiveCandidates; + } + else + { + // We should have a single candidate that will be used for subsequent + // refpositions. + assert((refPosition->refType == RefTypeUpperVectorRestore) || (genCountBits(candidates) == 1)); + } + + if (candidates == RBM_NONE) + { + noway_assert(!"Not sufficient consecutive registers available."); + } + } } else #endif // TARGET_ARM64 From a17b44f82a3c32b8a669180d6778a49389a19779 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 21 Mar 2023 22:17:01 -0700 Subject: [PATCH 098/125] Handle case where 'copyReg == assignedReg' --- src/coreclr/jit/lsra.cpp | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 8b82b62195148f..b064f8bb135349 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -5397,13 +5397,13 @@ void LinearScan::allocateRegisters() { // It doesn't satisfy, so do a copyReg for the first RefPosition to such a register, so // it would be possible to allocate consecutive registers to the subsequent RefPositions. - assert((currentRefPosition.refType == RefTypeUse) || - (currentRefPosition.refType == RefTypeUpperVectorRestore)); regNumber copyReg = assignCopyReg(¤tRefPosition); + + if (copyReg != assignedRegister) + { lastAllocatedRefPosition = ¤tRefPosition; regMaskTP copyRegMask = getRegMask(copyReg, currentInterval->registerType); regMaskTP assignedRegMask = getRegMask(assignedRegister, currentInterval->registerType); - setNextConsecutiveRegisterAssignment(¤tRefPosition, copyReg); // For consecutive register, it doesn't matter what the assigned register was. 
// We have just assigned it `copyRegMask` and that's the one in-use, and not the @@ -5421,8 +5421,8 @@ void LinearScan::allocateRegisters() } else { - INDEBUG( - dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE, currentInterval, assignedRegister)); + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE, currentInterval, + assignedRegister)); regsToFree |= copyRegMask | assignedRegMask; } } @@ -5448,6 +5448,19 @@ void LinearScan::allocateRegisters() clearSpillCost(copyReg, currentInterval->registerType); updateNextIntervalRef(assignedRegister, currentInterval); updateSpillCost(assignedRegister, currentInterval); + } + else + { + // We first noticed that with assignedRegister, we were not getting consecutive registers assigned, so we + // decide to perform copyReg. However, copyReg assigned same register because there were no other free registers + // that would satisfy the consecutive registers requirements. In such case, just revert the copyReg state update. + currentRefPosition.copyReg = false; + + // Current assignedRegister satisfies the consecutive registers requirements + currentRefPosition.registerAssignment = assignedRegBit; + } + + setNextConsecutiveRegisterAssignment(¤tRefPosition, copyReg); continue; } } From 3c390d83be8a0ce09eba1b65e30adcc29d2fc88a Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 21 Mar 2023 22:18:11 -0700 Subject: [PATCH 099/125] Misc. cleanup --- src/coreclr/jit/lsra.cpp | 30 +++++------------------------- 1 file changed, 5 insertions(+), 25 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index b064f8bb135349..69b0b6147265cc 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -3126,11 +3126,8 @@ bool LinearScan::isSpillCandidate(Interval* current, RefPosition* refPosition, R { canSpill = canSpillReg(physRegRecord, refLocation); } - if (!canSpill) - { - return false; - } - return true; + + return canSpill; } // Grab a register to use to copy and then immediately use. @@ -5493,7 +5490,7 @@ void LinearScan::allocateRegisters() { regNumber copyReg; #ifdef TARGET_ARM64 - if (hasConsecutiveRegister && currentRefPosition.needsConsecutive) + if (hasConsecutiveRegister && currentRefPosition.needsConsecutive && currentRefPosition.refType == RefTypeUse) { copyReg = assignCopyReg(¤tRefPosition); } @@ -5512,17 +5509,11 @@ void LinearScan::allocateRegisters() { if (currentRefPosition.isFirstRefPositionOfConsecutiveRegisters()) { - // If the first RefPosition was not assigned to the register we wanted and we added - // a copyReg for it, then allocate the subsequent RefPositions with the consecutive + // If the first RefPosition was not assigned to the register that we wanted, we added + // a copyReg for it. Allocate subsequent RefPositions with the consecutive // registers. setNextConsecutiveRegisterAssignment(¤tRefPosition, copyReg); } - else - { - // For non-first RefPositions, if they were not in the register that we wanted, we - // added a copyReg for them to move it to the desired register. No further action is - // needed. - } // For consecutive register, it doesn't matter what the assigned register was. 
// We have just assigned it `copyRegMask` and that's the one in-use, and not the @@ -5608,11 +5599,6 @@ void LinearScan::allocateRegisters() assignedRegister = REG_NA; RegRecord* physRegRecord = getRegisterRecord(currentInterval->physReg); currentRefPosition.registerAssignment = allRegs(currentInterval->registerType); - unassignPhysRegNoSpill(physRegRecord); - } - else - { - setNextConsecutiveRegisterAssignment(¤tRefPosition, assignedRegister); } } } @@ -5630,12 +5616,6 @@ void LinearScan::allocateRegisters() // If the subsequent refPosition is not assigned to the consecutive register, then reassign the // right consecutive register. assignedRegister = REG_NA; - if (assignedRegBit != RBM_NONE) - { - // Also unassign the register currently assigned to it. - RegRecord* physRegRecord = getRegisterRecord(currentInterval->physReg); - unassignPhysRegNoSpill(physRegRecord); - } } } } From a9995e6504df8bfbc909ae83e14c9f2b79c2ebd5 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 21 Mar 2023 22:22:30 -0700 Subject: [PATCH 100/125] Include LsraExtraFPSetForConsecutive for stress regs --- src/coreclr/jit/lsra.cpp | 7 +++++++ src/coreclr/jit/lsra.h | 1 + src/coreclr/jit/lsrabuild.cpp | 6 ++++++ 3 files changed, 14 insertions(+) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 69b0b6147265cc..a36f30c9390a20 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -496,6 +496,13 @@ regMaskTP LinearScan::stressLimitRegs(RefPosition* refPosition, regMaskTP mask) { mask |= refPosition->registerAssignment; } + +#ifdef TARGET_ARM64 + if ((refPosition != nullptr) && refPosition->isFirstRefPositionOfConsecutiveRegisters()) + { + mask |= LsraExtraFPSetForConsecutive; + } +#endif; } return mask; diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index f822b7792d08ec..8532f21595cefe 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -770,6 +770,7 @@ class LinearScan : public LinearScanInterface #elif defined(TARGET_ARM64) static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20); static const regMaskTP LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9); + static const regMaskTP LsraExtraFPSetForConsecutive = (RBM_V3 | RBM_V5 | RBM_V7); #elif defined(TARGET_X86) static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI); static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7); diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index b1e769f4032b98..9b6a6b48b62151 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -1848,6 +1848,12 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, LsraLocation currentLoc regMaskTP calleeSaveMask = calleeSaveRegs(interval->registerType); newRefPosition->registerAssignment = getConstrainedRegMask(oldAssignment, calleeSaveMask, minRegCountForRef); +#ifdef TARGET_ARM64 + if (newRefPosition->isFirstRefPositionOfConsecutiveRegisters()) + { + newRefPosition->registerAssignment |= LsraExtraFPSetForConsecutive; + } +#endif; if ((newRefPosition->registerAssignment != oldAssignment) && (newRefPosition->refType == RefTypeUse) && !interval->isLocalVar) { From ae2e633624ab82643dae4b2b622f8dbd5a8b2f91 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 21 Mar 2023 22:23:05 -0700 Subject: [PATCH 101/125] handle case where 'assignedInterval == nullptr' for try_SPILL_COST() --- src/coreclr/jit/lsra.cpp | 105 +++++++++++++++++++++------------------ 1 file 
changed, 58 insertions(+), 47 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index a36f30c9390a20..fa454e6707b64a 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -3110,7 +3110,7 @@ bool LinearScan::isSpillCandidate(Interval* current, RefPosition* refPosition, R // We shouldn't be calling this if we haven't already determined that the register is not // busy until the next kill. assert(!isRegBusy(physRegRecord->regNum, current->registerType)); - // We should already have determined that the register isn't actively in use. +// We should already have determined that the register isn't actively in use. #ifdef TARGET_ARM64 assert(!isRegInUse(physRegRecord->regNum, current->registerType) || refPosition->needsConsecutive); #else @@ -5401,57 +5401,57 @@ void LinearScan::allocateRegisters() { // It doesn't satisfy, so do a copyReg for the first RefPosition to such a register, so // it would be possible to allocate consecutive registers to the subsequent RefPositions. - regNumber copyReg = assignCopyReg(¤tRefPosition); + regNumber copyReg = assignCopyReg(¤tRefPosition); if (copyReg != assignedRegister) { - lastAllocatedRefPosition = ¤tRefPosition; - regMaskTP copyRegMask = getRegMask(copyReg, currentInterval->registerType); - regMaskTP assignedRegMask = getRegMask(assignedRegister, currentInterval->registerType); + lastAllocatedRefPosition = ¤tRefPosition; + regMaskTP copyRegMask = getRegMask(copyReg, currentInterval->registerType); + regMaskTP assignedRegMask = getRegMask(assignedRegister, currentInterval->registerType); - // For consecutive register, it doesn't matter what the assigned register was. - // We have just assigned it `copyRegMask` and that's the one in-use, and not the - // one that was assigned previously. + // For consecutive register, it doesn't matter what the assigned register was. + // We have just assigned it `copyRegMask` and that's the one in-use, and not the + // one that was assigned previously. - regsInUseThisLocation |= copyRegMask; - if (currentRefPosition.lastUse) - { - if (currentRefPosition.delayRegFree) - { - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE_DELAYED, currentInterval, - assignedRegister)); - delayRegsToFree |= copyRegMask | assignedRegMask; - regsInUseNextLocation |= copyRegMask | assignedRegMask; - } - else + regsInUseThisLocation |= copyRegMask; + if (currentRefPosition.lastUse) { + if (currentRefPosition.delayRegFree) + { + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE_DELAYED, currentInterval, + assignedRegister)); + delayRegsToFree |= copyRegMask | assignedRegMask; + regsInUseNextLocation |= copyRegMask | assignedRegMask; + } + else + { INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE, currentInterval, assignedRegister)); - regsToFree |= copyRegMask | assignedRegMask; + regsToFree |= copyRegMask | assignedRegMask; + } } - } - else - { - copyRegsToFree |= copyRegMask; - if (currentRefPosition.delayRegFree) + else { - regsInUseNextLocation |= copyRegMask | assignedRegMask; + copyRegsToFree |= copyRegMask; + if (currentRefPosition.delayRegFree) + { + regsInUseNextLocation |= copyRegMask | assignedRegMask; + } } - } - // If this is a tree temp (non-localVar) interval, we will need an explicit move. - // Note: In theory a moveReg should cause the Interval to now have the new reg as its - // assigned register. However, that's not currently how this works. - // If we ever actually move lclVar intervals instead of copying, this will need to change. 
- if (!currentInterval->isLocalVar) - { - currentRefPosition.moveReg = true; - currentRefPosition.copyReg = false; - } - clearNextIntervalRef(copyReg, currentInterval->registerType); - clearSpillCost(copyReg, currentInterval->registerType); - updateNextIntervalRef(assignedRegister, currentInterval); - updateSpillCost(assignedRegister, currentInterval); + // If this is a tree temp (non-localVar) interval, we will need an explicit move. + // Note: In theory a moveReg should cause the Interval to now have the new reg as its + // assigned register. However, that's not currently how this works. + // If we ever actually move lclVar intervals instead of copying, this will need to change. + if (!currentInterval->isLocalVar) + { + currentRefPosition.moveReg = true; + currentRefPosition.copyReg = false; + } + clearNextIntervalRef(copyReg, currentInterval->registerType); + clearSpillCost(copyReg, currentInterval->registerType); + updateNextIntervalRef(assignedRegister, currentInterval); + updateSpillCost(assignedRegister, currentInterval); } else { @@ -11694,8 +11694,8 @@ void LinearScan::RegisterSelection::try_SPILL_COST() // The spill weight for the best candidate we've found so far. weight_t bestSpillWeight = FloatingPointUtils::infinite_double(); // True if we found registers with lower spill weight than this refPosition. - bool foundLowerSpillWeight = false; - LsraLocation thisLocation = refPosition->nodeLocation; + bool foundLowerSpillWeight = false; + LsraLocation thisLocation = refPosition->nodeLocation; for (regMaskTP spillCandidates = candidates; spillCandidates != RBM_NONE;) { @@ -11705,11 +11705,23 @@ void LinearScan::RegisterSelection::try_SPILL_COST() RegRecord* spillCandidateRegRecord = &linearScan->physRegs[spillCandidateRegNum]; Interval* assignedInterval = spillCandidateRegRecord->assignedInterval; - // Can and should the interval in this register be spilled for this one, - // if we don't find a better alternative? #ifdef TARGET_ARM64 - if (linearScan->isRefPositionActive(assignedInterval->recentRefPosition, thisLocation) && - (assignedInterval->recentRefPosition->needsConsecutive)) + if (assignedInterval == nullptr) + { + // Ideally we should not be seeing this candidate because it is not assigned to + // any interval. But based on that, we cannot determine if it is a good spill + // candidate or not. Skip processing it. + continue; + } +#endif + + RefPosition* recentRefPosition = assignedInterval->recentRefPosition; + +// Can and should the interval in this register be spilled for this one, +// if we don't find a better alternative? +#ifdef TARGET_ARM64 + if ((recentRefPosition != nullptr) && linearScan->isRefPositionActive(recentRefPosition, thisLocation) && + (recentRefPosition->needsConsecutive)) { continue; } @@ -11726,7 +11738,6 @@ void LinearScan::RegisterSelection::try_SPILL_COST() } weight_t currentSpillWeight = 0; - RefPosition* recentRefPosition = assignedInterval != nullptr ? 
assignedInterval->recentRefPosition : nullptr; if ((recentRefPosition != nullptr) && (recentRefPosition->RegOptional() && !(assignedInterval->isLocalVar && recentRefPosition->IsActualRef()))) { From 02f8ad257ae3c098952acea95831a02aa190da1e Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 22 Mar 2023 06:38:37 -0700 Subject: [PATCH 102/125] fix build error --- src/coreclr/jit/lsra.cpp | 2 +- src/coreclr/jit/lsrabuild.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index fa454e6707b64a..3a3350744ed6db 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -502,7 +502,7 @@ regMaskTP LinearScan::stressLimitRegs(RefPosition* refPosition, regMaskTP mask) { mask |= LsraExtraFPSetForConsecutive; } -#endif; +#endif } return mask; diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 9b6a6b48b62151..bb01ff7233652e 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -1853,7 +1853,7 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, LsraLocation currentLoc { newRefPosition->registerAssignment |= LsraExtraFPSetForConsecutive; } -#endif; +#endif if ((newRefPosition->registerAssignment != oldAssignment) && (newRefPosition->refType == RefTypeUse) && !interval->isLocalVar) { From 984c6eedd46968eaef2d2fb71121c4d3966744b1 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 22 Mar 2023 06:39:25 -0700 Subject: [PATCH 103/125] Call consecutiveCandidates() only for first refposition --- src/coreclr/jit/lsra.cpp | 12 ++++++------ src/coreclr/jit/lsraarm64.cpp | 3 ++- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 3a3350744ed6db..7664ed1cceb08f 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -12292,28 +12292,28 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval, if (needsConsecutiveRegisters) { regMaskTP busyConsecutiveCandidates = RBM_NONE; + if (refPosition->isFirstRefPositionOfConsecutiveRegisters()) + { freeCandidates = linearScan->getConsecutiveCandidates(candidates, refPosition, &busyConsecutiveCandidates); if (freeCandidates == RBM_NONE) - { - // We did not find free candidates. We will use the busy candidates, if - // they are consecutive. - if (refPosition->isFirstRefPositionOfConsecutiveRegisters()) { candidates = busyConsecutiveCandidates; } + } else { // We should have a single candidate that will be used for subsequent // refpositions. 
assert((refPosition->refType == RefTypeUpperVectorRestore) || (genCountBits(candidates) == 1)); + + freeCandidates = candidates & linearScan->m_AvailableRegs; } - if (candidates == RBM_NONE) + if ((freeCandidates == RBM_NONE) && (candidates == RBM_NONE)) { noway_assert(!"Not sufficient consecutive registers available."); } } - } else #endif // TARGET_ARM64 { diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index ca7c31775e1589..8989e29162f3c9 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -255,8 +255,9 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, unsigned regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP allCandidates, RefPosition* refPosition, regMaskTP* busyCandidates) { assert(compiler->info.needsConsecutiveRegisters); + assert(refPosition->isFirstRefPositionOfConsecutiveRegisters()); regMaskTP freeCandidates = allCandidates & m_AvailableRegs; - if (!refPosition->isFirstRefPositionOfConsecutiveRegisters() || (freeCandidates == RBM_NONE)) + if (freeCandidates == RBM_NONE) { return freeCandidates; } From 8fe130a92e0f34da3d763a44de5f365921f15679 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 22 Mar 2023 06:39:55 -0700 Subject: [PATCH 104/125] Only perform special handling for non-uppervectorrestore --- src/coreclr/jit/lsra.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 7664ed1cceb08f..1151e1de638d29 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -5609,7 +5609,7 @@ void LinearScan::allocateRegisters() } } } - else + else if (currentRefPosition.refType == RefTypeUse) { // remaining refPosition of the series... if (assignedRegBit == currentRefPosition.registerAssignment) From 7f8e77fab4f649b15978b35f22a07be96ae0cf72 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 22 Mar 2023 06:43:24 -0700 Subject: [PATCH 105/125] jit format --- src/coreclr/jit/lsra.cpp | 34 ++++++++++++++++++---------------- src/coreclr/jit/lsra.h | 8 +++++--- src/coreclr/jit/lsraarm64.cpp | 31 ++++++++++++++++++------------- 3 files changed, 41 insertions(+), 32 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 1151e1de638d29..7ddd1af8a6e50c 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -5455,9 +5455,10 @@ void LinearScan::allocateRegisters() } else { - // We first noticed that with assignedRegister, we were not getting consecutive registers assigned, so we - // decide to perform copyReg. However, copyReg assigned same register because there were no other free registers - // that would satisfy the consecutive registers requirements. In such case, just revert the copyReg state update. + // We first noticed that with assignedRegister, we were not getting consecutive registers + // assigned, so we decide to perform copyReg. However, copyReg assigned same register + // because there were no other free registers that would satisfy the consecutive registers + // requirements. In such case, just revert the copyReg state update. 
currentRefPosition.copyReg = false; // Current assignedRegister satisfies the consecutive registers requirements @@ -5497,7 +5498,8 @@ void LinearScan::allocateRegisters() { regNumber copyReg; #ifdef TARGET_ARM64 - if (hasConsecutiveRegister && currentRefPosition.needsConsecutive && currentRefPosition.refType == RefTypeUse) + if (hasConsecutiveRegister && currentRefPosition.needsConsecutive && + currentRefPosition.refType == RefTypeUse) { copyReg = assignCopyReg(¤tRefPosition); } @@ -11737,7 +11739,7 @@ void LinearScan::RegisterSelection::try_SPILL_COST() continue; } - weight_t currentSpillWeight = 0; + weight_t currentSpillWeight = 0; if ((recentRefPosition != nullptr) && (recentRefPosition->RegOptional() && !(assignedInterval->isLocalVar && recentRefPosition->IsActualRef()))) { @@ -12294,26 +12296,26 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval, regMaskTP busyConsecutiveCandidates = RBM_NONE; if (refPosition->isFirstRefPositionOfConsecutiveRegisters()) { - freeCandidates = linearScan->getConsecutiveCandidates(candidates, refPosition, &busyConsecutiveCandidates); - if (freeCandidates == RBM_NONE) + freeCandidates = linearScan->getConsecutiveCandidates(candidates, refPosition, &busyConsecutiveCandidates); + if (freeCandidates == RBM_NONE) { candidates = busyConsecutiveCandidates; } } - else - { - // We should have a single candidate that will be used for subsequent - // refpositions. - assert((refPosition->refType == RefTypeUpperVectorRestore) || (genCountBits(candidates) == 1)); + else + { + // We should have a single candidate that will be used for subsequent + // refpositions. + assert((refPosition->refType == RefTypeUpperVectorRestore) || (genCountBits(candidates) == 1)); freeCandidates = candidates & linearScan->m_AvailableRegs; - } + } if ((freeCandidates == RBM_NONE) && (candidates == RBM_NONE)) - { - noway_assert(!"Not sufficient consecutive registers available."); - } + { + noway_assert(!"Not sufficient consecutive registers available."); } + } else #endif // TARGET_ARM64 { diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 8532f21595cefe..f3c07c54866ad5 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -768,8 +768,8 @@ class LinearScan : public LinearScanInterface static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5); static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F16 | RBM_F17); #elif defined(TARGET_ARM64) - static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20); - static const regMaskTP LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9); + static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20); + static const regMaskTP LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9); static const regMaskTP LsraExtraFPSetForConsecutive = (RBM_V3 | RBM_V5 | RBM_V7); #elif defined(TARGET_X86) static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI); @@ -1200,7 +1200,9 @@ class LinearScan : public LinearScanInterface bool canAssignNextConsecutiveRegisters(RefPosition* firstRefPosition, regNumber firstRegAssigned); void setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosition, regNumber firstRegAssigned); regMaskTP getConsecutiveCandidates(regMaskTP candidates, RefPosition* refPosition, regMaskTP* busyCandidates); - regMaskTP filterConsecutiveCandidates(regMaskTP candidates, unsigned int registersNeeded, regMaskTP* 
allConsecutiveCandidates); + regMaskTP filterConsecutiveCandidates(regMaskTP candidates, + unsigned int registersNeeded, + regMaskTP* allConsecutiveCandidates); #endif // TARGET_ARM64 regMaskTP getFreeCandidates(regMaskTP candidates ARM_ARG(var_types regType)) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 8989e29162f3c9..2e2ea9c68e9363 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -168,9 +168,12 @@ bool LinearScan::canAssignNextConsecutiveRegisters(RefPosition* firstRefPosition // allConsecutiveCandidates - Mask returned containing all bits set for possible consecutive register candidates. // // Returns: -// From `candidates`, the mask of series of consecutive registers of `registersNeeded` size with just the first-bit set. +// From `candidates`, the mask of series of consecutive registers of `registersNeeded` size with just the first-bit +// set. // -regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, unsigned int registersNeeded, regMaskTP* allConsecutiveCandidates) +regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, + unsigned int registersNeeded, + regMaskTP* allConsecutiveCandidates) { if (BitOperations::PopCount(candidates) < registersNeeded) { @@ -179,18 +182,18 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, unsigned return RBM_NONE; } - regMaskTP currAvailableRegs = candidates; - regMaskTP overallResult = RBM_NONE; - regMaskTP consecutiveResult = RBM_NONE; - regMaskTP busyRegsInThisLocation = regsBusyUntilKill | regsInUseThisLocation; + regMaskTP currAvailableRegs = candidates; + regMaskTP overallResult = RBM_NONE; + regMaskTP consecutiveResult = RBM_NONE; + regMaskTP busyRegsInThisLocation = regsBusyUntilKill | regsInUseThisLocation; // At this point, for 'n' registers requirement, if Rm+1, Rm+2, Rm+3, ..., Rm+k are // available, create the mask only for Rm+1, Rm+2, ..., Rm+(k-n+1) to convey that it // is safe to assign any of those registers, but not beyond that. -#define AppendConsecutiveMask(startIndex, endIndex, availableRegistersMask) \ - regMaskTP selectionStartMask = (1ULL << regAvailableStartIndex) - 1; \ - regMaskTP selectionEndMask = (1ULL << (regAvailableEndIndex - registersNeeded + 1)) - 1; \ - consecutiveResult |= availableRegistersMask & (selectionEndMask & ~selectionStartMask); \ +#define AppendConsecutiveMask(startIndex, endIndex, availableRegistersMask) \ + regMaskTP selectionStartMask = (1ULL << regAvailableStartIndex) - 1; \ + regMaskTP selectionEndMask = (1ULL << (regAvailableEndIndex - registersNeeded + 1)) - 1; \ + consecutiveResult |= availableRegistersMask & (selectionEndMask & ~selectionStartMask); \ overallResult |= availableRegistersMask; DWORD regAvailableStartIndex = 0, regAvailableEndIndex = 0; @@ -198,7 +201,7 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, unsigned // If we don't find consecutive registers, also track which registers we can pick so // as to reduce the number of registers we will have to spill, to accomodate the // request of the consecutive registers. - regMaskTP registersNeededMask = (1ULL << registersNeeded) - 1; + regMaskTP registersNeededMask = (1ULL << registersNeeded) - 1; do { @@ -252,7 +255,9 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, unsigned // Returns: // Register mask of consecutive registers. 
// -regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP allCandidates, RefPosition* refPosition, regMaskTP* busyCandidates) +regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP allCandidates, + RefPosition* refPosition, + regMaskTP* busyCandidates) { assert(compiler->info.needsConsecutiveRegisters); assert(refPosition->isFirstRefPositionOfConsecutiveRegisters()); @@ -263,7 +268,7 @@ regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP allCandidates, RefPosit } *busyCandidates = RBM_NONE; - regMaskTP overallResult; + regMaskTP overallResult; unsigned int registersNeeded = refPosition->regCount; regMaskTP consecutiveResultForFree = filterConsecutiveCandidates(freeCandidates, registersNeeded, &overallResult); From 090bf2654df576702edcf41d6e331c411c80e14d Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 22 Mar 2023 07:43:16 -0700 Subject: [PATCH 106/125] Add impVectorTableLookup/impVectorTableLookupExtension --- src/coreclr/jit/compiler.h | 19 +++ src/coreclr/jit/gentree.cpp | 2 +- src/coreclr/jit/hwintrinsicarm64.cpp | 179 ++++++++++++++++----------- 3 files changed, 127 insertions(+), 73 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 8443887073d849..eccf267c0e9d98 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -3919,6 +3919,25 @@ class Compiler CorInfoType simdBaseJitType, var_types retType, unsigned simdSize); +#ifdef TARGET_ARM64 + GenTreeHWIntrinsic* impVectorTableLookup(GenTree* op1, + GenTree* op2, + NamedIntrinsic hwIntrinsicID, + CorInfoType simdBaseJitType, + unsigned simdSize, + var_types argType, + var_types retType, + unsigned fieldCount); + GenTreeHWIntrinsic* impVectorTableLookupExtension(GenTree* op1, + GenTree* op2, + GenTree* op3, + NamedIntrinsic hwIntrinsicID, + CorInfoType simdBaseJitType, + unsigned simdSize, + var_types argType, + var_types retType, + unsigned fieldCount); +#endif GenTree* getArgForHWIntrinsic(var_types argType, CORINFO_CLASS_HANDLE argClass, diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index abe3639c568022..691ff6ceabb76d 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -22937,7 +22937,7 @@ GenTree* Compiler::gtNewSimdShuffleNode(var_types type, op2 = gtNewVconNode(type); op2->AsVecCon()->gtSimd16Val = vecCns; - return gtNewSimdHWIntrinsicNode(type, op1, op2, lookupIntrinsic, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); + return impVectorTableLookup(op1, op2, lookupIntrinsic, simdBaseJitType, simdSize, type, type, 0); #else #error Unsupported platform #endif // !TARGET_XARCH && !TARGET_ARM64 diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 83cd5b2b096da1..0637104eb49e1b 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -1879,7 +1879,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_AdvSimd_VectorTableLookup: case NI_AdvSimd_Arm64_VectorTableLookup: { - info.needsConsecutiveRegisters = true; assert(sig->numArgs == 2); CORINFO_ARG_LIST_HANDLE arg1 = sig->args; @@ -1890,49 +1889,18 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); op2 = getArgForHWIntrinsic(argType, argClass); - argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass))); - op1 = impPopStack().val; - - if (op1->TypeGet() == TYP_STRUCT) - { - if (!op1->OperIs(GT_LCL_VAR)) - { - unsigned tmp = 
lvaGrabTemp(true DEBUGARG("VectorTableLookup temp tree")); + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass))); + op1 = impPopStack().val; + unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(argClass); - impAssignTempGen(tmp, op1, CHECK_SPILL_NONE); - op1 = gtNewLclvNode(tmp, argType); - } - - LclVarDsc* op1VarDsc = lvaGetDesc(op1->AsLclVar()); - unsigned lclNum = lvaGetLclNum(op1VarDsc); - unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(argClass); - unsigned fieldSize = op1VarDsc->lvSize() / fieldCount; - var_types fieldType = TYP_SIMD16; - - GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList(); - int offset = 0; - for (unsigned fieldId = 0; fieldId < fieldCount; fieldId++) - { - GenTreeLclFld* fldNode = gtNewLclFldNode(lclNum, fieldType, offset); - fieldList->AddField(this, fldNode, offset, fieldType); - - offset += fieldSize; - } - op1 = fieldList; + retNode = + impVectorTableLookup(op1, op2, intrinsic, simdBaseJitType, simdSize, argType, retType, fieldCount); - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); - } - else - { - assert(op1->TypeGet() == TYP_SIMD16); - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); - } break; } case NI_AdvSimd_VectorTableLookupExtension: case NI_AdvSimd_Arm64_VectorTableLookupExtension: { - info.needsConsecutiveRegisters = true; assert(sig->numArgs == 3); CORINFO_ARG_LIST_HANDLE arg1 = sig->args; @@ -1944,52 +1912,119 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg3, &argClass))); op3 = getArgForHWIntrinsic(argType, argClass); - argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); - op2 = impPopStack().val; - op1 = impPopStack().val; + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); + op2 = impPopStack().val; + op1 = impPopStack().val; + unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(argClass); - if (op2->TypeGet() == TYP_STRUCT) - { - if (!op2->OperIs(GT_LCL_VAR)) - { - unsigned tmp = lvaGrabTemp(true DEBUGARG("VectorTableLookup temp tree")); + retNode = impVectorTableLookupExtension(op1, op2, op3, intrinsic, simdBaseJitType, simdSize, argType, + retType, fieldCount); + } + default: + { + return nullptr; + } + } - impAssignTempGen(tmp, op2, CHECK_SPILL_NONE); - op2 = gtNewLclvNode(tmp, argType); - } + return retNode; +} - LclVarDsc* op2VarDsc = lvaGetDesc(op2->AsLclVar()); - unsigned lclNum = lvaGetLclNum(op2VarDsc); - unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(argClass); - unsigned fieldSize = op2VarDsc->lvSize() / fieldCount; - var_types fieldType = TYP_SIMD16; +GenTreeHWIntrinsic* Compiler::impVectorTableLookup(GenTree* op1, + GenTree* op2, + NamedIntrinsic hwIntrinsicID, + CorInfoType simdBaseJitType, + unsigned simdSize, + var_types argType, + var_types retType, + unsigned fieldCount) +{ + info.needsConsecutiveRegisters = true; - GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList(); - int offset = 0; - for (unsigned fieldId = 0; fieldId < fieldCount; fieldId++) - { - GenTreeLclFld* fldNode = gtNewLclFldNode(lclNum, fieldType, offset); - fieldList->AddField(this, fldNode, offset, fieldType); + GenTreeHWIntrinsic* retNode = nullptr; - offset += fieldSize; - } - op2 = fieldList; + if (op1->TypeGet() == TYP_STRUCT) + { + if (!op1->OperIs(GT_LCL_VAR)) + { + 
unsigned tmp = lvaGrabTemp(true DEBUGARG("VectorTableLookup temp tree")); - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); - } - else - { - assert(op2->TypeGet() == TYP_SIMD16); - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); - } - break; + impAssignTempGen(tmp, op1, CHECK_SPILL_NONE); + op1 = gtNewLclvNode(tmp, argType); } - default: + + LclVarDsc* op1VarDsc = lvaGetDesc(op1->AsLclVar()); + unsigned lclNum = lvaGetLclNum(op1VarDsc); + unsigned fieldSize = op1VarDsc->lvSize() / fieldCount; + var_types fieldType = TYP_SIMD16; + + GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList(); + int offset = 0; + for (unsigned fieldId = 0; fieldId < fieldCount; fieldId++) { - return nullptr; + GenTreeLclFld* fldNode = gtNewLclFldNode(lclNum, fieldType, offset); + fieldList->AddField(this, fldNode, offset, fieldType); + + offset += fieldSize; } + op1 = fieldList; + + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, hwIntrinsicID, simdBaseJitType, simdSize); + } + else + { + assert(op1->TypeGet() == TYP_SIMD16); + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, hwIntrinsicID, simdBaseJitType, simdSize); } + return retNode; +} + +GenTreeHWIntrinsic* Compiler::impVectorTableLookupExtension(GenTree* op1, + GenTree* op2, + GenTree* op3, + NamedIntrinsic hwIntrinsicID, + CorInfoType simdBaseJitType, + unsigned simdSize, + var_types argType, + var_types retType, + unsigned fieldCount) +{ + info.needsConsecutiveRegisters = true; + + GenTreeHWIntrinsic* retNode = nullptr; + if (op2->TypeGet() == TYP_STRUCT) + { + if (!op2->OperIs(GT_LCL_VAR)) + { + unsigned tmp = lvaGrabTemp(true DEBUGARG("VectorTableLookup temp tree")); + + impAssignTempGen(tmp, op2, CHECK_SPILL_NONE); + op2 = gtNewLclvNode(tmp, argType); + } + + LclVarDsc* op2VarDsc = lvaGetDesc(op2->AsLclVar()); + unsigned lclNum = lvaGetLclNum(op2VarDsc); + unsigned fieldSize = op2VarDsc->lvSize() / fieldCount; + var_types fieldType = TYP_SIMD16; + + GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList(); + int offset = 0; + for (unsigned fieldId = 0; fieldId < fieldCount; fieldId++) + { + GenTreeLclFld* fldNode = gtNewLclFldNode(lclNum, fieldType, offset); + fieldList->AddField(this, fldNode, offset, fieldType); + + offset += fieldSize; + } + op2 = fieldList; + + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, hwIntrinsicID, simdBaseJitType, simdSize); + } + else + { + assert(op2->TypeGet() == TYP_SIMD16); + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, hwIntrinsicID, simdBaseJitType, simdSize); + } return retNode; } From c91bc77f1ef244478c8da4e2feff2f100df17ec5 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 22 Mar 2023 10:38:50 -0700 Subject: [PATCH 107/125] Add the missing break --- src/coreclr/jit/hwintrinsicarm64.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 0637104eb49e1b..1fc86fdd2032b8 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -1919,6 +1919,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, retNode = impVectorTableLookupExtension(op1, op2, op3, intrinsic, simdBaseJitType, simdSize, argType, retType, fieldCount); + break; } default: { From 0c4d71f12ace9dde7209ca1db0aa47485b445b8a Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 22 Mar 2023 14:51:54 -0700 Subject: [PATCH 108/125] Update assert --- 
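Illustrative note (not part of the change): only the tuple overloads reach the TYP_STRUCT path in the importer and get split into a FIELD_LIST that the register allocator must place in consecutive registers; the single-table overload keeps a plain SIMD operand, which is the shape the relaxed varTypeIsSIMD assert in this patch covers. A minimal C# sketch of the two shapes, assuming it runs on a machine where AdvSimd.Arm64.IsSupported is true:

using System;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;

class TableLookupShapes
{
    static void Main()
    {
        if (!AdvSimd.Arm64.IsSupported)
        {
            return;
        }

        Vector128<byte> table0  = Vector128.Create((byte)1);
        Vector128<byte> table1  = Vector128.Create((byte)2);
        Vector128<byte> indices = Vector128.Create((byte)0x1F);

        // Single table: a plain TYP_SIMD16 operand, one-register TBL.
        Vector128<byte> single = AdvSimd.Arm64.VectorTableLookup(table0, indices);

        // Two-table tuple: a TYP_STRUCT operand, converted to a FIELD_LIST and
        // constrained to two consecutive table registers for TBL.
        Vector128<byte> pair = AdvSimd.Arm64.VectorTableLookup((table0, table1), indices);

        Console.WriteLine($"{single} {pair}");
    }
}
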
src/coreclr/jit/hwintrinsicarm64.cpp | 6 +++--- src/coreclr/jit/lsra.cpp | 4 ++-- src/coreclr/jit/lsraarm64.cpp | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 74b57e3d546dd2..ab2ffcd8ab3ffc 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -1973,7 +1973,7 @@ GenTreeHWIntrinsic* Compiler::impVectorTableLookup(GenTree* op1, } else { - assert(op1->TypeGet() == TYP_SIMD16); + assert(varTypeIsSIMD(op1->TypeGet())); retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, hwIntrinsicID, simdBaseJitType, simdSize); } return retNode; @@ -1997,7 +1997,7 @@ GenTreeHWIntrinsic* Compiler::impVectorTableLookupExtension(GenTree* op1, { if (!op2->OperIs(GT_LCL_VAR)) { - unsigned tmp = lvaGrabTemp(true DEBUGARG("VectorTableLookup temp tree")); + unsigned tmp = lvaGrabTemp(true DEBUGARG("VectorTableLookupExtension temp tree")); impAssignTempGen(tmp, op2, CHECK_SPILL_NONE); op2 = gtNewLclvNode(tmp, argType); @@ -2023,7 +2023,7 @@ GenTreeHWIntrinsic* Compiler::impVectorTableLookupExtension(GenTree* op1, } else { - assert(op2->TypeGet() == TYP_SIMD16); + assert(varTypeIsSIMD(op1->TypeGet())); retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, hwIntrinsicID, simdBaseJitType, simdSize); } return retNode; diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 1534c733b37af5..74aa292789bdab 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -1318,7 +1318,9 @@ PhaseStatus LinearScan::doLinearScan() } else #endif // TARGET_ARM64 + { allocateRegisters(); + } allocationPassComplete = true; compiler->EndPhase(PHASE_LINEAR_SCAN_ALLOC); @@ -2286,7 +2288,6 @@ void LinearScan::checkLastUses(BasicBlock* block) foundDiff = true; } - JITDUMP("++ V%02u in computedLive\n", compiler->lvaTrackedIndexToLclNum(varIndex)); VarSetOps::AddElemD(compiler, computedLive, varIndex); } else if (currentRefPosition->lastUse) @@ -2302,7 +2303,6 @@ void LinearScan::checkLastUses(BasicBlock* block) if (currentRefPosition->refType == RefTypeDef || currentRefPosition->refType == RefTypeDummyDef) { - JITDUMP("-- V%02u from computedLive\n", compiler->lvaTrackedIndexToLclNum(varIndex)); VarSetOps::RemoveElemD(compiler, computedLive, varIndex); } } diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 266daa5508bc5d..ba947faa1150cc 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -187,8 +187,8 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, regMaskTP consecutiveResult = RBM_NONE; regMaskTP busyRegsInThisLocation = regsBusyUntilKill | regsInUseThisLocation; -// At this point, for 'n' registers requirement, if Rm+1, Rm+2, Rm+3, ..., Rm+k are -// available, create the mask only for Rm+1, Rm+2, ..., Rm+(k-n+1) to convey that it +// At this point, for 'n' registers requirement, if Rm, Rm+1, Rm+2, ..., Rm+k-1 are +// available, create the mask only for Rm, Rm+1, ..., Rm+(k-n) to convey that it // is safe to assign any of those registers, but not beyond that. 
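// Worked example (illustrative only; assumes V0 maps to bit 32 of regMaskTP, as
// the hex masks elsewhere in this function do): for registersNeeded = 3 with
// V8..V12 free, candidates = 0x00001F0000000000. A 3-register series starting at
// V11 or V12 would run past the free range, so consecutiveResult keeps only the
// start positions V8, V9 and V10 (0x0000070000000000), while overallResult keeps
// the whole run (0x00001F0000000000).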
#define AppendConsecutiveMask(startIndex, endIndex, availableRegistersMask) \ regMaskTP selectionStartMask = (1ULL << regAvailableStartIndex) - 1; \ From 0c56514ac66623ce452c6b00ac5610055e01e203 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 22 Mar 2023 15:08:54 -0700 Subject: [PATCH 109/125] Move definitions in GenTree, fix assert --- src/coreclr/jit/compiler.h | 40 +++++----- src/coreclr/jit/gentree.cpp | 94 ++++++++++++++++++++++- src/coreclr/jit/hwintrinsicarm64.cpp | 107 +-------------------------- 3 files changed, 118 insertions(+), 123 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index f0ab1d68ae324f..60ada72070897f 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2780,6 +2780,27 @@ class Compiler CORINFO_CLASS_HANDLE clsHnd, CORINFO_SIG_INFO* sig, CorInfoType simdBaseJitType); + +#ifdef TARGET_ARM64 + GenTreeHWIntrinsic* gtNewSimdVectorTableLookupNode(GenTree* op1, + GenTree* op2, + NamedIntrinsic hwIntrinsicID, + CorInfoType simdBaseJitType, + unsigned simdSize, + var_types argType, + var_types retType, + unsigned fieldCount); + GenTreeHWIntrinsic* gtNewSimdVectorTableLookupExtensionNode(GenTree* op1, + GenTree* op2, + GenTree* op3, + NamedIntrinsic hwIntrinsicID, + CorInfoType simdBaseJitType, + unsigned simdSize, + var_types argType, + var_types retType, + unsigned fieldCount); + GenTreeFieldList* getConvertTableOpToFieldList(GenTree* op, unsigned fieldCount); +#endif #endif // FEATURE_HW_INTRINSICS GenTree* gtNewMustThrowException(unsigned helper, var_types type, CORINFO_CLASS_HANDLE clsHnd); @@ -3943,25 +3964,6 @@ class Compiler CorInfoType simdBaseJitType, var_types retType, unsigned simdSize); -#ifdef TARGET_ARM64 - GenTreeHWIntrinsic* impVectorTableLookup(GenTree* op1, - GenTree* op2, - NamedIntrinsic hwIntrinsicID, - CorInfoType simdBaseJitType, - unsigned simdSize, - var_types argType, - var_types retType, - unsigned fieldCount); - GenTreeHWIntrinsic* impVectorTableLookupExtension(GenTree* op1, - GenTree* op2, - GenTree* op3, - NamedIntrinsic hwIntrinsicID, - CorInfoType simdBaseJitType, - unsigned simdSize, - var_types argType, - var_types retType, - unsigned fieldCount); -#endif GenTree* getArgForHWIntrinsic(var_types argType, CORINFO_CLASS_HANDLE argClass, diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index ae4c8938db7c06..1e2f93204e369c 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -23074,7 +23074,7 @@ GenTree* Compiler::gtNewSimdShuffleNode(var_types type, op2 = gtNewVconNode(type); op2->AsVecCon()->gtSimdVal = vecCns; - return impVectorTableLookup(op1, op2, lookupIntrinsic, simdBaseJitType, simdSize, type, type, 0); + return gtNewSimdVectorTableLookupNode(op1, op2, lookupIntrinsic, simdBaseJitType, simdSize, type, type, 0); #else #error Unsupported platform #endif // !TARGET_XARCH && !TARGET_ARM64 @@ -23953,6 +23953,98 @@ GenTree* Compiler::gtNewSimdWithElementNode(var_types type, return gtNewSimdHWIntrinsicNode(type, op1, op2, op3, hwIntrinsicID, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); } +#ifdef TARGET_ARM64 +GenTreeHWIntrinsic* Compiler::gtNewSimdVectorTableLookupNode(GenTree* op1, + GenTree* op2, + NamedIntrinsic hwIntrinsicID, + CorInfoType simdBaseJitType, + unsigned simdSize, + var_types argType, + var_types retType, + unsigned fieldCount) +{ + info.needsConsecutiveRegisters = true; + + GenTreeHWIntrinsic* retNode = nullptr; + + if (op1->TypeGet() == TYP_STRUCT) + { + if (!op1->OperIs(GT_LCL_VAR)) + { + unsigned tmp = 
lvaGrabTemp(true DEBUGARG("VectorTableLookup temp tree")); + + impAssignTempGen(tmp, op1, CHECK_SPILL_NONE); + op1 = gtNewLclvNode(tmp, argType); + } + + op1 = getConvertTableOpToFieldList(op1, fieldCount); + + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, hwIntrinsicID, simdBaseJitType, simdSize); + } + else + { + assert(varTypeIsSIMD(op1->TypeGet())); + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, hwIntrinsicID, simdBaseJitType, simdSize); + } + return retNode; +} + +GenTreeHWIntrinsic* Compiler::gtNewSimdVectorTableLookupExtensionNode(GenTree* op1, + GenTree* op2, + GenTree* op3, + NamedIntrinsic hwIntrinsicID, + CorInfoType simdBaseJitType, + unsigned simdSize, + var_types argType, + var_types retType, + unsigned fieldCount) + +{ + info.needsConsecutiveRegisters = true; + + GenTreeHWIntrinsic* retNode = nullptr; + if (op2->TypeGet() == TYP_STRUCT) + { + if (!op2->OperIs(GT_LCL_VAR)) + { + unsigned tmp = lvaGrabTemp(true DEBUGARG("VectorTableLookupExtension temp tree")); + + impAssignTempGen(tmp, op2, CHECK_SPILL_NONE); + op2 = gtNewLclvNode(tmp, argType); + } + + op2 = getConvertTableOpToFieldList(op2, fieldCount); + + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, hwIntrinsicID, simdBaseJitType, simdSize); + } + else + { + assert(varTypeIsSIMD(op1->TypeGet())); + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, hwIntrinsicID, simdBaseJitType, simdSize); + } + return retNode; +} + +GenTreeFieldList* Compiler::getConvertTableOpToFieldList(GenTree* op, unsigned fieldCount) +{ + LclVarDsc* opVarDsc = lvaGetDesc(op->AsLclVar()); + unsigned lclNum = lvaGetLclNum(opVarDsc); + unsigned fieldSize = opVarDsc->lvSize() / fieldCount; + var_types fieldType = TYP_SIMD16; + + GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList(); + int offset = 0; + for (unsigned fieldId = 0; fieldId < fieldCount; fieldId++) + { + GenTreeLclFld* fldNode = gtNewLclFldNode(lclNum, fieldType, offset); + fieldList->AddField(this, fldNode, offset, fieldType); + + offset += fieldSize; + } + return fieldList; +} +#endif + GenTreeHWIntrinsic* Compiler::gtNewScalarHWIntrinsicNode(var_types type, NamedIntrinsic hwIntrinsicID) { return new (this, GT_HWINTRINSIC) GenTreeHWIntrinsic(type, getAllocator(CMK_ASTNode), hwIntrinsicID, diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index ab2ffcd8ab3ffc..c656140ff0d42d 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -1893,8 +1893,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op1 = impPopStack().val; unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(argClass); - retNode = - impVectorTableLookup(op1, op2, intrinsic, simdBaseJitType, simdSize, argType, retType, fieldCount); + retNode = gtNewSimdVectorTableLookupNode(op1, op2, intrinsic, simdBaseJitType, simdSize, argType, retType, + fieldCount); break; } @@ -1917,8 +1917,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op1 = impPopStack().val; unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(argClass); - retNode = impVectorTableLookupExtension(op1, op2, op3, intrinsic, simdBaseJitType, simdSize, argType, - retType, fieldCount); + retNode = gtNewSimdVectorTableLookupExtensionNode(op1, op2, op3, intrinsic, simdBaseJitType, simdSize, + argType, retType, fieldCount); break; } default: @@ -1930,103 +1930,4 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, return retNode; } -GenTreeHWIntrinsic* 
Compiler::impVectorTableLookup(GenTree* op1, - GenTree* op2, - NamedIntrinsic hwIntrinsicID, - CorInfoType simdBaseJitType, - unsigned simdSize, - var_types argType, - var_types retType, - unsigned fieldCount) -{ - info.needsConsecutiveRegisters = true; - - GenTreeHWIntrinsic* retNode = nullptr; - - if (op1->TypeGet() == TYP_STRUCT) - { - if (!op1->OperIs(GT_LCL_VAR)) - { - unsigned tmp = lvaGrabTemp(true DEBUGARG("VectorTableLookup temp tree")); - - impAssignTempGen(tmp, op1, CHECK_SPILL_NONE); - op1 = gtNewLclvNode(tmp, argType); - } - - LclVarDsc* op1VarDsc = lvaGetDesc(op1->AsLclVar()); - unsigned lclNum = lvaGetLclNum(op1VarDsc); - unsigned fieldSize = op1VarDsc->lvSize() / fieldCount; - var_types fieldType = TYP_SIMD16; - - GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList(); - int offset = 0; - for (unsigned fieldId = 0; fieldId < fieldCount; fieldId++) - { - GenTreeLclFld* fldNode = gtNewLclFldNode(lclNum, fieldType, offset); - fieldList->AddField(this, fldNode, offset, fieldType); - - offset += fieldSize; - } - op1 = fieldList; - - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, hwIntrinsicID, simdBaseJitType, simdSize); - } - else - { - assert(varTypeIsSIMD(op1->TypeGet())); - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, hwIntrinsicID, simdBaseJitType, simdSize); - } - return retNode; -} - -GenTreeHWIntrinsic* Compiler::impVectorTableLookupExtension(GenTree* op1, - GenTree* op2, - GenTree* op3, - NamedIntrinsic hwIntrinsicID, - CorInfoType simdBaseJitType, - unsigned simdSize, - var_types argType, - var_types retType, - unsigned fieldCount) - -{ - info.needsConsecutiveRegisters = true; - - GenTreeHWIntrinsic* retNode = nullptr; - if (op2->TypeGet() == TYP_STRUCT) - { - if (!op2->OperIs(GT_LCL_VAR)) - { - unsigned tmp = lvaGrabTemp(true DEBUGARG("VectorTableLookupExtension temp tree")); - - impAssignTempGen(tmp, op2, CHECK_SPILL_NONE); - op2 = gtNewLclvNode(tmp, argType); - } - - LclVarDsc* op2VarDsc = lvaGetDesc(op2->AsLclVar()); - unsigned lclNum = lvaGetLclNum(op2VarDsc); - unsigned fieldSize = op2VarDsc->lvSize() / fieldCount; - var_types fieldType = TYP_SIMD16; - - GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList(); - int offset = 0; - for (unsigned fieldId = 0; fieldId < fieldCount; fieldId++) - { - GenTreeLclFld* fldNode = gtNewLclFldNode(lclNum, fieldType, offset); - fieldList->AddField(this, fldNode, offset, fieldType); - - offset += fieldSize; - } - op2 = fieldList; - - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, hwIntrinsicID, simdBaseJitType, simdSize); - } - else - { - assert(varTypeIsSIMD(op1->TypeGet())); - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, hwIntrinsicID, simdBaseJitType, simdSize); - } - return retNode; -} - #endif // FEATURE_HW_INTRINSICS From 35a75508cf916afe3e12bcce45d72583680bc5c4 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 22 Mar 2023 20:08:48 -0700 Subject: [PATCH 110/125] fix arm issue --- src/coreclr/jit/lsra.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 74aa292789bdab..69d10e239f3181 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -11704,6 +11704,10 @@ void LinearScan::RegisterSelection::try_SPILL_COST() regNumber spillCandidateRegNum = genRegNumFromMask(spillCandidateBit); RegRecord* spillCandidateRegRecord = &linearScan->physRegs[spillCandidateRegNum]; Interval* assignedInterval = spillCandidateRegRecord->assignedInterval; + 
RefPosition* recentRefPosition = assignedInterval != nullptr ? assignedInterval->recentRefPosition : nullptr; + + // Can and should the interval in this register be spilled for this one, + // if we don't find a better alternative? #ifdef TARGET_ARM64 if (assignedInterval == nullptr) @@ -11713,19 +11717,13 @@ void LinearScan::RegisterSelection::try_SPILL_COST() // candidate or not. Skip processing it. continue; } -#endif - - RefPosition* recentRefPosition = assignedInterval->recentRefPosition; -// Can and should the interval in this register be spilled for this one, -// if we don't find a better alternative? -#ifdef TARGET_ARM64 if ((recentRefPosition != nullptr) && linearScan->isRefPositionActive(recentRefPosition, thisLocation) && (recentRefPosition->needsConsecutive)) { continue; } -#endif +#endif // TARGET_ARM64 if ((linearScan->getNextIntervalRef(spillCandidateRegNum, regType) == thisLocation) && !assignedInterval->getNextRefPosition()->RegOptional()) From ff587ac0cebdc5f0c41c510141480f3497058acc Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 22 Mar 2023 21:05:17 -0700 Subject: [PATCH 111/125] Remove common functions --- src/coreclr/jit/compiler.h | 19 +------ src/coreclr/jit/gentree.cpp | 78 ++-------------------------- src/coreclr/jit/hwintrinsicarm64.cpp | 57 ++++++++++++++++---- src/coreclr/jit/lsra.cpp | 10 ++-- src/coreclr/jit/lsraarm64.cpp | 7 ++- 5 files changed, 61 insertions(+), 110 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 60ada72070897f..0ca5b8c488c01a 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2782,24 +2782,7 @@ class Compiler CorInfoType simdBaseJitType); #ifdef TARGET_ARM64 - GenTreeHWIntrinsic* gtNewSimdVectorTableLookupNode(GenTree* op1, - GenTree* op2, - NamedIntrinsic hwIntrinsicID, - CorInfoType simdBaseJitType, - unsigned simdSize, - var_types argType, - var_types retType, - unsigned fieldCount); - GenTreeHWIntrinsic* gtNewSimdVectorTableLookupExtensionNode(GenTree* op1, - GenTree* op2, - GenTree* op3, - NamedIntrinsic hwIntrinsicID, - CorInfoType simdBaseJitType, - unsigned simdSize, - var_types argType, - var_types retType, - unsigned fieldCount); - GenTreeFieldList* getConvertTableOpToFieldList(GenTree* op, unsigned fieldCount); + GenTreeFieldList* gtConvertTableOpToFieldList(GenTree* op, unsigned fieldCount); #endif #endif // FEATURE_HW_INTRINSICS diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 1e2f93204e369c..0438ff15bdbe6d 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -23074,7 +23074,8 @@ GenTree* Compiler::gtNewSimdShuffleNode(var_types type, op2 = gtNewVconNode(type); op2->AsVecCon()->gtSimdVal = vecCns; - return gtNewSimdVectorTableLookupNode(op1, op2, lookupIntrinsic, simdBaseJitType, simdSize, type, type, 0); + return gtNewSimdHWIntrinsicNode(type, op1, op2, lookupIntrinsic, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); + #else #error Unsupported platform #endif // !TARGET_XARCH && !TARGET_ARM64 @@ -23954,78 +23955,7 @@ GenTree* Compiler::gtNewSimdWithElementNode(var_types type, } #ifdef TARGET_ARM64 -GenTreeHWIntrinsic* Compiler::gtNewSimdVectorTableLookupNode(GenTree* op1, - GenTree* op2, - NamedIntrinsic hwIntrinsicID, - CorInfoType simdBaseJitType, - unsigned simdSize, - var_types argType, - var_types retType, - unsigned fieldCount) -{ - info.needsConsecutiveRegisters = true; - - GenTreeHWIntrinsic* retNode = nullptr; - - if (op1->TypeGet() == TYP_STRUCT) - { - if (!op1->OperIs(GT_LCL_VAR)) - { - 
unsigned tmp = lvaGrabTemp(true DEBUGARG("VectorTableLookup temp tree")); - - impAssignTempGen(tmp, op1, CHECK_SPILL_NONE); - op1 = gtNewLclvNode(tmp, argType); - } - - op1 = getConvertTableOpToFieldList(op1, fieldCount); - - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, hwIntrinsicID, simdBaseJitType, simdSize); - } - else - { - assert(varTypeIsSIMD(op1->TypeGet())); - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, hwIntrinsicID, simdBaseJitType, simdSize); - } - return retNode; -} - -GenTreeHWIntrinsic* Compiler::gtNewSimdVectorTableLookupExtensionNode(GenTree* op1, - GenTree* op2, - GenTree* op3, - NamedIntrinsic hwIntrinsicID, - CorInfoType simdBaseJitType, - unsigned simdSize, - var_types argType, - var_types retType, - unsigned fieldCount) - -{ - info.needsConsecutiveRegisters = true; - - GenTreeHWIntrinsic* retNode = nullptr; - if (op2->TypeGet() == TYP_STRUCT) - { - if (!op2->OperIs(GT_LCL_VAR)) - { - unsigned tmp = lvaGrabTemp(true DEBUGARG("VectorTableLookupExtension temp tree")); - - impAssignTempGen(tmp, op2, CHECK_SPILL_NONE); - op2 = gtNewLclvNode(tmp, argType); - } - - op2 = getConvertTableOpToFieldList(op2, fieldCount); - - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, hwIntrinsicID, simdBaseJitType, simdSize); - } - else - { - assert(varTypeIsSIMD(op1->TypeGet())); - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, hwIntrinsicID, simdBaseJitType, simdSize); - } - return retNode; -} - -GenTreeFieldList* Compiler::getConvertTableOpToFieldList(GenTree* op, unsigned fieldCount) +GenTreeFieldList* Compiler::gtConvertTableOpToFieldList(GenTree* op, unsigned fieldCount) { LclVarDsc* opVarDsc = lvaGetDesc(op->AsLclVar()); unsigned lclNum = lvaGetLclNum(opVarDsc); @@ -24043,7 +23973,7 @@ GenTreeFieldList* Compiler::getConvertTableOpToFieldList(GenTree* op, unsigned f } return fieldList; } -#endif +#endif // TARGET_ARM64 GenTreeHWIntrinsic* Compiler::gtNewScalarHWIntrinsicNode(var_types type, NamedIntrinsic hwIntrinsicID) { diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index c656140ff0d42d..ecccbcf8592270 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -1888,14 +1888,31 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); op2 = getArgForHWIntrinsic(argType, argClass); + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass))); + op1 = impPopStack().val; + + if (op1->TypeGet() == TYP_STRUCT) + { + unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(argClass); + + info.needsConsecutiveRegisters = true; + + if (!op1->OperIs(GT_LCL_VAR)) + { + unsigned tmp = lvaGrabTemp(true DEBUGARG("VectorTableLookup temp tree")); - argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass))); - op1 = impPopStack().val; - unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(argClass); + impAssignTempGen(tmp, op1, CHECK_SPILL_NONE); + op1 = gtNewLclvNode(tmp, argType); + } - retNode = gtNewSimdVectorTableLookupNode(op1, op2, intrinsic, simdBaseJitType, simdSize, argType, retType, - fieldCount); + op1 = gtConvertTableOpToFieldList(op1, fieldCount); + } + else + { + assert(varTypeIsSIMD(op1->TypeGet())); + } + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); break; } case NI_AdvSimd_VectorTableLookupExtension: @@ -1911,14 +1928,32 @@ GenTree* 
Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg3, &argClass))); op3 = getArgForHWIntrinsic(argType, argClass); + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); + op2 = impPopStack().val; + op1 = impPopStack().val; + + if (op2->TypeGet() == TYP_STRUCT) + { + info.needsConsecutiveRegisters = true; + unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(argClass); + + if (!op2->OperIs(GT_LCL_VAR)) + { + unsigned tmp = lvaGrabTemp(true DEBUGARG("VectorTableLookupExtension temp tree")); - argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass))); - op2 = impPopStack().val; - op1 = impPopStack().val; - unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(argClass); + impAssignTempGen(tmp, op2, CHECK_SPILL_NONE); + op2 = gtNewLclvNode(tmp, argType); + } + + op2 = gtConvertTableOpToFieldList(op2, fieldCount); + } + else + { + assert(varTypeIsSIMD(op1->TypeGet())); + + } - retNode = gtNewSimdVectorTableLookupExtensionNode(op1, op2, op3, intrinsic, simdBaseJitType, simdSize, - argType, retType, fieldCount); + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); break; } default: diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 69d10e239f3181..5d258890f5495e 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -11701,13 +11701,13 @@ void LinearScan::RegisterSelection::try_SPILL_COST() { regMaskTP spillCandidateBit = genFindLowestBit(spillCandidates); spillCandidates &= ~spillCandidateBit; - regNumber spillCandidateRegNum = genRegNumFromMask(spillCandidateBit); - RegRecord* spillCandidateRegRecord = &linearScan->physRegs[spillCandidateRegNum]; - Interval* assignedInterval = spillCandidateRegRecord->assignedInterval; + regNumber spillCandidateRegNum = genRegNumFromMask(spillCandidateBit); + RegRecord* spillCandidateRegRecord = &linearScan->physRegs[spillCandidateRegNum]; + Interval* assignedInterval = spillCandidateRegRecord->assignedInterval; RefPosition* recentRefPosition = assignedInterval != nullptr ? assignedInterval->recentRefPosition : nullptr; - // Can and should the interval in this register be spilled for this one, - // if we don't find a better alternative? +// Can and should the interval in this register be spilled for this one, +// if we don't find a better alternative? 
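        // Sketch of the intent here (not stated in the commit message): recentRefPosition
        // is now read through a null-guarded ternary before this block, so targets other
        // than ARM64, which cannot take the assignedInterval == nullptr early-out below,
        // no longer dereference a possibly null assignedInterval.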
#ifdef TARGET_ARM64 if (assignedInterval == nullptr) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index ba947faa1150cc..cd29cca43bd7c1 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -65,7 +65,6 @@ void LinearScan::setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosit { assert(compiler->info.needsConsecutiveRegisters); assert(firstRefPosition->assignedReg() == firstRegAssigned); - assert(isSingleRegister(genRegMask(firstRegAssigned))); assert(firstRefPosition->isFirstRefPositionOfConsecutiveRegisters()); assert(emitter::isVectorRegister(firstRegAssigned)); @@ -129,6 +128,7 @@ bool LinearScan::canAssignNextConsecutiveRegisters(RefPosition* firstRefPosition RefPosition* nextRefPosition = firstRefPosition; regNumber regToAssign = firstRegAssigned; assert(compiler->info.needsConsecutiveRegisters && registersCount > 1); + assert(emitter::isVectorRegister(firstRegAssigned)); int i = 1; do @@ -1407,6 +1407,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou { assert(intrin.op2 != nullptr); assert(intrin.op3 != nullptr); + assert((intrin.id == NI_AdvSimd_VectorTableLookupExtension) || + (intrin.id == NI_AdvSimd_Arm64_VectorTableLookupExtension)); srcCount += BuildConsecutiveRegistersForUse(intrin.op2); srcCount += isRMW ? BuildDelayFreeUses(intrin.op3, intrin.op1) : BuildOperandUses(intrin.op3); } @@ -1510,10 +1512,11 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou // int LinearScan::BuildConsecutiveRegistersForUse(GenTree* treeNode) { - assert(compiler->info.needsConsecutiveRegisters); int srcCount = 0; if (treeNode->OperIsFieldList()) { + assert(compiler->info.needsConsecutiveRegisters); + unsigned regCount = 0; RefPosition* firstRefPos = nullptr; RefPosition* currRefPos = nullptr; From dab2121d1d1c7ed68cb6fe1a55e97684dfc9ea87 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 22 Mar 2023 21:10:46 -0700 Subject: [PATCH 112/125] Rename info.needsConsecutiveRegisters to info.compNeedsConsecutiveRegisters --- src/coreclr/jit/compiler.cpp | 2 +- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/fginline.cpp | 2 +- src/coreclr/jit/hwintrinsicarm64.cpp | 5 ++--- src/coreclr/jit/lsra.cpp | 3 +-- src/coreclr/jit/lsraarm64.cpp | 10 +++++----- 6 files changed, 11 insertions(+), 13 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 92078ba60406e3..a90f8aa4c0a4b3 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -6683,7 +6683,7 @@ int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr, #endif #ifdef TARGET_ARM64 - info.needsConsecutiveRegisters = false; + info.compNeedsConsecutiveRegisters = false; #endif /* Initialize emitter */ diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 0ca5b8c488c01a..5399bcc034388e 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -10038,7 +10038,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX unsigned compHandleHistogramProbeCount; #ifdef TARGET_ARM64 - bool needsConsecutiveRegisters; + bool compNeedsConsecutiveRegisters; #endif } info; diff --git a/src/coreclr/jit/fginline.cpp b/src/coreclr/jit/fginline.cpp index af6d7203b12033..aa8ebc0e0519ac 100644 --- a/src/coreclr/jit/fginline.cpp +++ b/src/coreclr/jit/fginline.cpp @@ -1455,7 +1455,7 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo) lvaGenericsContextInUse |= InlineeCompiler->lvaGenericsContextInUse; 
#ifdef TARGET_ARM64 - info.needsConsecutiveRegisters |= InlineeCompiler->info.needsConsecutiveRegisters; + info.compNeedsConsecutiveRegisters |= InlineeCompiler->info.compNeedsConsecutiveRegisters; #endif // If the inlinee compiler encounters switch tables, disable hot/cold splitting in the root compiler. diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index ecccbcf8592270..2a692e4dba80a1 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -1893,10 +1893,9 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, if (op1->TypeGet() == TYP_STRUCT) { + info.compNeedsConsecutiveRegisters = true; unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(argClass); - info.needsConsecutiveRegisters = true; - if (!op1->OperIs(GT_LCL_VAR)) { unsigned tmp = lvaGrabTemp(true DEBUGARG("VectorTableLookup temp tree")); @@ -1934,7 +1933,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, if (op2->TypeGet() == TYP_STRUCT) { - info.needsConsecutiveRegisters = true; + info.compNeedsConsecutiveRegisters = true; unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(argClass); if (!op2->OperIs(GT_LCL_VAR)) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 5d258890f5495e..179fdead1a124d 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -1312,7 +1312,7 @@ PhaseStatus LinearScan::doLinearScan() initVarRegMaps(); #ifdef TARGET_ARM64 - if (compiler->info.needsConsecutiveRegisters) + if (compiler->info.compNeedsConsecutiveRegisters) { allocateRegisters(); } @@ -2287,7 +2287,6 @@ void LinearScan::checkLastUses(BasicBlock* block) loc); foundDiff = true; } - VarSetOps::AddElemD(compiler, computedLive, varIndex); } else if (currentRefPosition->lastUse) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index cd29cca43bd7c1..1032854cb2f3c8 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -36,7 +36,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX // RefPosition* LinearScan::getNextConsecutiveRefPosition(RefPosition* refPosition) { - assert(compiler->info.needsConsecutiveRegisters); + assert(compiler->info.compNeedsConsecutiveRegisters); RefPosition* nextRefPosition; assert(refPosition->needsConsecutive); nextConsecutiveRefPositionMap->Lookup(refPosition, &nextRefPosition); @@ -63,7 +63,7 @@ RefPosition* LinearScan::getNextConsecutiveRefPosition(RefPosition* refPosition) // void LinearScan::setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosition, regNumber firstRegAssigned) { - assert(compiler->info.needsConsecutiveRegisters); + assert(compiler->info.compNeedsConsecutiveRegisters); assert(firstRefPosition->assignedReg() == firstRegAssigned); assert(firstRefPosition->isFirstRefPositionOfConsecutiveRegisters()); assert(emitter::isVectorRegister(firstRegAssigned)); @@ -127,7 +127,7 @@ bool LinearScan::canAssignNextConsecutiveRegisters(RefPosition* firstRefPosition int registersCount = firstRefPosition->regCount; RefPosition* nextRefPosition = firstRefPosition; regNumber regToAssign = firstRegAssigned; - assert(compiler->info.needsConsecutiveRegisters && registersCount > 1); + assert(compiler->info.compNeedsConsecutiveRegisters && registersCount > 1); assert(emitter::isVectorRegister(firstRegAssigned)); int i = 1; @@ -259,7 +259,7 @@ regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP allCandidates, RefPosition* refPosition, regMaskTP* 
busyCandidates) { - assert(compiler->info.needsConsecutiveRegisters); + assert(compiler->info.compNeedsConsecutiveRegisters); assert(refPosition->isFirstRefPositionOfConsecutiveRegisters()); regMaskTP freeCandidates = allCandidates & m_AvailableRegs; if (freeCandidates == RBM_NONE) @@ -1515,7 +1515,7 @@ int LinearScan::BuildConsecutiveRegistersForUse(GenTree* treeNode) int srcCount = 0; if (treeNode->OperIsFieldList()) { - assert(compiler->info.needsConsecutiveRegisters); + assert(compiler->info.compNeedsConsecutiveRegisters); unsigned regCount = 0; RefPosition* firstRefPos = nullptr; From e94cfcf2d2286bd7fa108b1b0764cb8b58262a6f Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 22 Mar 2023 21:32:26 -0700 Subject: [PATCH 113/125] Use needsConsecutiveRegisters template parameter for all configurations --- src/coreclr/jit/hwintrinsicarm64.cpp | 5 ++-- src/coreclr/jit/lsra.cpp | 41 ++++++---------------------- src/coreclr/jit/lsra.h | 8 +----- 3 files changed, 11 insertions(+), 43 deletions(-) diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 2a692e4dba80a1..bcc5a40f079a52 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -1894,7 +1894,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, if (op1->TypeGet() == TYP_STRUCT) { info.compNeedsConsecutiveRegisters = true; - unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(argClass); + unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(argClass); if (!op1->OperIs(GT_LCL_VAR)) { @@ -1934,7 +1934,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, if (op2->TypeGet() == TYP_STRUCT) { info.compNeedsConsecutiveRegisters = true; - unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(argClass); + unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(argClass); if (!op2->OperIs(GT_LCL_VAR)) { @@ -1949,7 +1949,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, else { assert(varTypeIsSIMD(op1->TypeGet())); - } retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize); diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 179fdead1a124d..7f834e15eac340 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -2841,24 +2841,13 @@ bool LinearScan::isMatchingConstant(RegRecord* physRegRecord, RefPosition* refPo // of all but also has a weight lower than 'refPosition'. If there is // no such ref position, no register will be allocated. // -#ifdef TARGET_ARM64 template -#endif regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) { - regMaskTP foundRegBit; + regMaskTP foundRegBit = + regSelector->select(currentInterval, refPosition DEBUG_ARG(registerScore)); -#ifdef TARGET_ARM64 - if (needsConsecutiveRegisters) - { - foundRegBit = regSelector->select(currentInterval, refPosition DEBUG_ARG(registerScore)); - } - else -#endif // TARGET_ARM64 - { - foundRegBit = regSelector->select(currentInterval, refPosition DEBUG_ARG(registerScore)); - } if (foundRegBit == RBM_NONE) { return REG_NA; @@ -3132,9 +3121,7 @@ bool LinearScan::isSpillCandidate(Interval* current, RefPosition* refPosition, R // Prefer a free register that's got the earliest next use. 
// Otherwise, spill something with the farthest next use // -#ifdef TARGET_ARM64 template -#endif regNumber LinearScan::assignCopyReg(RefPosition* refPosition) { Interval* currentInterval = refPosition->getInterval(); @@ -3157,18 +3144,9 @@ regNumber LinearScan::assignCopyReg(RefPosition* refPosition) refPosition->copyReg = true; RegisterScore registerScore = NONE; - regNumber allocatedReg; -#ifdef TARGET_ARM64 - if (needsConsecutiveRegisters) - { - assert(refPosition->needsConsecutive); - allocatedReg = allocateReg(currentInterval, refPosition DEBUG_ARG(®isterScore)); - } - else -#endif - { - allocatedReg = allocateReg(currentInterval, refPosition DEBUG_ARG(®isterScore)); - } + regNumber allocatedReg = + allocateReg(currentInterval, refPosition DEBUG_ARG(®isterScore)); + assert(allocatedReg != REG_NA); INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_COPY_REG, currentInterval, allocatedReg, nullptr, registerScore)); @@ -12002,9 +11980,7 @@ void LinearScan::RegisterSelection::calculateCoversSets() // Return Values: // Register bit selected (a single register) and REG_NA if no register was selected. // -#ifdef TARGET_ARM64 template -#endif regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)) { @@ -12213,10 +12189,9 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval, // Eliminate candidates that are in-use or busy. if (!found) { - /* - * we assign same registerAssignment to UPPER_RESTORE and the next USE. When we allocate for - * USE, we see that the same register is now busy and so don't have candidates left. - */ + // TODO-CQ: We assign same registerAssignment to UPPER_RESTORE and the next USE. + // When we allocate for USE, we see that the register is busy at current location + // and we end up with that candidate is no longer available. 
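            // Illustrative scenario for the TODO-CQ above (register numbers are hypothetical):
            // if an UPPER_RESTORE at this location was handed V12 and the consecutive USE
            // series carries the same V12 in its registerAssignment, V12 lands in busyRegs
            // below and the series starting at V12 is filtered out of the candidates even
            // if it would have been the cheapest pick.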
regMaskTP busyRegs = linearScan->regsBusyUntilKill | linearScan->regsInUseThisLocation; candidates &= ~busyRegs; diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 6fd9d1c6bba165..7c5b803f9abf96 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1168,13 +1168,9 @@ class LinearScan : public LinearScanInterface #ifdef DEBUG const char* getScoreName(RegisterScore score); #endif -#ifdef TARGET_ARM64 template -#endif regNumber allocateReg(Interval* current, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)); -#ifdef TARGET_ARM64 template -#endif regNumber assignCopyReg(RefPosition* refPosition); bool isMatchingConstant(RegRecord* physRegRecord, RefPosition* refPosition); @@ -1243,10 +1239,8 @@ class LinearScan : public LinearScanInterface public: RegisterSelection(LinearScan* linearScan); -// Perform register selection and update currentInterval or refPosition -#ifdef TARGET_ARM64 + // Perform register selection and update currentInterval or refPosition template -#endif FORCEINLINE regMaskTP select(Interval* currentInterval, RefPosition* refPosition DEBUG_ARG(RegisterScore* registerScore)); From ab007d0b5a4cdad281d40a320f69eb936d421a87 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 22 Mar 2023 22:54:36 -0700 Subject: [PATCH 114/125] Handle case of round-robin in getConsecutiveRegisters() --- src/coreclr/jit/lsraarm64.cpp | 59 +++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 1032854cb2f3c8..15844675423149 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -237,6 +237,59 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, currAvailableRegs &= ~endMask; } while (currAvailableRegs != RBM_NONE); + if ((candidates & 0x8000000100000000) == 0x8000000100000000) + { + // Finally, check for round robin case where sequence of last register + // round to first register is available. + // For n registers needed, it checks if MSB (n-1) + LSB (1) or + // MSB (n - 2) + LSB (2) registers are available and if yes, + // set the least bit of such MSB. + // + // This could have done using bit-twiddling, but is simpler when the + // checks are done with these hardcoded values. + switch (registersNeeded) + { + case 2: + if ((candidates & 0x8000000100000000) != RBM_NONE) + { + consecutiveResult |= 0x8000000000000000; + overallResult |= 0x8000000100000000; + } + break; + case 3: + if ((candidates & 0xC000000100000000) != RBM_NONE) + { + consecutiveResult |= 0x4000000000000000; + overallResult |= 0xC000000100000000; + } + if ((candidates & 0x8000000300000000) != RBM_NONE) + { + consecutiveResult |= 0x8000000000000000; + overallResult |= 0x8000000300000000; + } + break; + case 4: + if ((candidates & 0xE000000100000000) != RBM_NONE) + { + consecutiveResult |= 0x2000000000000000; + overallResult |= 0xE000000100000000; + } + if ((candidates & 0xC000000300000000) != RBM_NONE) + { + consecutiveResult |= 0x4000000000000000; + overallResult |= 0xC000000300000000; + } + if ((candidates & 0x8000000700000000) != RBM_NONE) + { + consecutiveResult |= 0x8000000000000000; + overallResult |= 0x8000000700000000; + } + break; + default: + assert(!"Unexpected registersNeeded\n"); + } + } + *allConsecutiveCandidates = overallResult; return consecutiveResult; } @@ -255,6 +308,12 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, // Returns: // Register mask of consecutive registers. 
// +// Notes: +// The consecutive registers mask includes just the bits of first registers or +// (n - k) registers. For example, if we need 3 consecutive registers and +// allCandidates = 0x1C080D0F00000000, the consecutive register mask returned +// will be 0x400000300000000. +// regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP allCandidates, RefPosition* refPosition, regMaskTP* busyCandidates) From 5d6cc2dd40fa771374c8635f00c003fa4aac9ec5 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 24 Mar 2023 09:37:31 -0700 Subject: [PATCH 115/125] Disable tests for Mono --- .../Arm/Shared/VectorLookupExtension_2Test.template | 1 + .../Arm/Shared/VectorLookupExtension_3Test.template | 1 + .../Arm/Shared/VectorLookupExtension_4Test.template | 1 + .../HardwareIntrinsics/Arm/Shared/VectorLookup_2Test.template | 1 + .../HardwareIntrinsics/Arm/Shared/VectorLookup_3Test.template | 1 + .../HardwareIntrinsics/Arm/Shared/VectorLookup_4Test.template | 1 + 6 files changed, 6 insertions(+) diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_2Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_2Test.template index 2db90278bb29e1..66e32cf1ae707a 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_2Test.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_2Test.template @@ -18,6 +18,7 @@ namespace JIT.HardwareIntrinsics.Arm { public static partial class Program { + [ActiveIssue("https://github.com/dotnet/runtime/issues/83891", TestRuntimes.Mono)] [Fact] public static void VectorLookupExtension_2_{RetBaseType}() { diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_3Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_3Test.template index b02c6ce4949280..56e6f5639c8422 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_3Test.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_3Test.template @@ -18,6 +18,7 @@ namespace JIT.HardwareIntrinsics.Arm { public static partial class Program { + [ActiveIssue("https://github.com/dotnet/runtime/issues/83891", TestRuntimes.Mono)] [Fact] public static void VectorLookupExtension_3_{RetBaseType}() { diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_4Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_4Test.template index f5e00730f15279..06d2bb8ea97f89 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_4Test.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_4Test.template @@ -18,6 +18,7 @@ namespace JIT.HardwareIntrinsics.Arm { public static partial class Program { + [ActiveIssue("https://github.com/dotnet/runtime/issues/83891", TestRuntimes.Mono)] [Fact] public static void VectorLookupExtension_4_{RetBaseType}() { diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_2Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_2Test.template index 11c911094b416c..929bf9372a9474 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_2Test.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_2Test.template @@ -18,6 +18,7 @@ namespace JIT.HardwareIntrinsics.Arm { public static partial class Program { + [ActiveIssue("https://github.com/dotnet/runtime/issues/83891", TestRuntimes.Mono)] [Fact] public static void 
VectorLookup_2_{RetBaseType}() { diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_3Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_3Test.template index eff5fb5f478564..8c73c37dcec0b7 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_3Test.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_3Test.template @@ -18,6 +18,7 @@ namespace JIT.HardwareIntrinsics.Arm { public static partial class Program { + [ActiveIssue("https://github.com/dotnet/runtime/issues/83891", TestRuntimes.Mono)] [Fact] public static void VectorLookup_3_{RetBaseType}() { diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_4Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_4Test.template index d4536cfb144b7b..8bda76b143c6ad 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_4Test.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_4Test.template @@ -18,6 +18,7 @@ namespace JIT.HardwareIntrinsics.Arm { public static partial class Program { + [ActiveIssue("https://github.com/dotnet/runtime/issues/83891", TestRuntimes.Mono)] [Fact] public static void VectorLookup_4_{RetBaseType}() { From 24e6158373f32f8afd86c437052a95f4aec0183e Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Sun, 26 Mar 2023 10:44:25 -0700 Subject: [PATCH 116/125] Initialize outArray in test --- .../Arm/Shared/VectorLookupExtension_2Test.template | 1 + .../HardwareIntrinsics/Arm/Shared/VectorLookup_2Test.template | 1 + 2 files changed, 2 insertions(+) diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_2Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_2Test.template index 66e32cf1ae707a..e24dbf269f682c 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_2Test.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_2Test.template @@ -89,6 +89,7 @@ namespace JIT.HardwareIntrinsics.Arm this.inArray1 = new byte[alignment * 2]; this.inArray2 = new byte[alignment * 2]; this.inArray3 = new byte[alignment * 2]; + this.outArray = new byte[alignment * 2]; this.inHandle0 = GCHandle.Alloc(this.inArray0, GCHandleType.Pinned); this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned); diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_2Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_2Test.template index 929bf9372a9474..5c7a4ea4b6ab42 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_2Test.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_2Test.template @@ -85,6 +85,7 @@ namespace JIT.HardwareIntrinsics.Arm this.inArray1 = new byte[alignment * 2]; this.inArray2 = new byte[alignment * 2]; this.inArray3 = new byte[alignment * 2]; + this.outArray = new byte[alignment * 2]; this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned); this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned); From 4026aa68adc46f31004282dea082a84ebcc75d3a Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 29 Mar 2023 19:04:10 -0700 Subject: [PATCH 117/125] Add IsSupported checks for VectorLookup/VectorLookupExtension --- .../VectorLookupExtension_2Test.template | 63 ++++++++++++++----- .../VectorLookupExtension_3Test.template | 63 ++++++++++++++----- .../VectorLookupExtension_4Test.template | 63 ++++++++++++++----- .../Arm/Shared/VectorLookup_2Test.template | 63 
++++++++++++++----- .../Arm/Shared/VectorLookup_3Test.template | 63 ++++++++++++++----- .../Arm/Shared/VectorLookup_4Test.template | 63 ++++++++++++++----- 6 files changed, 282 insertions(+), 96 deletions(-) diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_2Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_2Test.template index e24dbf269f682c..9bb30aefb30165 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_2Test.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_2Test.template @@ -24,29 +24,37 @@ namespace JIT.HardwareIntrinsics.Arm { var test = new VectorLookupExtension_2Test__{Method}{RetBaseType}(); - // Validates basic functionality works, using Unsafe.Read - test.RunBasicScenario_UnsafeRead(); + if (test.IsSupported) + { + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); - // Validates calling via reflection works, using Unsafe.Read - test.RunReflectionScenario_UnsafeRead(); + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); - // Validates passing a static member works - test.RunClsVarScenario(); + // Validates passing a static member works + test.RunClsVarScenario(); - // Validates passing a local works, using Unsafe.Read - test.RunLclVarScenario_UnsafeRead(); + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); - // Validates passing the field of a local class works - test.RunClassLclFldScenario(); + // Validates passing the field of a local class works + test.RunClassLclFldScenario(); - // Validates passing an instance member of a class works - test.RunClassFldScenario(); + // Validates passing an instance member of a class works + test.RunClassFldScenario(); - // Validates passing the field of a local struct works - test.RunStructLclFldScenario(); + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); - // Validates passing an instance member of a struct works - test.RunStructFldScenario(); + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + } + else + { + // Validates we throw on unsupported hardware + test.RunUnsupportedScenario(); + } if (!test.Succeeded) { @@ -213,6 +221,8 @@ namespace JIT.HardwareIntrinsics.Arm _dataTable = new DataTable(_data0, _data1, _data2, _data3, new {RetBaseType}[RetElementCount], LargestVectorSize); } + public bool IsSupported => {Isa}.IsSupported; + public bool Succeeded { get; set; } public void RunBasicScenario_UnsafeRead() @@ -336,6 +346,27 @@ namespace JIT.HardwareIntrinsics.Arm test.RunStructFldScenario(this); } + public void RunUnsupportedScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario)); + + bool succeeded = false; + + try + { + RunBasicScenario_UnsafeRead(); + } + catch (PlatformNotSupportedException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + private void ValidateResult({Op2VectorType}<{Op2BaseType}> op0, {Op1VectorType}<{Op1BaseType}> op1, {Op1VectorType}<{Op1BaseType}> op2, {Op2VectorType}<{Op2BaseType}> op3, void* result, [CallerMemberName] string method = "") { {Op2BaseType}[] inArray0 = new {Op2BaseType}[Op2ElementCount]; diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_3Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_3Test.template index 
56e6f5639c8422..442b903788ca8a 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_3Test.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_3Test.template @@ -24,29 +24,37 @@ namespace JIT.HardwareIntrinsics.Arm { var test = new VectorLookupExtension_3Test__{Method}{RetBaseType}(); - // Validates basic functionality works, using Unsafe.Read - test.RunBasicScenario_UnsafeRead(); + if (test.IsSupported) + { + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); - // Validates calling via reflection works, using Unsafe.Read - test.RunReflectionScenario_UnsafeRead(); + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); - // Validates passing a static member works - test.RunClsVarScenario(); + // Validates passing a static member works + test.RunClsVarScenario(); - // Validates passing a local works, using Unsafe.Read - test.RunLclVarScenario_UnsafeRead(); + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); - // Validates passing the field of a local class works - test.RunClassLclFldScenario(); + // Validates passing the field of a local class works + test.RunClassLclFldScenario(); - // Validates passing an instance member of a class works - test.RunClassFldScenario(); + // Validates passing an instance member of a class works + test.RunClassFldScenario(); - // Validates passing the field of a local struct works - test.RunStructLclFldScenario(); + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); - // Validates passing an instance member of a struct works - test.RunStructFldScenario(); + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + } + else + { + // Validates we throw on unsupported hardware + test.RunUnsupportedScenario(); + } if (!test.Succeeded) { @@ -232,6 +240,8 @@ namespace JIT.HardwareIntrinsics.Arm _dataTable = new DataTable(_data0, _data1, _data2, _data3, _data4, new {RetBaseType}[RetElementCount], LargestVectorSize); } + public bool IsSupported => {Isa}.IsSupported; + public bool Succeeded { get; set; } public void RunBasicScenario_UnsafeRead() @@ -361,6 +371,27 @@ namespace JIT.HardwareIntrinsics.Arm test.RunStructFldScenario(this); } + public void RunUnsupportedScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario)); + + bool succeeded = false; + + try + { + RunBasicScenario_UnsafeRead(); + } + catch (PlatformNotSupportedException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + private void ValidateResult({Op2VectorType}<{Op2BaseType}> op0, {Op1VectorType}<{Op1BaseType}> op1, {Op1VectorType}<{Op1BaseType}> op2, {Op1VectorType}<{Op1BaseType}> op3, {Op2VectorType}<{Op2BaseType}> op4, void* result, [CallerMemberName] string method = "") { {Op2BaseType}[] inArray0 = new {Op2BaseType}[Op2ElementCount]; diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_4Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_4Test.template index 06d2bb8ea97f89..21bef8c7f049b6 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_4Test.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_4Test.template @@ -24,29 +24,37 @@ namespace JIT.HardwareIntrinsics.Arm { var test = new VectorLookupExtension_4Test__{Method}{RetBaseType}(); - // Validates basic 
functionality works, using Unsafe.Read - test.RunBasicScenario_UnsafeRead(); + if (test.IsSupported) + { + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); - // Validates calling via reflection works, using Unsafe.Read - test.RunReflectionScenario_UnsafeRead(); + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); - // Validates passing a static member works - test.RunClsVarScenario(); + // Validates passing a static member works + test.RunClsVarScenario(); - // Validates passing a local works, using Unsafe.Read - test.RunLclVarScenario_UnsafeRead(); + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); - // Validates passing the field of a local class works - test.RunClassLclFldScenario(); + // Validates passing the field of a local class works + test.RunClassLclFldScenario(); - // Validates passing an instance member of a class works - test.RunClassFldScenario(); + // Validates passing an instance member of a class works + test.RunClassFldScenario(); - // Validates passing the field of a local struct works - test.RunStructLclFldScenario(); + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); - // Validates passing an instance member of a struct works - test.RunStructFldScenario(); + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + } + else + { + // Validates we throw on unsupported hardware + test.RunUnsupportedScenario(); + } if (!test.Succeeded) { @@ -251,6 +259,8 @@ namespace JIT.HardwareIntrinsics.Arm _dataTable = new DataTable(_data0, _data1, _data2, _data3, _data4, _data5, new {RetBaseType}[RetElementCount], LargestVectorSize); } + public bool IsSupported => {Isa}.IsSupported; + public bool Succeeded { get; set; } public void RunBasicScenario_UnsafeRead() @@ -386,6 +396,27 @@ namespace JIT.HardwareIntrinsics.Arm test.RunStructFldScenario(this); } + public void RunUnsupportedScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario)); + + bool succeeded = false; + + try + { + RunBasicScenario_UnsafeRead(); + } + catch (PlatformNotSupportedException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + private void ValidateResult({Op2VectorType}<{Op2BaseType}> op0, {Op1VectorType}<{Op1BaseType}> op1, {Op1VectorType}<{Op1BaseType}> op2, {Op1VectorType}<{Op1BaseType}> op3, {Op1VectorType}<{Op1BaseType}> op4, {Op2VectorType}<{Op2BaseType}> op5, void* result, [CallerMemberName] string method = "") { {Op2BaseType}[] inArray0 = new {Op2BaseType}[Op2ElementCount]; diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_2Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_2Test.template index 5c7a4ea4b6ab42..f0438833d0e84f 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_2Test.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_2Test.template @@ -24,29 +24,37 @@ namespace JIT.HardwareIntrinsics.Arm { var test = new VectorLookup_2Test__{Method}{RetBaseType}(); - // Validates basic functionality works, using Unsafe.Read - test.RunBasicScenario_UnsafeRead(); + if (test.IsSupported) + { + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); - // Validates calling via reflection works, using Unsafe.Read - test.RunReflectionScenario_UnsafeRead(); + // Validates calling via reflection works, using 
Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); - // Validates passing a static member works - test.RunClsVarScenario(); + // Validates passing a static member works + test.RunClsVarScenario(); - // Validates passing a local works, using Unsafe.Read - test.RunLclVarScenario_UnsafeRead(); + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); - // Validates passing the field of a local class works - test.RunClassLclFldScenario(); + // Validates passing the field of a local class works + test.RunClassLclFldScenario(); - // Validates passing an instance member of a class works - test.RunClassFldScenario(); + // Validates passing an instance member of a class works + test.RunClassFldScenario(); - // Validates passing the field of a local struct works - test.RunStructLclFldScenario(); + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); - // Validates passing an instance member of a struct works - test.RunStructFldScenario(); + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + } + else + { + // Validates we throw on unsupported hardware + test.RunUnsupportedScenario(); + } if (!test.Succeeded) { @@ -194,6 +202,8 @@ namespace JIT.HardwareIntrinsics.Arm _dataTable = new DataTable(_data1, _data2, _data3, new {RetBaseType}[RetElementCount], LargestVectorSize); } + public bool IsSupported => {Isa}.IsSupported; + public bool Succeeded { get; set; } public void RunBasicScenario_UnsafeRead() @@ -311,6 +321,27 @@ namespace JIT.HardwareIntrinsics.Arm test.RunStructFldScenario(this); } + public void RunUnsupportedScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario)); + + bool succeeded = false; + + try + { + RunBasicScenario_UnsafeRead(); + } + catch (PlatformNotSupportedException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + private void ValidateResult({Op1VectorType}<{Op1BaseType}> op1, {Op1VectorType}<{Op1BaseType}> op2, {Op2VectorType}<{Op2BaseType}> op3, void* result, [CallerMemberName] string method = "") { {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_3Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_3Test.template index 8c73c37dcec0b7..19de6b77ae4562 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_3Test.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_3Test.template @@ -24,29 +24,37 @@ namespace JIT.HardwareIntrinsics.Arm { var test = new VectorLookup_3Test__{Method}{RetBaseType}(); - // Validates basic functionality works, using Unsafe.Read - test.RunBasicScenario_UnsafeRead(); + if (test.IsSupported) + { + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); - // Validates calling via reflection works, using Unsafe.Read - test.RunReflectionScenario_UnsafeRead(); + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); - // Validates passing a static member works - test.RunClsVarScenario(); + // Validates passing a static member works + test.RunClsVarScenario(); - // Validates passing a local works, using Unsafe.Read - test.RunLclVarScenario_UnsafeRead(); + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); - // Validates passing the field of a local class works - test.RunClassLclFldScenario(); + // Validates passing 
the field of a local class works + test.RunClassLclFldScenario(); - // Validates passing an instance member of a class works - test.RunClassFldScenario(); + // Validates passing an instance member of a class works + test.RunClassFldScenario(); - // Validates passing the field of a local struct works - test.RunStructLclFldScenario(); + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); - // Validates passing an instance member of a struct works - test.RunStructFldScenario(); + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + } + else + { + // Validates we throw on unsupported hardware + test.RunUnsupportedScenario(); + } if (!test.Succeeded) { @@ -213,6 +221,8 @@ namespace JIT.HardwareIntrinsics.Arm _dataTable = new DataTable(_data1, _data2, _data3, _data4, new {RetBaseType}[RetElementCount], LargestVectorSize); } + public bool IsSupported => {Isa}.IsSupported; + public bool Succeeded { get; set; } public void RunBasicScenario_UnsafeRead() @@ -336,6 +346,27 @@ namespace JIT.HardwareIntrinsics.Arm test.RunStructFldScenario(this); } + public void RunUnsupportedScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario)); + + bool succeeded = false; + + try + { + RunBasicScenario_UnsafeRead(); + } + catch (PlatformNotSupportedException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + private void ValidateResult({Op1VectorType}<{Op1BaseType}> op1, {Op1VectorType}<{Op1BaseType}> op2, {Op1VectorType}<{Op1BaseType}> op3, {Op2VectorType}<{Op2BaseType}> op4, void* result, [CallerMemberName] string method = "") { {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_4Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_4Test.template index 8bda76b143c6ad..3bb0f695dd32cc 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_4Test.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_4Test.template @@ -24,29 +24,37 @@ namespace JIT.HardwareIntrinsics.Arm { var test = new VectorLookup_4Test__{Method}{RetBaseType}(); - // Validates basic functionality works, using Unsafe.Read - test.RunBasicScenario_UnsafeRead(); + if (test.IsSupported) + { + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); - // Validates calling via reflection works, using Unsafe.Read - test.RunReflectionScenario_UnsafeRead(); + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); - // Validates passing a static member works - test.RunClsVarScenario(); + // Validates passing a static member works + test.RunClsVarScenario(); - // Validates passing a local works, using Unsafe.Read - test.RunLclVarScenario_UnsafeRead(); + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); - // Validates passing the field of a local class works - test.RunClassLclFldScenario(); + // Validates passing the field of a local class works + test.RunClassLclFldScenario(); - // Validates passing an instance member of a class works - test.RunClassFldScenario(); + // Validates passing an instance member of a class works + test.RunClassFldScenario(); - // Validates passing the field of a local struct works - test.RunStructLclFldScenario(); + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); - // Validates passing an 
instance member of a struct works - test.RunStructFldScenario(); + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + } + else + { + // Validates we throw on unsupported hardware + test.RunUnsupportedScenario(); + } if (!test.Succeeded) { @@ -232,6 +240,8 @@ namespace JIT.HardwareIntrinsics.Arm _dataTable = new DataTable(_data1, _data2, _data3, _data4, _data5, new {RetBaseType}[RetElementCount], LargestVectorSize); } + public bool IsSupported => {Isa}.IsSupported; + public bool Succeeded { get; set; } public void RunBasicScenario_UnsafeRead() @@ -361,6 +371,27 @@ namespace JIT.HardwareIntrinsics.Arm test.RunStructFldScenario(this); } + public void RunUnsupportedScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario)); + + bool succeeded = false; + + try + { + RunBasicScenario_UnsafeRead(); + } + catch (PlatformNotSupportedException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + private void ValidateResult({Op1VectorType}<{Op1BaseType}> op1, {Op1VectorType}<{Op1BaseType}> op2, {Op1VectorType}<{Op1BaseType}> op3, {Op1VectorType}<{Op1BaseType}> op4, {Op2VectorType}<{Op2BaseType}> op5, void* result, [CallerMemberName] string method = "") { {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; From 53c91f09ca640f0bd83d835f5e8aadba6e58cd15 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 29 Mar 2023 22:41:53 -0700 Subject: [PATCH 118/125] Fix the test cases for RunReflectionScenario_UnsafeRead() --- .../VectorLookupExtension_2Test.template | 16 ++------------ .../VectorLookupExtension_3Test.template | 19 ++-------------- .../VectorLookupExtension_4Test.template | 22 ++----------------- .../Arm/Shared/VectorLookup_2Test.template | 15 ++----------- .../Arm/Shared/VectorLookup_3Test.template | 18 ++------------- .../Arm/Shared/VectorLookup_4Test.template | 21 ++---------------- 6 files changed, 12 insertions(+), 99 deletions(-) diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_2Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_2Test.template index 9bb30aefb30165..a9b3b45ec11b32 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_2Test.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_2Test.template @@ -246,21 +246,10 @@ namespace JIT.HardwareIntrinsics.Arm var method = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { typeof({Op2VectorType}<{Op2BaseType}>), - typeof({Op1VectorType}<{Op1BaseType}>), - typeof({Op1VectorType}<{Op1BaseType}>), + typeof(({Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>)), typeof({Op2VectorType}<{Op2BaseType}>) }); - if (method is null) - { - method = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), 1, new Type[] { - typeof({Op2VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), - typeof({Op1VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), - typeof({Op1VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), - typeof({Op2VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)) - }); - } - if (method.IsGenericMethodDefinition) { method = method.MakeGenericMethod(typeof({RetBaseType})); @@ -268,8 +257,7 @@ namespace JIT.HardwareIntrinsics.Arm var result = method.Invoke(null, new object[] { Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray0Ptr), - Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), - 
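The gating introduced here runs the scenarios only when the ISA reports support and otherwise verifies that the intrinsic throws. A self-contained sketch of the same idea, written against the existing single-table overload rather than the generated template code:

using System;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;

class SupportGatingSketch
{
    static void Main()
    {
        Vector128<byte> table = Vector128<byte>.Zero;
        Vector128<byte> indexes = Vector128<byte>.Zero;

        if (AdvSimd.Arm64.IsSupported)
        {
            // Supported hardware: actually exercise the intrinsic.
            Console.WriteLine(AdvSimd.Arm64.VectorTableLookup(table, indexes));
        }
        else
        {
            // Unsupported hardware: the call must throw PlatformNotSupportedException.
            try
            {
                AdvSimd.Arm64.VectorTableLookup(table, indexes);
                Console.WriteLine("FAIL: expected PlatformNotSupportedException");
            }
            catch (PlatformNotSupportedException)
            {
                Console.WriteLine("PASS: threw as expected");
            }
        }
    }
}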
Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr), + (Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr)), Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray3Ptr) }); diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_3Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_3Test.template index 442b903788ca8a..835bffa4afee00 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_3Test.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_3Test.template @@ -266,23 +266,10 @@ namespace JIT.HardwareIntrinsics.Arm var method = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { typeof({Op2VectorType}<{Op2BaseType}>), - typeof({Op1VectorType}<{Op1BaseType}>), - typeof({Op1VectorType}<{Op1BaseType}>), - typeof({Op1VectorType}<{Op1BaseType}>), + typeof(({Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>)), typeof({Op2VectorType}<{Op2BaseType}>) }); - if (method is null) - { - method = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), 1, new Type[] { - typeof({Op2VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), - typeof({Op1VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), - typeof({Op1VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), - typeof({Op1VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), - typeof({Op2VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)) - }); - } - if (method.IsGenericMethodDefinition) { method = method.MakeGenericMethod(typeof({RetBaseType})); @@ -290,9 +277,7 @@ namespace JIT.HardwareIntrinsics.Arm var result = method.Invoke(null, new object[] { Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray0Ptr), - Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), - Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr), - Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray3Ptr), + (Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr), Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray3Ptr)), Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray4Ptr) }); diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_4Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_4Test.template index 21bef8c7f049b6..c8a59cbc97f445 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_4Test.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_4Test.template @@ -286,25 +286,10 @@ namespace JIT.HardwareIntrinsics.Arm var method = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { typeof({Op2VectorType}<{Op2BaseType}>), - typeof({Op1VectorType}<{Op1BaseType}>), - typeof({Op1VectorType}<{Op1BaseType}>), - typeof({Op1VectorType}<{Op1BaseType}>), - typeof({Op1VectorType}<{Op1BaseType}>), + typeof(({Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>)), typeof({Op2VectorType}<{Op2BaseType}>) }); - if (method is null) - { - method = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), 1, new Type[] { - typeof({Op1VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), - 
typeof({Op1VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), - typeof({Op1VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), - typeof({Op1VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), - typeof({Op1VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), - typeof({Op2VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)) - }); - } - if (method.IsGenericMethodDefinition) { method = method.MakeGenericMethod(typeof({RetBaseType})); @@ -312,10 +297,7 @@ namespace JIT.HardwareIntrinsics.Arm var result = method.Invoke(null, new object[] { Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray0Ptr), - Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), - Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr), - Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray3Ptr), - Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray4Ptr), + (Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr), Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray3Ptr), Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray4Ptr)), Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray5Ptr) }); diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_2Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_2Test.template index f0438833d0e84f..5d06cc9d4924fa 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_2Test.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_2Test.template @@ -225,28 +225,17 @@ namespace JIT.HardwareIntrinsics.Arm TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); var method = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { - typeof({Op1VectorType}<{Op1BaseType}>), - typeof({Op1VectorType}<{Op1BaseType}>), + typeof(({Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>)), typeof({Op2VectorType}<{Op2BaseType}>) }); - if (method is null) - { - method = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), 1, new Type[] { - typeof({Op1VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), - typeof({Op1VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), - typeof({Op2VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)) - }); - } - if (method.IsGenericMethodDefinition) { method = method.MakeGenericMethod(typeof({RetBaseType})); } var result = method.Invoke(null, new object[] { - Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), - Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr), + (Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr)), Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray3Ptr) }); diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_3Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_3Test.template index 19de6b77ae4562..a693f71e202477 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_3Test.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_3Test.template @@ -245,31 +245,17 @@ namespace JIT.HardwareIntrinsics.Arm TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); var method = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { - 
typeof({Op1VectorType}<{Op1BaseType}>), - typeof({Op1VectorType}<{Op1BaseType}>), - typeof({Op1VectorType}<{Op1BaseType}>), + typeof(({Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>)), typeof({Op2VectorType}<{Op2BaseType}>) }); - if (method is null) - { - method = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), 1, new Type[] { - typeof({Op1VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), - typeof({Op1VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), - typeof({Op1VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), - typeof({Op2VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)) - }); - } - if (method.IsGenericMethodDefinition) { method = method.MakeGenericMethod(typeof({RetBaseType})); } var result = method.Invoke(null, new object[] { - Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), - Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr), - Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray3Ptr), + (Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr), Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray3Ptr)), Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray4Ptr) }); diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_4Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_4Test.template index 3bb0f695dd32cc..ba872cbe23b40e 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_4Test.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookup_4Test.template @@ -265,34 +265,17 @@ namespace JIT.HardwareIntrinsics.Arm TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); var method = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { - typeof({Op1VectorType}<{Op1BaseType}>), - typeof({Op1VectorType}<{Op1BaseType}>), - typeof({Op1VectorType}<{Op1BaseType}>), - typeof({Op1VectorType}<{Op1BaseType}>), + typeof(({Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>, {Op1VectorType}<{Op1BaseType}>)), typeof({Op2VectorType}<{Op2BaseType}>) }); - if (method is null) - { - method = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), 1, new Type[] { - typeof({Op1VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), - typeof({Op1VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), - typeof({Op1VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), - typeof({Op1VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), - typeof({Op2VectorType}<>).MakeGenericType(Type.MakeGenericMethodParameter(0)) - }); - } - if (method.IsGenericMethodDefinition) { method = method.MakeGenericMethod(typeof({RetBaseType})); } var result = method.Invoke(null, new object[] { - Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), - Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr), - Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray3Ptr), - Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray4Ptr), + (Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr), Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray3Ptr), Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray4Ptr)), 
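The reflection scenarios change because the multi-table overloads take a single ValueTuple parameter, so GetMethod is given the tuple type and Invoke receives one boxed tuple. A standalone sketch of that lookup, using the concrete byte element type in place of the template placeholders:

using System;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;

class ReflectionLookupSketch
{
    static void Main()
    {
        // Describe the two-table overload: ((Vector128<byte>, Vector128<byte>), Vector128<byte>).
        var method = typeof(AdvSimd.Arm64).GetMethod(
            nameof(AdvSimd.Arm64.VectorTableLookup),
            new Type[]
            {
                typeof((Vector128<byte>, Vector128<byte>)),
                typeof(Vector128<byte>)
            });

        Console.WriteLine(method is null ? "overload not found" : method.ToString());

        if (method is not null && AdvSimd.Arm64.IsSupported)
        {
            // The tuple argument is passed to Invoke as a single boxed value.
            object result = method.Invoke(null, new object[]
            {
                (Vector128<byte>.Zero, Vector128<byte>.Zero),
                Vector128<byte>.Zero
            });
            Console.WriteLine(result);
        }
    }
}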
Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray5Ptr) }); From 7d168b2f123597f2e04a02376c5754cd763a2999 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 29 Mar 2023 23:17:40 -0700 Subject: [PATCH 119/125] Review feedback --- src/coreclr/jit/gentree.cpp | 11 ++++ src/coreclr/jit/lsra.cpp | 10 +-- src/coreclr/jit/lsra.h | 10 +-- src/coreclr/jit/lsraarm64.cpp | 112 +++++++++++++++++++--------------- src/coreclr/jit/lsrabuild.cpp | 2 +- 5 files changed, 87 insertions(+), 58 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 0438ff15bdbe6d..269907cdb4397c 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -23955,6 +23955,17 @@ GenTree* Compiler::gtNewSimdWithElementNode(var_types type, } #ifdef TARGET_ARM64 +//------------------------------------------------------------------------ +// gtConvertTableOpToFieldList: Convert a operand that represents table of rows into +// field list, where each field represents a row in the table. +// +// Arguments: +// op -- Operand to convert. +// fieldCount -- Number of fields or rows present. +// +// Return Value: +// The GenTreeFieldList node. +// GenTreeFieldList* Compiler::gtConvertTableOpToFieldList(GenTree* op, unsigned fieldCount) { LclVarDsc* opVarDsc = lvaGetDesc(op->AsLclVar()); diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 7f834e15eac340..1cb0facc824e87 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -500,7 +500,7 @@ regMaskTP LinearScan::stressLimitRegs(RefPosition* refPosition, regMaskTP mask) #ifdef TARGET_ARM64 if ((refPosition != nullptr) && refPosition->isFirstRefPositionOfConsecutiveRegisters()) { - mask |= LsraExtraFPSetForConsecutive; + mask |= LsraLimitFPSetForConsecutive; } #endif } @@ -5367,7 +5367,7 @@ void LinearScan::allocateRegisters() currentRefPosition.registerAssignment = assignedRegBit; INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, currentInterval, assignedRegister)); - setNextConsecutiveRegisterAssignment(¤tRefPosition, assignedRegister); + assignConsecutiveRegisters(¤tRefPosition, assignedRegister); } else { @@ -5437,7 +5437,7 @@ void LinearScan::allocateRegisters() currentRefPosition.registerAssignment = assignedRegBit; } - setNextConsecutiveRegisterAssignment(¤tRefPosition, copyReg); + assignConsecutiveRegisters(¤tRefPosition, copyReg); continue; } } @@ -5493,7 +5493,7 @@ void LinearScan::allocateRegisters() // If the first RefPosition was not assigned to the register that we wanted, we added // a copyReg for it. Allocate subsequent RefPositions with the consecutive // registers. - setNextConsecutiveRegisterAssignment(¤tRefPosition, copyReg); + assignConsecutiveRegisters(¤tRefPosition, copyReg); } // For consecutive register, it doesn't matter what the assigned register was. 
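assignConsecutiveRegisters hands each subsequent RefPosition the next SIMD register, wrapping from the last register back to the first (REG_FP_LAST to REG_FP_FIRST above). A small C# model of just that numbering, assuming the 32 arm64 SIMD registers V0..V31; this is an illustration, not JIT code:

using System;

class ConsecutiveAssignmentSketch
{
    // If the first RefPosition gets Vn, the k-th subsequent one gets V((n + k) % 32).
    static int[] AssignConsecutive(int firstReg, int count)
    {
        const int SimdRegCount = 32; // V0..V31 on arm64
        int[] regs = new int[count];
        for (int i = 0; i < count; i++)
        {
            regs[i] = (firstReg + i) % SimdRegCount;
        }
        return regs;
    }

    static void Main()
    {
        // A 3-register table whose first position landed on V30 uses V30, V31, V0.
        Console.WriteLine(string.Join(", ", Array.ConvertAll(AssignConsecutive(30, 3), r => $"V{r}")));
    }
}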
@@ -5660,7 +5660,7 @@ void LinearScan::allocateRegisters() allocateReg(currentInterval, ¤tRefPosition DEBUG_ARG(®isterScore)); if (currentRefPosition.isFirstRefPositionOfConsecutiveRegisters()) { - setNextConsecutiveRegisterAssignment(¤tRefPosition, assignedRegister); + assignConsecutiveRegisters(¤tRefPosition, assignedRegister); } } else diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 7c5b803f9abf96..090841d007f7c4 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -775,9 +775,11 @@ class LinearScan : public LinearScanInterface static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5); static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F16 | RBM_F17); #elif defined(TARGET_ARM64) - static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20); - static const regMaskTP LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9); - static const regMaskTP LsraExtraFPSetForConsecutive = (RBM_V3 | RBM_V5 | RBM_V7); + static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20); + static const regMaskTP LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9); + // LsraLimitFPSetForConsecutive is used for stress mode and gives few extra registers to satisfy + // the requirements for allocating consecutive registers. + static const regMaskTP LsraLimitFPSetForConsecutive = (RBM_V3 | RBM_V5 | RBM_V7); #elif defined(TARGET_X86) static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI); static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7); @@ -1204,7 +1206,7 @@ class LinearScan : public LinearScanInterface #if defined(TARGET_ARM64) bool canAssignNextConsecutiveRegisters(RefPosition* firstRefPosition, regNumber firstRegAssigned); - void setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosition, regNumber firstRegAssigned); + void assignConsecutiveRegisters(RefPosition* firstRefPosition, regNumber firstRegAssigned); regMaskTP getConsecutiveCandidates(regMaskTP candidates, RefPosition* refPosition, regMaskTP* busyCandidates); regMaskTP filterConsecutiveCandidates(regMaskTP candidates, unsigned int registersNeeded, diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 15844675423149..66d6d54b5f8a83 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -26,13 +26,13 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #include "lower.h" //------------------------------------------------------------------------ -// getNextConsecutiveRefPosition: Get the next subsequent refPosition. +// getNextConsecutiveRefPosition: Get the next subsequent RefPosition. // // Arguments: -// refPosition - The refposition for which we need to find next refposition +// refPosition - The RefPosition for which we need to find the next RefPosition. // // Return Value: -// The next refPosition or nullptr if there is not one. +// The next RefPosition or nullptr if there is not one. 
// RefPosition* LinearScan::getNextConsecutiveRefPosition(RefPosition* refPosition) { @@ -45,23 +45,23 @@ RefPosition* LinearScan::getNextConsecutiveRefPosition(RefPosition* refPosition) } //------------------------------------------------------------------------ -// setNextConsecutiveRegisterAssignment: For subsequent refPositions, set the register +// assignConsecutiveRegisters: For subsequent RefPositions, set the register // requirement to be the consecutive register(s) of the register that is assigned to // the firstRefPosition. // If one of the subsequent RefPosition is RefTypeUpperVectorRestore, sets the // registerAssignment to not include any of the consecutive registers that are being -// assigned to the RefTypeUse refpositions. +// assigned to the RefTypeUse RefPositions. // // Arguments: -// firstRefPosition - First refPosition of the series of consecutive registers. -// firstRegAssigned - Register assigned to the first refposition. +// firstRefPosition - First RefPosition of the series of consecutive registers. +// firstRegAssigned - Register assigned to the first RefPosition. // // Note: // This method will set the registerAssignment of subsequent RefPositions with consecutive registers. // Some of the registers could be busy, and they will be spilled. We would end up with busy registers if // we did not find free consecutive registers. // -void LinearScan::setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosition, regNumber firstRegAssigned) +void LinearScan::assignConsecutiveRegisters(RefPosition* firstRefPosition, regNumber firstRegAssigned) { assert(compiler->info.compNeedsConsecutiveRegisters); assert(firstRefPosition->assignedReg() == firstRegAssigned); @@ -71,7 +71,7 @@ void LinearScan::setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosit RefPosition* consecutiveRefPosition = getNextConsecutiveRefPosition(firstRefPosition); regNumber regToAssign = firstRegAssigned == REG_FP_LAST ? REG_FP_FIRST : REG_NEXT(firstRegAssigned); - // First refposition should always start with RefTypeUse + // First RefPosition should always start with RefTypeUse assert(firstRefPosition->refType != RefTypeUpperVectorRestore); INDEBUG(int refPosCount = 1); @@ -115,8 +115,8 @@ void LinearScan::setNextConsecutiveRegisterAssignment(RefPosition* firstRefPosit // consecutive registers are free or are already assigned to the subsequent RefPositions. // // Arguments: -// firstRefPosition - First refPosition of the series of consecutive registers. -// firstRegAssigned - Register assigned to the first refposition. +// firstRefPosition - First RefPosition of the series of consecutive registers. +// firstRegAssigned - Register assigned to the first RefPosition. // // Returns: // True if all the consecutive registers starting from `firstRegAssigned` are assignable. @@ -127,7 +127,8 @@ bool LinearScan::canAssignNextConsecutiveRegisters(RefPosition* firstRefPosition int registersCount = firstRefPosition->regCount; RefPosition* nextRefPosition = firstRefPosition; regNumber regToAssign = firstRegAssigned; - assert(compiler->info.compNeedsConsecutiveRegisters && registersCount > 1); + assert(compiler->info.compNeedsConsecutiveRegisters); + assert(registersCount > 1); assert(emitter::isVectorRegister(firstRegAssigned)); int i = 1; @@ -163,7 +164,7 @@ bool LinearScan::canAssignNextConsecutiveRegisters(RefPosition* firstRefPosition // registers are available in it, and if yes, returns first bit set of every possible series. // // Arguments: -// candidates - Set of availble candidates. 
+// candidates - Set of available candidates. // registersNeeded - Number of consecutive registers needed. // allConsecutiveCandidates - Mask returned containing all bits set for possible consecutive register candidates. // @@ -177,7 +178,7 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, { if (BitOperations::PopCount(candidates) < registersNeeded) { - // There is no way the register demanded can be satisfied for this refposition + // There is no way the register demanded can be satisfied for this RefPosition // based on the candidates from which it can allocate a register. return RBM_NONE; } @@ -228,7 +229,7 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, } regMaskTP endMask = (1ULL << regAvailableEndIndex) - 1; - // Anything between regAvailableStart and regAvailableEnd is the range of consecutive registers available + // Anything between regAvailableStart and regAvailableEnd is the range of consecutive registers available. // If they are equal to or greater than our register requirements, then add all of them to the result. if ((regAvailableEndIndex - regAvailableStartIndex) >= registersNeeded) { @@ -237,7 +238,8 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, currAvailableRegs &= ~endMask; } while (currAvailableRegs != RBM_NONE); - if ((candidates & 0x8000000100000000) == 0x8000000100000000) + regMaskTP v0_v31_mask = RBM_V0 | RBM_V31; + if ((candidates & v0_v31_mask) == v0_v31_mask) { // Finally, check for round robin case where sequence of last register // round to first register is available. @@ -250,41 +252,55 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, switch (registersNeeded) { case 2: - if ((candidates & 0x8000000100000000) != RBM_NONE) + { + if ((candidates & v0_v31_mask) != RBM_NONE) { - consecutiveResult |= 0x8000000000000000; - overallResult |= 0x8000000100000000; + consecutiveResult |= RBM_V31; + overallResult |= v0_v31_mask; } break; + } case 3: - if ((candidates & 0xC000000100000000) != RBM_NONE) + { + regMaskTP v0_v30_v31_mask = RBM_V0 | RBM_V30 | RBM_V31; + if ((candidates & v0_v30_v31_mask) != RBM_NONE) { - consecutiveResult |= 0x4000000000000000; - overallResult |= 0xC000000100000000; + consecutiveResult |= RBM_V30; + overallResult |= v0_v30_v31_mask; } - if ((candidates & 0x8000000300000000) != RBM_NONE) + + regMaskTP v0_v1_v31_mask = RBM_V0 | RBM_V1 | RBM_V31; + if ((candidates & v0_v1_v31_mask) != RBM_NONE) { - consecutiveResult |= 0x8000000000000000; - overallResult |= 0x8000000300000000; + consecutiveResult |= RBM_V31; + overallResult |= v0_v1_v31_mask; } break; + } case 4: - if ((candidates & 0xE000000100000000) != RBM_NONE) + { + regMaskTP v0_v29_v30_v31_mask = RBM_V0 | RBM_V29 | RBM_V30 | RBM_V31; + if ((candidates & v0_v29_v30_v31_mask) != RBM_NONE) { - consecutiveResult |= 0x2000000000000000; - overallResult |= 0xE000000100000000; + consecutiveResult |= RBM_V29; + overallResult |= v0_v29_v30_v31_mask; } - if ((candidates & 0xC000000300000000) != RBM_NONE) + + regMaskTP v0_v1_v30_v31_mask = RBM_V0 | RBM_V29 | RBM_V30 | RBM_V31; + if ((candidates & v0_v1_v30_v31_mask) != RBM_NONE) { - consecutiveResult |= 0x4000000000000000; - overallResult |= 0xC000000300000000; + consecutiveResult |= RBM_V30; + overallResult |= v0_v1_v30_v31_mask; } - if ((candidates & 0x8000000700000000) != RBM_NONE) + + regMaskTP v0_v1_v2_v31_mask = RBM_V0 | RBM_V29 | RBM_V30 | RBM_V31; + if ((candidates & v0_v1_v2_v31_mask) != RBM_NONE) { - consecutiveResult |= 0x8000000000000000; 
- overallResult |= 0x8000000700000000; + consecutiveResult |= RBM_V31; + overallResult |= v0_v1_v2_v31_mask; } break; + } default: assert(!"Unexpected registersNeeded\n"); } @@ -296,13 +312,13 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, //------------------------------------------------------------------------ // getConsecutiveCandidates: Returns the mask of all the consecutive candidates -// for given refPosition. For first RefPosition of a series of refpositions that needs +// for given RefPosition. For first RefPosition of a series of RefPositions that needs // consecutive registers, then returns only the mask such that it satisfies the need // of having free consecutive registers. If free consecutive registers are not available // it finds such a series that needs fewer registers spilling. // // Arguments: -// allCandidates - Register assigned to the first refposition. +// allCandidates - Register assigned to the first RefPosition. // refPosition - Number of registers to check. // // Returns: @@ -333,9 +349,9 @@ regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP allCandidates, regMaskTP consecutiveResultForFree = filterConsecutiveCandidates(freeCandidates, registersNeeded, &overallResult); if (consecutiveResultForFree != RBM_NONE) { - // One last time, check if subsequent refpositions (all refpositions except the first for which + // One last time, check if subsequent RefPositions (all RefPositions except the first for which // we assigned above) already have consecutive registers assigned. If yes, and if one of the - // register out of the `consecutiveResult` is available for the first refposition, then just use + // register out of the `consecutiveResult` is available for the first RefPosition, then just use // that. This will avoid unnecessary copies. regNumber firstRegNum = REG_NA; @@ -399,19 +415,19 @@ regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP allCandidates, // In other words, try to find register sequence that needs fewer registers to be spilled. This // will give optimal CQ. // - // 2. Check if some of the refpositions in the series are already in *somewhat* consecutive registers + // 2. Check if some of the RefPositions in the series are already in *somewhat* consecutive registers // and if yes, assign that register sequence. That way, we will avoid copying values of - // refpositions that are already positioned in the desired registers. Checking this is beneficial + // RefPositions that are already positioned in the desired registers. Checking this is beneficial // only if it can happen frequently. So for RefPositions , it should // be that, RP# 6 is already in V14 and RP# 8 is already in V16. But this can be rare (not tested). // In future, if we see such cases being hit, we could use this heuristics. // // 3. Give one of the free register to the first position and the algorithm will - // give the subsequent consecutive registers (free or busy) to the remaining refpositions + // give the subsequent consecutive registers (free or busy) to the remaining RefPositions // of the series. This may not give optimal CQ however. // // 4. Return the set of available registers and let selection heuristics pick one of them to get - // assigned to the first refposition. Remaining refpositions will be assigned to the subsequent + // assigned to the first RefPosition. Remaining RefPositions will be assigned to the subsequent // registers (if busy, they will be spilled), similar to #3 above and will not give optimal CQ. 
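Option #1 above, picking the range that would need the fewest registers spilled, can be modelled over plain 64-bit masks with one bit per register. This is a simplified sketch that ignores the V31-to-V0 wrap-around case; all names are illustrative:

using System;
using System.Numerics;

class FewestSpillsSketch
{
    // For each candidate start bit, count how many of the 'needed' registers in that
    // range are already free; prefer the start that leaves the fewest to spill.
    static ulong PickFewestSpills(ulong candidateStarts, ulong freeRegs, int needed)
    {
        ulong best = 0;
        int bestSpills = needed + 1;
        ulong remaining = candidateStarts;
        while (remaining != 0)
        {
            int start = BitOperations.TrailingZeroCount(remaining);
            remaining &= remaining - 1; // clear the lowest set bit
            ulong range = ((1UL << needed) - 1) << start;
            int spills = needed - BitOperations.PopCount(range & freeRegs);
            if (spills < bestSpills)
            {
                bestSpills = spills;
                best = 1UL << start;
            }
            else if (spills == bestSpills)
            {
                best |= 1UL << start;
            }
        }
        return best;
    }

    static void Main()
    {
        // Starts at V4 or V20 are possible; V20..V22 overlaps two free registers,
        // so it needs fewer spills than V4..V6.
        ulong starts = (1UL << 4) | (1UL << 20);
        ulong free = (1UL << 5) | (1UL << 21) | (1UL << 22);
        Console.WriteLine(Convert.ToString((long)PickFewestSpills(starts, free, 3), 2));
    }
}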
// // @@ -427,7 +443,7 @@ regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP allCandidates, // We did not find free consecutive candidates, however we found some registers among the `allCandidates` that // are mix of free and busy. Since `busyCandidates` just has bit set for first register of such series, return // the mask that starts with free register, if possible. The busy registers will be spilled during assignment of - // subsequent refposition. + // subsequent RefPosition. *busyCandidates = mixConsecutiveResult; } @@ -1559,7 +1575,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou // the RefPositions created, it sets the `needsConsecutive` flag so it can be used to // identify these RefPositions during allocation. // -// It also populates a `refPositionMap` to access the subsequent RefPositions from +// It also populates a `RefPositionMap` to access the subsequent RefPositions from // a given RefPosition. This was preferred rather than adding a field in RefPosition // for this purpose. // @@ -1607,7 +1623,7 @@ int LinearScan::BuildConsecutiveRegistersForUse(GenTree* treeNode) // as well so during register assignment, we could visit it and // make sure that it doesn't get assigned one of register that is part // of consecutive registers we are allocating for this treeNode. - // See setNextConsecutiveRegisterAssignment(). + // See assignConsecutiveRegisters(). restoreRefPos->needsConsecutive = true; restoreRefPos->regCount = 0; if (firstRefPos == nullptr) @@ -1615,7 +1631,7 @@ int LinearScan::BuildConsecutiveRegistersForUse(GenTree* treeNode) // Always set the non UpperVectorRestore as the firstRefPos. // UpperVectorRestore can be assigned to a different independent // register. - // See TODO-CQ in setNextConsecutiveRegisterAssignment(). + // See TODO-CQ in assignConsecutiveRegisters(). firstRefPos = currRefPos; } refPositionMap->Set(lastRefPos, restoreRefPos, LinearScan::NextConsecutiveRefPositionsMap::Overwrite); @@ -1637,8 +1653,8 @@ int LinearScan::BuildConsecutiveRegistersForUse(GenTree* treeNode) regCount++; } - // Just `regCount` to actual registers count for first ref-position. - // For others, set 0 so we can identify that this is non-first refposition. + // Set `regCount` to actual consecutive registers count for first ref-position. + // For others, set 0 so we can identify that this is non-first RefPosition. 
firstRefPos->regCount = regCount; #ifdef DEBUG diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index ceca2bbfdd0099..d1cf91f411cfcd 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -1862,7 +1862,7 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, LsraLocation currentLoc #ifdef TARGET_ARM64 if (newRefPosition->isFirstRefPositionOfConsecutiveRegisters()) { - newRefPosition->registerAssignment |= LsraExtraFPSetForConsecutive; + newRefPosition->registerAssignment |= LsraLimitFPSetForConsecutive; } #endif if ((newRefPosition->registerAssignment != oldAssignment) && (newRefPosition->refType == RefTypeUse) && From 7cffe7a43ce03e41204b8936e08f47cb4865ad4e Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 30 Mar 2023 00:02:43 -0700 Subject: [PATCH 120/125] wip --- src/coreclr/jit/lsra.h | 1 + src/coreclr/jit/lsraarm64.cpp | 77 ++++++++++++++++++++++++++++++++++- 2 files changed, 76 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 090841d007f7c4..a47f19b8b96d81 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1211,6 +1211,7 @@ class LinearScan : public LinearScanInterface regMaskTP filterConsecutiveCandidates(regMaskTP candidates, unsigned int registersNeeded, regMaskTP* allConsecutiveCandidates); + regMaskTP filterConsecutiveCandidatesForSpill(regMaskTP availableRegisters, regMaskTP consecutiveCandidates, unsigned int registersNeeded); #endif // TARGET_ARM64 regMaskTP getFreeCandidates(regMaskTP candidates ARM_ARG(var_types regType)) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 66d6d54b5f8a83..1dd6f4bcd77d68 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -186,6 +186,8 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, regMaskTP currAvailableRegs = candidates; regMaskTP overallResult = RBM_NONE; regMaskTP consecutiveResult = RBM_NONE; + + // TODO: What about this? regMaskTP busyRegsInThisLocation = regsBusyUntilKill | regsInUseThisLocation; // At this point, for 'n' registers requirement, if Rm, Rm+1, Rm+2, ..., Rm+k-1 are @@ -214,7 +216,7 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, regMaskTP maskProcessed = ~(currAvailableRegs | startMask); // From regAvailableStart, find the first unavailable register (bit `0`). - if (maskProcessed == 0) + if (maskProcessed == RBM_NONE) { regAvailableEndIndex = 64; if ((regAvailableEndIndex - regAvailableStartIndex) >= registersNeeded) @@ -310,6 +312,57 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, return consecutiveResult; } + +regMaskTP LinearScan::filterConsecutiveCandidatesForSpill(regMaskTP availableRegisters, + regMaskTP consecutiveCandidates, + unsigned int registersNeeded) +{ + regMaskTP consecutiveResult = RBM_NONE; + regMaskTP unprocessedRegs = consecutiveCandidates; + DWORD regAvailableStartIndex = 0, regAvailableEndIndex = 0; + int maxSpillRegs = registersNeeded; + regMaskTP registersNeededMask = (1ULL << registersNeeded) - 1; + do + { + // From LSB, find the first available register (bit `1`) + BitScanForward64(®AvailableStartIndex, static_cast(unprocessedRegs)); + + regMaskTP startMask = (1ULL << regAvailableStartIndex) - 1; + + // Mask all the bits that are processed from LSB thru regAvailableStart until the last `1`. + regMaskTP maskProcessed = ~(unprocessedRegs | startMask); + + // From regAvailableStart, find the first unavailable register (bit `0`). 
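The filtering routines above walk the candidate mask run by run with BitScanForward64, recording every bit that can start a full range of the required length. A simplified C# model of that scan, again over a flat 64-bit mask with no wrap-around handling and illustrative names:

using System;
using System.Numerics;

class FreeRunScanSketch
{
    // For each run of consecutive '1' bits in 'candidates' that is at least
    // 'needed' long, mark every bit that can start a complete range.
    static ulong ConsecutiveStarts(ulong candidates, int needed)
    {
        ulong result = 0;
        ulong remaining = candidates;
        while (remaining != 0)
        {
            int runStart = BitOperations.TrailingZeroCount(remaining);
            int runEnd = runStart;
            while (runEnd < 64 && ((candidates >> runEnd) & 1) != 0)
            {
                runEnd++;
            }

            if (runEnd - runStart >= needed)
            {
                for (int s = runStart; s <= runEnd - needed; s++)
                {
                    result |= 1UL << s;
                }
            }

            // Drop the processed run and keep scanning above it.
            remaining = runEnd >= 64 ? 0 : remaining & ~((1UL << runEnd) - 1);
        }
        return result;
    }

    static void Main()
    {
        // V2..V4 and V9..V10 are free: for 2-register ranges the valid starts are V2, V3 and V9.
        ulong mask = (1UL << 2) | (1UL << 3) | (1UL << 4) | (1UL << 9) | (1UL << 10);
        Console.WriteLine(Convert.ToString((long)ConsecutiveStarts(mask, 2), 2));
    }
}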
+ if (maskProcessed == RBM_NONE) + { + regAvailableEndIndex = 64; + // We won't be here, if there was alread + assert((regAvailableEndIndex - regAvailableStartIndex) < registersNeeded); + } + else + { + //regMaskTP maskForCurRange = registersNeededMask << regAvailableStartIndex; + + //maskForCurRange = maskForCurRange & availableRegisters; + //if (maskForCurRange != RBM_NONE) + //{ + // int curSpillRegs = registersNeeded - BitOperations::PopCount(maskForCurRange) + 1; + // if (curSpillRegs < maxSpillRegs) + // { + // // We found a series that will need fewer registers to be spilled. + // // Reset whatever we found so far and start accumulating the result again. + // consecutiveResultForBusy = RBM_NONE; + // maxSpillRegs = curSpillRegs; + // } + + // consecutiveResultForBusy |= 1ULL << regAvailableStartIndex; + //} + } + unprocessedRegs &= 0; // Just set the `regAvailableStartIndex` bit 0. + } while (true); + return RBM_NONE; +} + //------------------------------------------------------------------------ // getConsecutiveCandidates: Returns the mask of all the consecutive candidates // for given RefPosition. For first RefPosition of a series of RefPositions that needs @@ -436,7 +489,27 @@ regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP allCandidates, // try_FAR_NEXT_REF(), etc. here which would complicate things. Instead, we just go with option# 1 and select // registers based on fewer number of registers that has to be spilled. // - regMaskTP consecutiveResultForBusy = filterConsecutiveCandidates(allCandidates, registersNeeded, &overallResult); + regMaskTP overallResultForBusy; + regMaskTP consecutiveResultForBusy = + filterConsecutiveCandidates(allCandidates, registersNeeded, &overallResultForBusy); + + regMaskTP overallResultForSpill = m_AvailableRegs & overallResultForBusy; + if (overallResultForSpill != RBM_NONE) + { + // `overallResultForBusy` contains the mask of entire series that can be the consecutive candidates. + // If there is an overlap of that with free registers, then try to find a series that will need least + // registers spilling as mentioned in #1 above. + + regMaskTP optimalConsecutiveResultForBusy = + filterConsecutiveCandidatesForSpill(consecutiveResultForBusy, registersNeeded); + + if (optimalConsecutiveResultForBusy != RBM_NONE) + { + *busyCandidates = optimalConsecutiveResultForBusy; + // TODO: What to do with mixConsecutiveResult? 
+ } + } + regMaskTP mixConsecutiveResult = m_AvailableRegs & consecutiveResultForBusy; if (mixConsecutiveResult != RBM_NONE) { From 0dc4ea6e25f081f6495aa7f23dacf99417cea4d8 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 30 Mar 2023 10:22:50 -0700 Subject: [PATCH 121/125] fix a typo in test case --- .../Arm/Shared/VectorLookupExtension_4Test.template | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_4Test.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_4Test.template index c8a59cbc97f445..f06653ab4ef01b 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_4Test.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/VectorLookupExtension_4Test.template @@ -250,7 +250,7 @@ namespace JIT.HardwareIntrinsics.Arm for (var i = 0; i < Op2ElementCount; i++) { _data5[i] = {NextValueOp2}; } Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _fld5), ref Unsafe.As<{Op2BaseType}, byte>(ref _data5[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); - for (var i = 0; i < Op1ElementCount; i++) { _data0[i] = {NextValueOp0}; } + for (var i = 0; i < Op2ElementCount; i++) { _data0[i] = {NextValueOp0}; } for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } for (var i = 0; i < Op1ElementCount; i++) { _data3[i] = {NextValueOp1}; } From 6d9e1367665870130a86034e5e9bfe69b960b435 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 30 Mar 2023 16:36:57 -0700 Subject: [PATCH 122/125] Add filterConsecutiveCandidatesForSpill() to select range that needs fewer register spilling --- src/coreclr/jit/lsra.cpp | 4 + src/coreclr/jit/lsra.h | 2 +- src/coreclr/jit/lsraarm64.cpp | 144 ++++++++++++++++++++-------------- 3 files changed, 92 insertions(+), 58 deletions(-) diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index c0b7ac5e6393c3..39e8e155bca8b8 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -12271,6 +12271,10 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* currentInterval, { candidates = busyConsecutiveCandidates; } + else + { + assert(busyConsecutiveCandidates == RBM_NONE); + } } else { diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index c0f859940b1de7..19bd5a99b4f5ed 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1226,7 +1226,7 @@ class LinearScan : public LinearScanInterface regMaskTP filterConsecutiveCandidates(regMaskTP candidates, unsigned int registersNeeded, regMaskTP* allConsecutiveCandidates); - regMaskTP filterConsecutiveCandidatesForSpill(regMaskTP availableRegisters, regMaskTP consecutiveCandidates, unsigned int registersNeeded); + regMaskTP filterConsecutiveCandidatesForSpill(regMaskTP consecutiveCandidates, unsigned int registersNeeded); #endif // TARGET_ARM64 regMaskTP getFreeCandidates(regMaskTP candidates ARM_ARG(var_types regType)) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 1dd6f4bcd77d68..f04a0870b06d20 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -183,12 +183,9 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, return RBM_NONE; } - regMaskTP currAvailableRegs = candidates; - regMaskTP overallResult = RBM_NONE; - regMaskTP consecutiveResult = RBM_NONE; - - // TODO: What about this? 
- regMaskTP busyRegsInThisLocation = regsBusyUntilKill | regsInUseThisLocation; + regMaskTP currAvailableRegs = candidates; + regMaskTP overallResult = RBM_NONE; + regMaskTP consecutiveResult = RBM_NONE; // At this point, for 'n' registers requirement, if Rm, Rm+1, Rm+2, ..., Rm+k-1 are // available, create the mask only for Rm, Rm+1, ..., Rm+(k-n) to convey that it @@ -308,17 +305,31 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates, } } + // consecutiveResult should always be a subset of overallResult + assert((overallResult & consecutiveResult) == consecutiveResult); *allConsecutiveCandidates = overallResult; return consecutiveResult; } - -regMaskTP LinearScan::filterConsecutiveCandidatesForSpill(regMaskTP availableRegisters, - regMaskTP consecutiveCandidates, - unsigned int registersNeeded) +//------------------------------------------------------------------------ +// filterConsecutiveCandidatesForSpill: Amoung the selected consecutiveCandidates, +// check if there are any ranges that would require fewer registers to spill +// and returns such mask. The return result would always be a subset of +// consecutiveCandidates. +// +// Arguments: +// consecutiveCandidates - Consecutive candidates to filter on. +// registersNeeded - Number of registers needed. +// +// Returns: +// Filtered candidates that needs fewer spilling. +// +regMaskTP LinearScan::filterConsecutiveCandidatesForSpill(regMaskTP consecutiveCandidates, unsigned int registersNeeded) { - regMaskTP consecutiveResult = RBM_NONE; - regMaskTP unprocessedRegs = consecutiveCandidates; + assert(consecutiveCandidates != RBM_NONE); + assert((registersNeeded >= 2) && (registersNeeded <= 4)); + regMaskTP consecutiveResultForBusy = RBM_NONE; + regMaskTP unprocessedRegs = consecutiveCandidates; DWORD regAvailableStartIndex = 0, regAvailableEndIndex = 0; int maxSpillRegs = registersNeeded; regMaskTP registersNeededMask = (1ULL << registersNeeded) - 1; @@ -327,40 +338,56 @@ regMaskTP LinearScan::filterConsecutiveCandidatesForSpill(regMaskTP available // From LSB, find the first available register (bit `1`) BitScanForward64(®AvailableStartIndex, static_cast(unprocessedRegs)); - regMaskTP startMask = (1ULL << regAvailableStartIndex) - 1; - - // Mask all the bits that are processed from LSB thru regAvailableStart until the last `1`. - regMaskTP maskProcessed = ~(unprocessedRegs | startMask); + // For the current range, find how many registers are free vs. busy + regMaskTP maskForCurRange = RBM_NONE; + bool shouldCheckForRounding = false; + switch (registersNeeded) + { + case 2: + shouldCheckForRounding = (regAvailableStartIndex == 63); + break; + case 3: + shouldCheckForRounding = (regAvailableStartIndex >= 62); + break; + case 4: + shouldCheckForRounding = (regAvailableStartIndex >= 61); + break; + default: + assert("Unsupported registersNeeded\n"); + break; + } - // From regAvailableStart, find the first unavailable register (bit `0`). 
- if (maskProcessed == RBM_NONE)
+ if (shouldCheckForRounding)
 {
- regAvailableEndIndex = 64;
- // We won't be here, if there was alread
- assert((regAvailableEndIndex - regAvailableStartIndex) < registersNeeded);
+ unsigned int roundedRegistersNeeded = registersNeeded - (63 - regAvailableStartIndex + 1);
+ maskForCurRange = (1ULL << roundedRegistersNeeded) - 1;
 }
- else
+
+ maskForCurRange |= (registersNeededMask << regAvailableStartIndex);
+ maskForCurRange &= m_AvailableRegs;
+
+ if (maskForCurRange != RBM_NONE)
 {
- //regMaskTP maskForCurRange = registersNeededMask << regAvailableStartIndex;
-
- //maskForCurRange = maskForCurRange & availableRegisters;
- //if (maskForCurRange != RBM_NONE)
- //{
- // int curSpillRegs = registersNeeded - BitOperations::PopCount(maskForCurRange) + 1;
- // if (curSpillRegs < maxSpillRegs)
- // {
- // // We found a series that will need fewer registers to be spilled.
- // // Reset whatever we found so far and start accumulating the result again.
- // consecutiveResultForBusy = RBM_NONE;
- // maxSpillRegs = curSpillRegs;
- // }
-
- // consecutiveResultForBusy |= 1ULL << regAvailableStartIndex;
- //}
+ // In the given range, there are some free registers available. Calculate how many registers
+ // will need spilling if this range is picked.
+
+ int curSpillRegs = registersNeeded - BitOperations::PopCount(maskForCurRange);
+ if (curSpillRegs < maxSpillRegs)
+ {
+ consecutiveResultForBusy = 1ULL << regAvailableStartIndex;
+ maxSpillRegs = curSpillRegs;
+ }
+ else if (curSpillRegs == maxSpillRegs)
+ {
+ consecutiveResultForBusy |= 1ULL << regAvailableStartIndex;
+ }
 }
- unprocessedRegs &= 0; // Just set the `regAvailableStartIndex` bit 0.
- } while (true);
- return RBM_NONE;
+ unprocessedRegs &= ~(1ULL << regAvailableStartIndex);
+ } while (unprocessedRegs != RBM_NONE);
+
+ // consecutiveResultForBusy should always be a subset of consecutiveCandidates.
+ assert((consecutiveCandidates & consecutiveResultForBusy) == consecutiveResultForBusy);
+ return consecutiveResultForBusy;
 }

 //------------------------------------------------------------------------
@@ -372,10 +399,14 @@ regMaskTP LinearScan::filterConsecutiveCandidatesForSpill(regMaskTP available
 //
 // Arguments:
 // allCandidates - Register assigned to the first RefPosition.
-// refPosition - Number of registers to check.
+// refPosition - RefPosition for which consecutive registers are needed.
+// busyCandidates - (out) Register mask of candidates (possibly including busy registers) to use when no free consecutive series exists.
 //
 // Returns:
-// Register mask of consecutive registers.
+// Register mask of free consecutive registers. If there are not enough free registers,
+// or the free registers are not consecutive, then return RBM_NONE. In that case,
+// `busyCandidates` will contain the register mask that can be assigned and will include
+// both free and busy registers.
 //
 // Notes:
 // The consecutive registers mask includes just the bits of first registers or
@@ -493,8 +524,10 @@ regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP allCandidates,
 regMaskTP consecutiveResultForBusy =
 filterConsecutiveCandidates(allCandidates, registersNeeded, &overallResultForBusy);

- regMaskTP overallResultForSpill = m_AvailableRegs & overallResultForBusy;
- if (overallResultForSpill != RBM_NONE)
+ *busyCandidates = consecutiveResultForBusy;
+
+ // Check if we can pick better registers among consecutiveResultForBusy.
+ if ((m_AvailableRegs & overallResultForBusy) != RBM_NONE)
 {
 // `overallResultForBusy` contains the mask of entire series that can be the consecutive candidates.
// If there is an overlap of that with free registers, then try to find a series that will need least @@ -506,21 +539,18 @@ regMaskTP LinearScan::getConsecutiveCandidates(regMaskTP allCandidates, if (optimalConsecutiveResultForBusy != RBM_NONE) { *busyCandidates = optimalConsecutiveResultForBusy; - // TODO: What to do with mixConsecutiveResult? + } + else if ((m_AvailableRegs & consecutiveResultForBusy) != RBM_NONE) + { + // We did not find free consecutive candidates, however we found some registers among the + // `allCandidates` that are mix of free and busy. Since `busyCandidates` just has bit set for first + // register of such series, return the mask that starts with free register, if possible. The busy + // registers will be spilled during assignment of subsequent RefPosition. + *busyCandidates = (m_AvailableRegs & consecutiveResultForBusy); } } - regMaskTP mixConsecutiveResult = m_AvailableRegs & consecutiveResultForBusy; - if (mixConsecutiveResult != RBM_NONE) - { - // We did not find free consecutive candidates, however we found some registers among the `allCandidates` that - // are mix of free and busy. Since `busyCandidates` just has bit set for first register of such series, return - // the mask that starts with free register, if possible. The busy registers will be spilled during assignment of - // subsequent RefPosition. - *busyCandidates = mixConsecutiveResult; - } - - *busyCandidates = consecutiveResultForBusy; + // Return RBM_NONE because there was no free candidates. return RBM_NONE; } //------------------------------------------------------------------------ From f247b3c6408c60b751999ca30710005d2db9a044 Mon Sep 17 00:00:00 2001 From: Zoltan Varga Date: Fri, 31 Mar 2023 01:54:01 -0400 Subject: [PATCH 123/125] Add mono support. --- src/mono/mono/mini/llvm-intrinsics.h | 6 +++ src/mono/mono/mini/mini-llvm.c | 57 +++++++++++++++++++++++++ src/mono/mono/mini/mini-ops.h | 8 ++++ src/mono/mono/mini/simd-intrinsics.c | 63 ++++++++++++++++++++++++---- 4 files changed, 127 insertions(+), 7 deletions(-) diff --git a/src/mono/mono/mini/llvm-intrinsics.h b/src/mono/mono/mini/llvm-intrinsics.h index 3c79c7aa59427e..dad84520297216 100644 --- a/src/mono/mono/mini/llvm-intrinsics.h +++ b/src/mono/mono/mini/llvm-intrinsics.h @@ -466,6 +466,12 @@ INTRINS_OVR_TAG(AARCH64_ADV_SIMD_SRI, aarch64_neon_vsri, Arm64, V64 | V128 | I1 INTRINS_OVR_TAG(AARCH64_ADV_SIMD_TBX1, aarch64_neon_tbx1, Arm64, V64 | V128 | I1) INTRINS_OVR_TAG(AARCH64_ADV_SIMD_TBL1, aarch64_neon_tbl1, Arm64, V64 | V128 | I1) +INTRINS_OVR_TAG(AARCH64_ADV_SIMD_TBX2, aarch64_neon_tbx2, Arm64, V64 | V128 | I1) +INTRINS_OVR_TAG(AARCH64_ADV_SIMD_TBL2, aarch64_neon_tbl2, Arm64, V64 | V128 | I1) +INTRINS_OVR_TAG(AARCH64_ADV_SIMD_TBX3, aarch64_neon_tbx3, Arm64, V64 | V128 | I1) +INTRINS_OVR_TAG(AARCH64_ADV_SIMD_TBL3, aarch64_neon_tbl3, Arm64, V64 | V128 | I1) +INTRINS_OVR_TAG(AARCH64_ADV_SIMD_TBX4, aarch64_neon_tbx4, Arm64, V64 | V128 | I1) +INTRINS_OVR_TAG(AARCH64_ADV_SIMD_TBL4, aarch64_neon_tbl4, Arm64, V64 | V128 | I1) INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_SDOT, aarch64_neon_sdot, Arm64, Arm64DotProd, V64 | V128 | I4) INTRINS_OVR_TAG_KIND(AARCH64_ADV_SIMD_UDOT, aarch64_neon_udot, Arm64, Arm64DotProd, V64 | V128 | I4) diff --git a/src/mono/mono/mini/mini-llvm.c b/src/mono/mono/mini/mini-llvm.c index 1d78c8dd81d825..2d70d15001317f 100644 --- a/src/mono/mono/mini/mini-llvm.c +++ b/src/mono/mono/mini/mini-llvm.c @@ -11312,6 +11312,63 @@ MONO_RESTORE_WARNING values [ins->dreg] = result; break; } + case OP_ARM64_TBL_INDIRECT: + case 
OP_ARM64_TBX_INDIRECT: { + int nvectors = ins->inst_c0; + guint32 *offsets = (guint32*)ins->inst_p1; + + LLVMValueRef args [16]; + + LLVMTypeRef etype = LLVMVectorType (LLVMInt8Type (), 16); + + int aindex = 0; + + LLVMValueRef table_val, default_values_val, indexes_val; + if (ins->opcode == OP_ARM64_TBX_INDIRECT) { + table_val = lhs; + default_values_val = rhs; + indexes_val = arg3; + args [aindex ++] = default_values_val; + } else { + table_val = lhs; + indexes_val = rhs; + } + + /* Load input vectors from memory */ + LLVMValueRef addr = convert (ctx, table_val, pointer_type (etype)); + for (int i = 0; i < nvectors; ++i) { + g_assert (offsets [i] % 16 == 0); + LLVMValueRef index = const_int32 (offsets [i] / 16); + LLVMValueRef ptr = LLVMBuildGEP2 (builder, etype, addr, &index, 1, ""); + args [aindex ++] = emit_load (builder, etype, ptr, "", FALSE); + } + args [aindex ++] = indexes_val; + g_assert (aindex < 16); + + IntrinsicId iid = (IntrinsicId)0; + if (ins->opcode == OP_ARM64_TBL_INDIRECT) { + switch (nvectors) { + case 2: iid = INTRINS_AARCH64_ADV_SIMD_TBL2; break; + case 3: iid = INTRINS_AARCH64_ADV_SIMD_TBL3; break; + case 4: iid = INTRINS_AARCH64_ADV_SIMD_TBL4; break; + default: + g_assert_not_reached (); + break; + } + } else { + switch (nvectors) { + case 2: iid = INTRINS_AARCH64_ADV_SIMD_TBX2; break; + case 3: iid = INTRINS_AARCH64_ADV_SIMD_TBX3; break; + case 4: iid = INTRINS_AARCH64_ADV_SIMD_TBX4; break; + default: + g_assert_not_reached (); + break; + } + } + llvm_ovr_tag_t ovr_tag = (LLVMGetVectorSize (LLVMTypeOf (indexes_val)) == 8 ? INTRIN_vector64 : INTRIN_vector128) | INTRIN_int8; + values [ins->dreg] = call_overloaded_intrins (ctx, iid, ovr_tag, args, ""); + break; + } case OP_XOP_OVR_X_X: { IntrinsicId iid = (IntrinsicId) ins->inst_c0; llvm_ovr_tag_t ovr_tag = ovr_tag_from_mono_vector_class (ins->klass); diff --git a/src/mono/mono/mini/mini-ops.h b/src/mono/mono/mini/mini-ops.h index 162711201ad142..e844b5cccac3df 100644 --- a/src/mono/mono/mini/mini-ops.h +++ b/src/mono/mono/mini/mini-ops.h @@ -1755,6 +1755,14 @@ MINI_OP3(OP_ARM64_SQRDMLSH, "arm64_sqrdmlsh", XREG, XREG, XREG, XREG) MINI_OP3(OP_ARM64_SQRDMLSH_BYSCALAR, "arm64_sqrdmlsh_byscalar", XREG, XREG, XREG, XREG) MINI_OP3(OP_ARM64_SQRDMLSH_SCALAR, "arm64_sqrdmlsh_scalar", XREG, XREG, XREG, XREG) +/* + * sreg1 points to a memory area with the input vectors. + * inst_c0 is the number of vectors. + * inst_p1 points to an int array with the offsets inside the memory area. 
+ */ +MINI_OP(OP_ARM64_TBL_INDIRECT, "arm64_tbl_indirect", XREG, IREG, XREG) +MINI_OP3(OP_ARM64_TBX_INDIRECT, "arm64_tbx_indirect", XREG, IREG, XREG, XREG) + #endif // TARGET_ARM64 MINI_OP(OP_FCVTL, "convert_to_higher_precision", XREG, XREG, NONE) diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index 946af21119cce8..cffd7b48557b2c 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -3562,13 +3562,62 @@ emit_arm64_intrinsics ( return ret; } case SN_VectorTableLookup: - if (!type_is_simd_vector (fsig->params [0]) || !type_is_simd_vector (fsig->params [1])) - return NULL; - return emit_simd_ins_for_sig (cfg, klass, OP_XOP_OVR_X_X_X, INTRINS_AARCH64_ADV_SIMD_TBL1, 0, fsig, args); - case SN_VectorTableLookupExtension: - if (!type_is_simd_vector (fsig->params [0]) || !type_is_simd_vector (fsig->params [1])) - return NULL; - return emit_simd_ins_for_sig (cfg, klass, OP_XOP_OVR_X_X_X_X, INTRINS_AARCH64_ADV_SIMD_TBX1, 0, fsig, args); + case SN_VectorTableLookupExtension: { + if (type_is_simd_vector (fsig->params [0]) && type_is_simd_vector (fsig->params [1])) { + if (id == SN_VectorTableLookup) + return emit_simd_ins_for_sig (cfg, klass, OP_XOP_OVR_X_X_X, INTRINS_AARCH64_ADV_SIMD_TBL1, 0, fsig, args); + else + return emit_simd_ins_for_sig (cfg, klass, OP_XOP_OVR_X_X_X_X, INTRINS_AARCH64_ADV_SIMD_TBX1, 0, fsig, args); + } + + MonoInst *ins, *addr; + int tuple_argindex; + + if (id == SN_VectorTableLookup) + /* VectorTableLookup((Vector128, Vector128) table, Vector128 byteIndexes) */ + tuple_argindex = 0; + else + /* VectorTableLookupExtension(Vector128 defaultValues, (Vector128, Vector128) table, Vector128 byteIndexes */ + tuple_argindex = 1; + + /* + * These intrinsics have up to 5 inputs, and our IR can't model that, so save the inputs to the stack and have + * the LLVM implementation read them back. + */ + MonoType *tuple_type = fsig->params [tuple_argindex]; + g_assert (tuple_type->type == MONO_TYPE_GENERICINST); + MonoClass *tclass = mono_class_from_mono_type_internal (tuple_type); + mono_class_init_internal (tclass); + + MonoClassField *fields = m_class_get_fields (tclass); + int nfields = mono_class_get_field_count (tclass); + guint32 *offsets = mono_mempool_alloc0 (cfg->mempool, nfields * sizeof (guint32)); + for (int i = 0; i < mono_class_get_field_count (tclass); ++i) + offsets [i] = mono_field_get_offset (&fields [i]) - MONO_ABI_SIZEOF (MonoObject); + + int vreg = alloc_xreg (cfg); + NEW_VARLOADA_VREG (cfg, addr, vreg, tuple_type); + MONO_ADD_INS (cfg->cbb, addr); + + EMIT_NEW_STORE_MEMBASE_TYPE (cfg, ins, tuple_type, addr->dreg, 0, args [tuple_argindex]->dreg); + + MONO_INST_NEW (cfg, ins, id == SN_VectorTableLookup ? 
OP_ARM64_TBL_INDIRECT : OP_ARM64_TBX_INDIRECT); + ins->dreg = alloc_xreg (cfg); + ins->sreg1 = addr->dreg; + if (id == SN_VectorTableLookup) { + /* byteIndexes */ + ins->sreg2 = args [1]->dreg; + } else { + /* defaultValues */ + ins->sreg2 = args [0]->dreg; + /* byteIndexes */ + ins->sreg3 = args [2]->dreg; + } + ins->inst_c0 = nfields; + ins->inst_p1 = offsets; + MONO_ADD_INS (cfg->cbb, ins); + return ins; + } default: g_assert_not_reached (); } From e8d3ee5413d21321f34159a2c27f0c770154a68b Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Sat, 1 Apr 2023 13:35:29 -0700 Subject: [PATCH 124/125] Delay free the registers for VectorTableLookupExtension --- src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 5 +++ src/coreclr/jit/lsra.h | 2 +- src/coreclr/jit/lsraarm64.cpp | 37 ++++++++++++++++++--- 3 files changed, 39 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 9636728f504f40..05ccf663db67ce 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -1057,6 +1057,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) unsigned regCount = 0; op1Reg = intrin.op1->GetRegNum(); op3Reg = intrin.op3->GetRegNum(); + assert(targetReg != op3Reg); if (intrin.op2->OperIsFieldList()) { GenTreeFieldList* fieldList = intrin.op2->AsFieldList(); @@ -1069,7 +1070,11 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) #ifdef DEBUG GenTree* argNode = use.GetNode(); + + // registers should be consecutive assert(argReg == argNode->GetRegNum()); + // and they should not interfere with targetReg + assert(targetReg != argReg); argReg = REG_NEXT(argReg); #endif } diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 19bd5a99b4f5ed..373681085ce944 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -2002,7 +2002,7 @@ class LinearScan : public LinearScanInterface #ifdef FEATURE_HW_INTRINSICS int BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCount); #ifdef TARGET_ARM64 - int BuildConsecutiveRegistersForUse(GenTree* treeNode); + int BuildConsecutiveRegistersForUse(GenTree* treeNode, GenTree* rmwNode = nullptr); #endif #endif // FEATURE_HW_INTRINSICS diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index f04a0870b06d20..72047f67a56b35 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1587,8 +1587,9 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou assert(intrin.op3 != nullptr); assert((intrin.id == NI_AdvSimd_VectorTableLookupExtension) || (intrin.id == NI_AdvSimd_Arm64_VectorTableLookupExtension)); - srcCount += BuildConsecutiveRegistersForUse(intrin.op2); - srcCount += isRMW ? BuildDelayFreeUses(intrin.op3, intrin.op1) : BuildOperandUses(intrin.op3); + assert(isRMW); + srcCount += BuildConsecutiveRegistersForUse(intrin.op2, intrin.op1); + srcCount += BuildDelayFreeUses(intrin.op3, intrin.op1); } assert(dstCount == 1); buildInternalRegisterUses(); @@ -1684,13 +1685,24 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou // // Arguments: // treeNode - The GT_HWINTRINSIC node of interest +// rmwNode - Read-modify-write node. // // Return Value: // The number of sources consumed by this node. 
// -int LinearScan::BuildConsecutiveRegistersForUse(GenTree* treeNode) +int LinearScan::BuildConsecutiveRegistersForUse(GenTree* treeNode, GenTree* rmwNode) { - int srcCount = 0; + int srcCount = 0; + Interval* rmwInterval = nullptr; + bool rmwIsLastUse = false; + if ((rmwNode != nullptr)) + { + if (isCandidateLocalRef(rmwNode)) + { + rmwInterval = getIntervalForLocalVarNode(rmwNode->AsLclVar()); + rmwIsLastUse = rmwNode->AsLclVar()->IsLastUse(0); + } + } if (treeNode->OperIsFieldList()) { assert(compiler->info.compNeedsConsecutiveRegisters); @@ -1739,6 +1751,15 @@ int LinearScan::BuildConsecutiveRegistersForUse(GenTree* treeNode) } refPositionMap->Set(lastRefPos, restoreRefPos, LinearScan::NextConsecutiveRefPositionsMap::Overwrite); refPositionMap->Set(restoreRefPos, currRefPos, LinearScan::NextConsecutiveRefPositionsMap::Overwrite); + + if (rmwNode != nullptr) + { + // If we have rmwNode, determine if the restoreRefPos should be set to delay-free. + if ((restoreRefPos->getInterval() != rmwInterval) || (!rmwIsLastUse && !restoreRefPos->lastUse)) + { + setDelayFree(restoreRefPos); + } + } } else #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE @@ -1754,6 +1775,14 @@ int LinearScan::BuildConsecutiveRegistersForUse(GenTree* treeNode) lastRefPos = currRefPos; regCount++; + if (rmwNode != nullptr) + { + // If we have rmwNode, determine if the currRefPos should be set to delay-free. + if ((currRefPos->getInterval() != rmwInterval) || (!rmwIsLastUse && !currRefPos->lastUse)) + { + setDelayFree(currRefPos); + } + } } // Set `regCount` to actual consecutive registers count for first ref-position. From d778833e4b32a5767a11d973460d2ca41b33d119 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 3 Apr 2023 05:58:10 -0700 Subject: [PATCH 125/125] fix mono build error --- src/mono/mono/mini/simd-intrinsics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index ec4ee7db2634a4..ff70cab60f5b32 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -3602,7 +3602,7 @@ emit_arm64_intrinsics ( MonoClassField *fields = m_class_get_fields (tclass); int nfields = mono_class_get_field_count (tclass); guint32 *offsets = mono_mempool_alloc0 (cfg->mempool, nfields * sizeof (guint32)); - for (int i = 0; i < mono_class_get_field_count (tclass); ++i) + for (uint32_t i = 0; i < mono_class_get_field_count (tclass); ++i) offsets [i] = mono_field_get_offset (&fields [i]) - MONO_ABI_SIZEOF (MonoObject); int vreg = alloc_xreg (cfg);
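
For reference, this is the shape of the API surface that the series above exposes, as consumed from C#. A minimal sketch, assuming an ARM64 machine and a runtime built with these changes; the class name, table contents, and index values are illustrative only. Indexes at or above 32 are out of range for a two-table lookup and produce zero, which is the architectural TBL behaviour.

    using System;
    using System.Runtime.Intrinsics;
    using System.Runtime.Intrinsics.Arm;

    class VectorTableLookupDemo
    {
        static void Main()
        {
            if (!AdvSimd.Arm64.IsSupported)
            {
                Console.WriteLine("AdvSimd.Arm64 is not supported on this machine.");
                return;
            }

            // Two 16-byte tables; together they act as one 32-byte table for a single lookup.
            Vector128<byte> t0 = Vector128.Create((byte)0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
                                                  0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF);
            Vector128<byte> t1 = Vector128.Create((byte)0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7,
                                                  0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF);

            // Per-element byte indexes into the concatenated table; 32 and above are out of range.
            Vector128<byte> idx = Vector128.Create((byte)0, 1, 15, 16, 17, 31, 32, 255,
                                                   30, 29, 2, 3, 20, 21, 40, 5);

            // Expected to lower to a single two-register TBL when the JIT can place t0/t1
            // in consecutive registers, which is what the LSRA changes in this series arrange.
            Vector128<byte> result = AdvSimd.Arm64.VectorTableLookup((t0, t1), idx);

            // Element i is table[idx[i]] for idx[i] < 32, otherwise 0.
            Console.WriteLine(result);
        }
    }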
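
VectorTableLookupExtension is the TBX counterpart: an index that is out of range keeps the corresponding element of the first argument instead of producing zero. This is also the RMW shape for which patch 124 marks the table registers delay-free, so the destination never lands inside the table register group. Another sketch under the same assumptions as above:

    using System;
    using System.Runtime.Intrinsics;
    using System.Runtime.Intrinsics.Arm;

    class VectorTableLookupExtensionDemo
    {
        static void Main()
        {
            if (!AdvSimd.Arm64.IsSupported)
            {
                return;
            }

            // Value that survives for every out-of-range index.
            Vector128<byte> defaults = Vector128.Create((byte)0xEE);

            Vector128<byte> t0 = Vector128.Create((byte)0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
                                                  0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F);
            Vector128<byte> t1 = Vector128.Create((byte)0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
                                                  0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F);

            Vector128<byte> idx = Vector128.Create((byte)0, 31, 32, 200, 1, 2, 3, 4,
                                                   5, 6, 7, 8, 9, 10, 11, 12);

            // Elements 2 and 3 of the result stay 0xEE because their indexes (32 and 200)
            // fall outside the 32-byte table; every other element comes from t0/t1.
            Vector128<byte> result = AdvSimd.Arm64.VectorTableLookupExtension(defaults, (t0, t1), idx);

            Console.WriteLine(result);
        }
    }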
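
The register-selection policy introduced in patch 122 reduces to: for every register that could start a consecutive series, count how many of the registersNeeded registers in that series are currently busy, and keep only the start positions with the smallest count. Below is a standalone C# rendition of that idea; it is not the JIT code, the names are invented here, and the wrap-around of a series past the top of the mask (the "rounding" cases in the patch) is deliberately left out.

    using System.Numerics;

    static class ConsecutiveRegisterModel
    {
        // consecutiveStarts: one bit per register that could start an N-register series.
        // freeRegs:          registers that are currently free.
        // Returns the subset of consecutiveStarts whose series would force the fewest spills.
        public static ulong FilterForFewestSpills(ulong consecutiveStarts, ulong freeRegs, int registersNeeded)
        {
            ulong result = 0;
            int fewestSpills = registersNeeded;               // a fully busy series is never preferred
            ulong neededMask = (1UL << registersNeeded) - 1;

            ulong remaining = consecutiveStarts;
            while (remaining != 0)
            {
                int start = BitOperations.TrailingZeroCount(remaining);
                remaining &= remaining - 1;                   // clear the lowest set bit

                ulong series = neededMask << start;
                int freeInSeries = BitOperations.PopCount(series & freeRegs);
                if (freeInSeries == 0)
                {
                    continue;                                 // every register in this series is busy
                }

                int spills = registersNeeded - freeInSeries;
                if (spills < fewestSpills)
                {
                    fewestSpills = spills;
                    result = 1UL << start;                    // strictly better series: restart the result
                }
                else if (spills == fewestSpills)
                {
                    result |= 1UL << start;                   // equally good series: accumulate
                }
            }

            return result;
        }
    }

For example, with registersNeeded = 3, candidate starts at bits 4 and 10, and only register 5 busy, the start at bit 10 needs no spill and wins, while the start at bit 4 (which would spill register 5) is dropped.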