From 0e185af11b3f35e34ebdf9aa1282f2ad9e8b0788 Mon Sep 17 00:00:00 2001 From: Adam Sitnik Date: Thu, 4 Aug 2022 14:58:48 +0200 Subject: [PATCH] port SpanHelpers.IndexOfAny(ref byte, byte, byte, int) to Vector128/256 --- .../src/System/SpanHelpers.Byte.cs | 97 ++++--------------- 1 file changed, 21 insertions(+), 76 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs index 06d29a5bba2846..854178c841aef2 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs @@ -808,7 +808,7 @@ public static int IndexOfAny(ref byte searchSpace, byte value0, byte value1, int nuint offset = 0; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations nuint lengthToExamine = (nuint)(uint)length; - if (Sse2.IsSupported || AdvSimd.Arm64.IsSupported) + if (Vector128.IsHardwareAccelerated) { // Avx2 branch also operates on Sse2 sizes, so check is combined. nint vectorDiff = (nint)length - Vector128<byte>.Count; @@ -924,10 +924,10 @@ public static int IndexOfAny(ref byte searchSpace, byte value0, byte value1, int // the end and forwards, which may overlap on an earlier compare. // We include the Supported check again here even though path will not be taken, so the asm isn't generated if not supported. - if (Sse2.IsSupported) + if (Vector128.IsHardwareAccelerated) { - int matches; - if (Avx2.IsSupported) + uint matches; + if (Vector256.IsHardwareAccelerated) { Vector256<byte> search; // Guard as we may only have a valid size for Vector128; when we will move to the Sse2 @@ -943,13 +943,10 @@ public static int IndexOfAny(ref byte searchSpace, byte value0, byte value1, int // First time this checks again against 0, however we will move into final compare if it fails. 
while (lengthToExamine > offset) { - search = LoadVector256(ref searchSpace, offset); + search = Vector256.LoadUnsafe(ref searchSpace, offset); // Bitwise Or to combine the flagged matches for the second value to our match flags - matches = Avx2.MoveMask( - Avx2.Or( - Avx2.CompareEqual(values0, search), - Avx2.CompareEqual(values1, search))); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, + matches = (Vector256.Equals(values0, search) | Vector256.Equals(values1, search)).ExtractMostSignificantBits(); + // Note that ExtractMostSignificantBits has converted the equal vector elements into a set of bit flags, // So the bit position in 'matches' corresponds to the element offset. if (matches == 0) { @@ -962,13 +959,10 @@ public static int IndexOfAny(ref byte searchSpace, byte value0, byte value1, int } // Move to Vector length from end for final compare - search = LoadVector256(ref searchSpace, lengthToExamine); + search = Vector256.LoadUnsafe(ref searchSpace, lengthToExamine); offset = lengthToExamine; // Same as method as above - matches = Avx2.MoveMask( - Avx2.Or( - Avx2.CompareEqual(values0, search), - Avx2.CompareEqual(values1, search))); + matches = (Vector256.Equals(values0, search) | Vector256.Equals(values1, search)).ExtractMostSignificantBits(); if (matches == 0) { // None matched @@ -980,6 +974,7 @@ public static int IndexOfAny(ref byte searchSpace, byte value0, byte value1, int } // Initial size check was done on method entry. + Vector128<byte> compareResult; Debug.Assert(length >= Vector128<byte>.Count); { Vector128<byte> search; @@ -988,37 +983,33 @@ public static int IndexOfAny(ref byte searchSpace, byte value0, byte value1, int // First time this checks against 0 and we will move into final compare if it fails. 
while (lengthToExamine > offset) { - search = LoadVector128(ref searchSpace, offset); + search = Vector128.LoadUnsafe(ref searchSpace, offset); - matches = Sse2.MoveMask( - Sse2.Or( - Sse2.CompareEqual(values0, search), - Sse2.CompareEqual(values1, search)) - .AsByte()); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. - if (matches == 0) + compareResult = Vector128.Equals(values0, search) | Vector128.Equals(values1, search); + + if (compareResult == Vector128<byte>.Zero) { // None matched offset += (nuint)Vector128<byte>.Count; continue; } + matches = compareResult.ExtractMostSignificantBits(); goto IntrinsicsMatch; } // Move to Vector length from end for final compare - search = LoadVector128(ref searchSpace, lengthToExamine); + search = Vector128.LoadUnsafe(ref searchSpace, lengthToExamine); offset = lengthToExamine; // Same as method as above - matches = Sse2.MoveMask( - Sse2.Or( - Sse2.CompareEqual(values0, search), - Sse2.CompareEqual(values1, search))); - if (matches == 0) + compareResult = Vector128.Equals(values0, search) | Vector128.Equals(values1, search); + + if (compareResult == Vector128<byte>.Zero) { // None matched goto NotFound; } + + matches = compareResult.ExtractMostSignificantBits(); } IntrinsicsMatch: @@ -1026,52 +1017,6 @@ public static int IndexOfAny(ref byte searchSpace, byte value0, byte value1, int offset += (nuint)BitOperations.TrailingZeroCount(matches); goto Found; } - else if (AdvSimd.Arm64.IsSupported) - { - Vector128<byte> search; - Vector128<byte> matches; - Vector128<byte> values0 = Vector128.Create(value0); - Vector128<byte> values1 = Vector128.Create(value1); - // First time this checks against 0 and we will move into final compare if it fails. 
- while (lengthToExamine > offset) - { - search = LoadVector128(ref searchSpace, offset); - - matches = AdvSimd.Or( - AdvSimd.CompareEqual(values0, search), - AdvSimd.CompareEqual(values1, search)); - - if (matches == Vector128<byte>.Zero) - { - offset += (nuint)Vector128<byte>.Count; - continue; - } - - // Find bitflag offset of first match and add to current offset - offset += FindFirstMatchedLane(matches); - - goto Found; - } - - // Move to Vector length from end for final compare - search = LoadVector128(ref searchSpace, lengthToExamine); - offset = lengthToExamine; - // Same as method as above - matches = AdvSimd.Or( - AdvSimd.CompareEqual(values0, search), - AdvSimd.CompareEqual(values1, search)); - - if (matches == Vector128<byte>.Zero) - { - // None matched - goto NotFound; - } - - // Find bitflag offset of first match and add to current offset - offset += FindFirstMatchedLane(matches); - - goto Found; - } else if (Vector.IsHardwareAccelerated) { Vector<byte> values0 = new Vector<byte>(value0);