-
Notifications
You must be signed in to change notification settings - Fork 5.3k
Improve IndexOfAnyAsciiSearcher ARM throughput #78739
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
MihaZupan
merged 2 commits into
dotnet:main
from
MihaZupan:indexofanyvalues-arm64-byte-bug
Nov 23, 2022
Merged
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -490,26 +490,23 @@ private static Vector128<byte> IndexOfAnyLookup<TNegator, TOptimizations>(Vector | |
| // X86: Downcast every character using saturation. | ||
| // - Values <= 32767 result in min(value, 255). | ||
| // - Values > 32767 result in 0. Because of this we must do more work to handle needles that contain 0. | ||
| // ARM64: Take the low byte of each character. | ||
| // - All values result in (value & 0xFF). | ||
| // ARM64: Do narrowing saturation over unsigned values. | ||
| // - All values result in min(value, 255) | ||
| Vector128<byte> source = Sse2.IsSupported | ||
| ? Sse2.PackUnsignedSaturate(source0, source1) | ||
| : AdvSimd.Arm64.UnzipEven(source0.AsByte(), source1.AsByte()); | ||
| : AdvSimd.ExtractNarrowingSaturateUpper(AdvSimd.ExtractNarrowingSaturateLower(source0.AsUInt16()), source1.AsUInt16()); | ||
|
|
||
| Vector128<byte> result = IndexOfAnyLookupCore(source, bitmapLookup); | ||
|
|
||
| // On ARM64, we ignored the high byte of every character when packing (see above). | ||
| // The 'result' can therefore contain false positives - e.g. 0x141 would match 0x41 ('A'). | ||
| // On X86, PackUnsignedSaturate resulted in values becoming 0 for inputs above 32767. | ||
| // Any value above 32767 would therefore match against 0. If 0 is present in the needle, we must clear the false positives. | ||
| // In both cases, we can correct the result by clearing any bits that matched with a non-ascii source character. | ||
| if (AdvSimd.Arm64.IsSupported || TOptimizations.NeedleContainsZero) | ||
| if (TOptimizations.NeedleContainsZero) | ||
| { | ||
| Debug.Assert(Sse2.IsSupported); | ||
| Vector128<short> ascii0 = Vector128.LessThan(source0.AsUInt16(), Vector128.Create((ushort)128)).AsInt16(); | ||
| Vector128<short> ascii1 = Vector128.LessThan(source1.AsUInt16(), Vector128.Create((ushort)128)).AsInt16(); | ||
| Vector128<byte> ascii = Sse2.IsSupported | ||
| ? Sse2.PackSignedSaturate(ascii0, ascii1).AsByte() | ||
| : AdvSimd.Arm64.UnzipEven(ascii0.AsByte(), ascii1.AsByte()); | ||
| Vector128<byte> ascii = Sse2.PackSignedSaturate(ascii0, ascii1).AsByte(); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: does the ascii local temporary help with anything?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think so, I just found it a bit more readable when initially writing this logic |
||
| result &= ascii; | ||
| } | ||
|
|
||
|
|
@@ -542,7 +539,13 @@ private static Vector128<byte> IndexOfAnyLookupCore(Vector128<byte> source, Vect | |
| ? source | ||
| : source & Vector128.Create((byte)0xF); | ||
|
|
||
| Vector128<byte> highNibbles = Vector128.ShiftRightLogical(source.AsInt32(), 4).AsByte() & Vector128.Create((byte)0xF); | ||
| // On ARM, we have an instruction for an arithmetic right shift of 1-byte signed values. | ||
| // The shift will map values above 127 to values above 16, which the shuffle will then map to 0. | ||
| // This is how we exclude non-ASCII values from results on ARM. | ||
| // On X86, use a 4-byte value shift with AND 15 to emulate a 1-byte value logical shift. | ||
| Vector128<byte> highNibbles = AdvSimd.IsSupported | ||
| ? AdvSimd.ShiftRightArithmetic(source.AsSByte(), 4).AsByte() | ||
| : Sse2.ShiftRightLogical(source.AsInt32(), 4).AsByte() & Vector128.Create((byte)0xF); | ||
|
|
||
| // The bitmapLookup represents a 8x16 table of bits, indicating whether a character is present in the needle. | ||
| // Lookup the rows via the lower nibble and the column via the higher nibble. | ||
|
|
||
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The test would blow up here on ARM before this change