-
Notifications
You must be signed in to change notification settings - Fork 5.3k
Improve XmlDictionaryWriter UTF8 encoding performance #73336
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 11 commits
5d09005
63c760c
196ce48
65e7029
4d8078a
6e5aabb
70fa189
b34d259
5df5ae0
a790fbb
2b82ac8
301e531
5a21306
8a3de26
048cade
8297311
287e737
0d2a9bb
ab29682
251391f
a590739
46b6314
82f8880
d78aade
3b20be8
9c86b05
ccfb008
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,7 +2,10 @@ | |
| // The .NET Foundation licenses this file to you under the MIT license. | ||
|
|
||
| using System.IO; | ||
| using System.Numerics; | ||
| using System.Text; | ||
| using System.Runtime.Intrinsics; | ||
| using System.Runtime.Intrinsics.X86; | ||
| using System.Runtime.Serialization; | ||
| using System.Threading.Tasks; | ||
|
|
||
|
|
@@ -56,18 +59,6 @@ public int Position | |
| } | ||
| } | ||
|
|
||
| private int GetByteCount(char[] chars) | ||
| { | ||
| if (_encoding == null) | ||
| { | ||
| return s_UTF8Encoding.GetByteCount(chars); | ||
| } | ||
| else | ||
| { | ||
| return _encoding.GetByteCount(chars); | ||
| } | ||
| } | ||
|
|
||
| protected byte[] GetBuffer(int count, out int offset) | ||
| { | ||
| DiagnosticUtility.DebugAssert(count >= 0 && count <= bufferLength, ""); | ||
|
|
@@ -344,37 +335,54 @@ protected unsafe void UnsafeWriteUnicodeChars(char* chars, int charCount) | |
|
|
||
| protected unsafe int UnsafeGetUnicodeChars(char* chars, int charCount, byte[] buffer, int offset) | ||
| { | ||
| char* charsMax = chars + charCount; | ||
| while (chars < charsMax) | ||
| if (BitConverter.IsLittleEndian) | ||
| { | ||
| char value = *chars++; | ||
| buffer[offset++] = (byte)value; | ||
| value >>= 8; | ||
| buffer[offset++] = (byte)value; | ||
| new ReadOnlySpan<byte>((byte*)chars, 2 * charCount) | ||
| .CopyTo(buffer.AsSpan(offset)); | ||
| } | ||
| else | ||
| { | ||
| char* charsMax = chars + charCount; | ||
| while (chars < charsMax) | ||
| { | ||
| char value = *chars++; | ||
| buffer[offset++] = (byte)value; | ||
| buffer[offset++] = (byte)(value >> 8); | ||
| } | ||
Daniel-Svensson marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| } | ||
|
|
||
| return charCount * 2; | ||
| } | ||
|
|
||
| protected unsafe int UnsafeGetUTF8Length(char* chars, int charCount) | ||
| { | ||
| char* charsMax = chars + charCount; | ||
| while (chars < charsMax) | ||
|
|
||
| // This method is only called from 2 places, which use lengths of at least 128/3 and 256/3 respectively | ||
| // We avoid Vector<T> since it is unclear how downclocking due to AVX512 would affect total throughput | ||
| if (Vector256.IsHardwareAccelerated | ||
| && Vector256<short>.Count < charCount && charCount <= 2048) | ||
Daniel-Svensson marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| { | ||
| if (*chars >= 0x80) | ||
| break; | ||
| char* lastSimd = chars + charCount - Vector256<short>.Count; | ||
| Vector256<short> mask = Vector256.Create(unchecked((short)0xff80)); | ||
|
|
||
| chars++; | ||
| } | ||
| while (chars < lastSimd) | ||
| { | ||
| if (((*(Vector256<short>*)chars) & mask) != Vector256<short>.Zero) | ||
| goto NonAscii; | ||
|
|
||
| if (chars == charsMax) | ||
| return charCount; | ||
| chars += Vector256<short>.Count; | ||
| } | ||
|
|
||
| char[] chArray = new char[charsMax - chars]; | ||
| for (int i = 0; i < chArray.Length; i++) | ||
| { | ||
| chArray[i] = chars[i]; | ||
| if ((*(Vector256<short>*)lastSimd & mask) == Vector256<short>.Zero) | ||
| return charCount; | ||
| } | ||
| return (int)(chars - (charsMax - charCount)) + GetByteCount(chArray); | ||
|
|
||
| NonAscii: | ||
| int numRemaining = (int)(charsMax - chars); | ||
| int numAscii = charCount - numRemaining; | ||
|
|
||
| return numAscii + (_encoding ?? s_UTF8Encoding).GetByteCount(chars, numRemaining); | ||
|
||
| } | ||
|
|
||
| protected unsafe int UnsafeGetUTF8Chars(char* chars, int charCount, byte[] buffer, int offset) | ||
|
|
@@ -384,38 +392,51 @@ protected unsafe int UnsafeGetUTF8Chars(char* chars, int charCount, byte[] buffe | |
| fixed (byte* _bytes = &buffer[offset]) | ||
| { | ||
| byte* bytes = _bytes; | ||
| byte* bytesMax = &bytes[buffer.Length - offset]; | ||
| char* charsMax = &chars[charCount]; | ||
|
|
||
| while (true) | ||
| if (Sse41.IsSupported && charCount >= Vector128<short>.Count) | ||
Daniel-Svensson marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
Daniel-Svensson marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| { | ||
| Vector128<short> mask = Vector128.Create(unchecked((short)0xff80)); | ||
| char* simdLast = chars + charCount - Vector128<short>.Count; | ||
|
|
||
| while (chars < simdLast) | ||
| { | ||
| Vector128<short> v = *(Vector128<short>*)chars; | ||
| if (!Sse41.TestZ(v, mask)) | ||
| goto NonAscii; | ||
|
|
||
| Sse2.StoreScalar((long*)bytes, Sse2.PackUnsignedSaturate(v, v).AsInt64()); | ||
| bytes += Vector128<short>.Count; | ||
| chars += Vector128<short>.Count; | ||
| } | ||
|
|
||
| Vector128<short> v2 = Sse2.LoadVector128((short*)simdLast); | ||
| if (!Sse41.TestZ(v2, mask)) | ||
| goto NonAscii; | ||
|
|
||
| Sse2.StoreScalar((long*)(_bytes + charCount - sizeof(long)), Sse2.PackUnsignedSaturate(v2, v2).AsInt64()); | ||
| return charCount; | ||
| } | ||
| // Fast path for small strings; skip it and use Encoding.GetBytes for larger strings, since that is faster even for the all-ASCII case | ||
| else if (charCount < 16) | ||
| { | ||
| while (chars < charsMax) | ||
| { | ||
| char t = *chars; | ||
| if (t >= 0x80) | ||
| break; | ||
| goto NonAscii; | ||
|
|
||
| *bytes = (byte)t; | ||
| bytes++; | ||
| chars++; | ||
| } | ||
|
|
||
| if (chars >= charsMax) | ||
| break; | ||
|
|
||
| char* charsStart = chars; | ||
| while (chars < charsMax && *chars >= 0x80) | ||
| { | ||
| chars++; | ||
| } | ||
|
|
||
| bytes += (_encoding ?? s_UTF8Encoding).GetBytes(charsStart, (int)(chars - charsStart), bytes, (int)(bytesMax - bytes)); | ||
|
|
||
| if (chars >= charsMax) | ||
| break; | ||
| return charCount; | ||
| } | ||
|
|
||
| return (int)(bytes - _bytes); | ||
| NonAscii: | ||
| byte* bytesMax = _bytes + buffer.Length - offset; | ||
| return (int)(bytes - _bytes) + (_encoding ?? s_UTF8Encoding).GetBytes(chars, (int)(charsMax - chars), bytes, (int)(bytesMax - bytes)); | ||
| } | ||
| } | ||
| return 0; | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.